From 630ddc059a19dfee704d3ba80afa6e6f6e7483ba Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 23 Aug 2023 21:09:34 -0700 Subject: [PATCH 001/203] server: add back heap profile HTTP API and make it secure (#15408) close tikv/tikv#11161 Add back heap profile HTTP API and make it secure. The API is removed by #11162 due to a secure issue that can visit arbitrary files on the server. This PR makes it only show the file name instead of the absolute path, and adds a paranoid check to make sure the passed file name is in the set of heap profiles. Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/mod.rs | 41 +++++++++++++++++++++++------ src/server/status_server/profile.rs | 17 +++++++++--- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 679f21fdf6cd..b49fdce12af1 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,8 +40,9 @@ use openssl::{ }; use pin_project::pin_project; pub use profile::{ - activate_heap_profile, deactivate_heap_profile, jeprof_heap_profile, list_heap_profiles, - read_file, start_one_cpu_profile, start_one_heap_profile, + activate_heap_profile, deactivate_heap_profile, heap_profiles_dir, jeprof_heap_profile, + list_heap_profiles, read_file, start_one_cpu_profile, start_one_heap_profile, + HEAP_PROFILE_REGEX, }; use prometheus::TEXT_FORMAT; use regex::Regex; @@ -207,10 +208,34 @@ where let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == Some("true"); let result = if let Some(name) = query_pairs.get("name") { - if use_jeprof { - jeprof_heap_profile(name) + let re = Regex::new(HEAP_PROFILE_REGEX).unwrap(); + if !re.is_match(name) { + let errmsg = format!("heap profile name {} is invalid", name); + return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); + } + let profiles = match list_heap_profiles() { + Ok(s) => s, + Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), + }; + if profiles.iter().any(|(f, _)| f == name) { + let dir = match heap_profiles_dir() { + Some(path) => path, + None => { + return Ok(make_response( + StatusCode::INTERNAL_SERVER_ERROR, + "heap profile is not active", + )); + } + }; + let path = dir.join(name.as_ref()); + if use_jeprof { + jeprof_heap_profile(path.to_str().unwrap()) + } else { + read_file(path.to_str().unwrap()) + } } else { - read_file(name) + let errmsg = format!("heap profile {} not found", name); + return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); } } else { let mut seconds = 10; @@ -649,9 +674,9 @@ where (Method::GET, "/debug/pprof/heap_deactivate") => { Self::deactivate_heap_prof(req) } - // (Method::GET, "/debug/pprof/heap") => { - // Self::dump_heap_prof_to_resp(req).await - // } + (Method::GET, "/debug/pprof/heap") => { + Self::dump_heap_prof_to_resp(req).await + } (Method::GET, "/config") => { Self::get_config(req, &cfg_controller).await } diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index b3d91d3bea6e..dd49c394046b 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -31,7 +31,8 @@ pub use self::test_utils::TEST_PROFILE_MUTEX; use self::test_utils::{activate_prof, deactivate_prof, dump_prof}; // File name suffix for periodically dumped heap profiles. 
-const HEAP_PROFILE_SUFFIX: &str = ".heap"; +pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; +pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; lazy_static! { // If it's locked it means there are already a heap or CPU profiling. @@ -244,9 +245,17 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { Ok(output.stdout) } +pub fn heap_profiles_dir() -> Option { + PROFILE_ACTIVE + .lock() + .unwrap() + .as_ref() + .map(|(_, dir)| dir.path().to_owned()) +} + pub fn list_heap_profiles() -> Result, String> { - let path = match &*PROFILE_ACTIVE.lock().unwrap() { - Some((_, ref dir)) => dir.path().to_str().unwrap().to_owned(), + let path = match heap_profiles_dir() { + Some(path) => path.into_os_string().into_string().unwrap(), None => return Ok(vec![]), }; @@ -257,7 +266,7 @@ pub fn list_heap_profiles() -> Result, String> { Ok(x) => x, _ => continue, }; - let f = item.path().to_str().unwrap().to_owned(); + let f = item.file_name().to_str().unwrap().to_owned(); if !f.ends_with(HEAP_PROFILE_SUFFIX) { continue; } From 6560d758f9143dc5125b0c5c3b0eaadbfecffa3c Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 24 Aug 2023 13:59:04 +0800 Subject: [PATCH 002/203] raftstore-v2: fix compact range bugs that causes false positive clean tablet (#15332) ref tikv/tikv#12842 - Fix a bug of compact range that causes a dirty tablet being reported as clean. - Added an additional check to ensure trim's correctness. - Fix a bug that some tablets are not destroyed and block peer destroy progress. Signed-off-by: tabokie Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +- components/engine_panic/src/compact.rs | 4 ++ components/engine_rocks/src/compact.rs | 4 ++ components/engine_traits/src/compact.rs | 3 + .../operation/command/admin/compact_log.rs | 42 +++++++++++--- components/raftstore-v2/src/operation/life.rs | 6 +- .../src/operation/ready/snapshot.rs | 2 + components/raftstore-v2/src/worker/tablet.rs | 12 ++++ components/test_raftstore/src/util.rs | 9 ++- tests/failpoints/cases/test_sst_recovery.rs | 4 +- .../raftstore/test_compact_after_delete.rs | 6 +- tests/integrations/raftstore/test_snap.rs | 55 ++++++++++++++++++- 12 files changed, 131 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc8233ed5099..abe174e638f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3107,7 +3107,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3126,7 +3126,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "bzip2-sys", "cc", @@ -5100,7 +5100,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_panic/src/compact.rs b/components/engine_panic/src/compact.rs index 988bec790de5..f64c97ff5b03 100644 --- a/components/engine_panic/src/compact.rs +++ 
b/components/engine_panic/src/compact.rs @@ -44,6 +44,10 @@ impl CompactExt for PanicEngine { ) -> Result<()> { panic!() } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + panic!() + } } pub struct PanicCompactedEvent; diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index 199b7d9f3beb..f64c9a7d49eb 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -121,6 +121,10 @@ impl CompactExt for RocksEngine { db.compact_files_cf(handle, &opts, &files, output_level) .map_err(r2e) } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + self.as_inner().check_in_range(start, end).map_err(r2e) + } } #[cfg(test)] diff --git a/components/engine_traits/src/compact.rs b/components/engine_traits/src/compact.rs index 05590a1ff32d..2a4341a67889 100644 --- a/components/engine_traits/src/compact.rs +++ b/components/engine_traits/src/compact.rs @@ -71,6 +71,9 @@ pub trait CompactExt: CfNamesExt { max_subcompactions: u32, exclude_l0: bool, ) -> Result<()>; + + // Check all data is in the range [start, end). + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()>; } pub trait CompactedEvent: Send { diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 8920ea97e1d0..93876475f5f6 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -13,7 +13,13 @@ //! Updates truncated index, and compacts logs if the corresponding changes have //! been persisted in kvdb. -use std::path::PathBuf; +use std::{ + path::PathBuf, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; @@ -50,6 +56,10 @@ pub struct CompactLogContext { /// persisted. When persisted_apply is advanced, we need to notify tablet /// worker to destroy them. tombstone_tablets_wait_index: Vec, + /// Sometimes a tombstone tablet can be registered after tablet index is + /// advanced. We should not consider it as an active tablet otherwise it + /// might block peer destroy progress. + persisted_tablet_index: Arc, } impl CompactLogContext { @@ -60,6 +70,7 @@ impl CompactLogContext { last_applying_index, last_compacted_idx: 0, tombstone_tablets_wait_index: vec![], + persisted_tablet_index: AtomicU64::new(0).into(), } } @@ -379,7 +390,9 @@ impl Peer { )); } - /// Returns if there's any tombstone being removed. + /// Returns if there's any tombstone being removed. `persisted` state may + /// not be persisted yet, caller is responsible for actually destroying the + /// physical tablets afterwards. #[inline] pub fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { let compact_log_context = self.compact_log_context_mut(); @@ -398,11 +411,21 @@ impl Peer { } } + /// User can only increase this counter. + #[inline] + pub fn remember_persisted_tablet_index(&self) -> Arc { + self.compact_log_context().persisted_tablet_index.clone() + } + + /// Returns whether there's any tombstone tablet newer than persisted tablet + /// index. They might still be referenced by inflight apply and cannot be + /// destroyed. 
pub fn has_pending_tombstone_tablets(&self) -> bool { - !self - .compact_log_context() - .tombstone_tablets_wait_index - .is_empty() + let ctx = self.compact_log_context(); + let persisted = ctx.persisted_tablet_index.load(Ordering::Relaxed); + ctx.tombstone_tablets_wait_index + .iter() + .any(|i| *i > persisted) } #[inline] @@ -411,6 +434,8 @@ impl Peer { ctx: &StoreContext, task: &mut WriteTask, ) { + let applied_index = self.entry_storage().applied_index(); + self.remove_tombstone_tablets(applied_index); assert!( !self.has_pending_tombstone_tablets(), "{} all tombstone should be cleared before being destroyed.", @@ -421,7 +446,6 @@ impl Peer { None => return, }; let region_id = self.region_id(); - let applied_index = self.entry_storage().applied_index(); let sched = ctx.schedulers.tablet.clone(); let _ = sched.schedule(tablet::Task::prepare_destroy( tablet, @@ -557,13 +581,17 @@ impl Peer { } if self.remove_tombstone_tablets(new_persisted) { let sched = store_ctx.schedulers.tablet.clone(); + let counter = self.remember_persisted_tablet_index(); if !task.has_snapshot { task.persisted_cbs.push(Box::new(move || { let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); + // Writer guarantees no race between different callbacks. + counter.store(new_persisted, Ordering::Relaxed); })); } else { // In snapshot, the index is persisted, tablet can be destroyed directly. let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); + counter.store(new_persisted, Ordering::Relaxed); } } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 52f00d137f86..e0e7f63785d2 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -795,9 +795,13 @@ impl Peer { } // Wait for critical commands like split. if self.has_pending_tombstone_tablets() { + let applied_index = self.entry_storage().applied_index(); + let last_index = self.entry_storage().last_index(); info!( self.logger, - "postpone destroy because there're pending tombstone tablets" + "postpone destroy because there're pending tombstone tablets"; + "applied_index" => applied_index, + "last_index" => last_index, ); return true; } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 17deed333c16..9e0ed449cef5 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -343,10 +343,12 @@ impl Peer { } self.schedule_apply_fsm(ctx); if self.remove_tombstone_tablets(snapshot_index) { + let counter = self.remember_persisted_tablet_index(); let _ = ctx .schedulers .tablet .schedule(tablet::Task::destroy(region_id, snapshot_index)); + counter.store(snapshot_index, Ordering::Relaxed); } if let Some(msg) = self.split_pending_append_mut().take_append_message() { let _ = ctx.router.send_raft_message(msg); diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 183bb33cd346..7c330353836e 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -298,6 +298,8 @@ impl Runner { .spawn(async move { let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); + // Note: Refer to https://github.com/facebook/rocksdb/pull/11468. There's could be + // some files missing from compaction if dynamic_level_bytes is off. 
for r in [range1, range2] { // When compaction filter is present, trivial move is disallowed. if let Err(e) = @@ -323,6 +325,16 @@ impl Runner { return; } } + if let Err(e) = tablet.check_in_range(Some(&start_key), Some(&end_key)) { + debug_assert!(false, "check_in_range failed {:?}, is titan enabled?", e); + error!( + logger, + "trim did not remove all dirty data"; + "path" => tablet.path(), + "err" => %e, + ); + return; + } // drop before callback. drop(tablet); fail_point!("tablet_trimmed_finished"); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 02a74136bb60..f63c69f9631c 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -81,15 +81,14 @@ pub fn must_get( } debug!("last try to get {}", log_wrappers::hex_encode_upper(key)); let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); - if value.is_none() && res.is_none() - || value.is_some() && res.is_some() && value.unwrap() == &*res.unwrap() - { + if value == res.as_ref().map(|r| r.as_ref()) { return; } panic!( - "can't get value {:?} for key {}", + "can't get value {:?} for key {}, actual={:?}", value.map(escape), - log_wrappers::hex_encode_upper(key) + log_wrappers::hex_encode_upper(key), + res ) } diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index a4c1f10b5aed..da5a3da1a329 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -105,7 +105,7 @@ fn test_sst_recovery_overlap_range_sst_exist() { must_get_equal(&engine1, b"7", b"val_1"); // Validate the damaged sst has been deleted. - compact_files_to_target_level(&engine1, true, 3).unwrap(); + compact_files_to_target_level(&engine1, true, 6).unwrap(); let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 1); @@ -252,7 +252,7 @@ fn create_tikv_cluster_with_one_node_damaged() disturb_sst_file(&sst_path); // The sst file is damaged, so this action will fail. - assert_corruption(compact_files_to_target_level(&engine1, true, 3)); + assert_corruption(compact_files_to_target_level(&engine1, true, 6)); (cluster, pd_client, engine1) } diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 6ba405bb9183..a79fdfd44259 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -98,7 +98,8 @@ fn test_node_compact_after_delete_v2() { // disable it cluster.cfg.raft_store.region_compact_min_redundant_rows = 10000000; cluster.cfg.raft_store.region_compact_check_step = Some(2); - cluster.cfg.rocksdb.titan.enabled = true; + // TODO: v2 doesn't support titan. + // cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); let region = cluster.get_region(b""); @@ -169,7 +170,8 @@ fn test_node_compact_after_update_v2() { cluster.cfg.raft_store.region_compact_redundant_rows_percent = 40; cluster.cfg.raft_store.region_compact_min_redundant_rows = 50; cluster.cfg.raft_store.region_compact_check_step = Some(2); - cluster.cfg.rocksdb.titan.enabled = true; + // TODO: titan is not supported in v2. 
+ // cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); let region = cluster.get_region(b""); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 9eda281e9e46..0b71978f63bd 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -227,8 +227,6 @@ fn test_server_snap_gc() { #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] -#[test_case(test_raftstore_v2::new_server_cluster)] fn test_concurrent_snap() { let mut cluster = new_cluster(0, 3); // Test that the handling of snapshot is correct when there are multiple @@ -279,6 +277,59 @@ fn test_concurrent_snap() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_concurrent_snap_v2() { + let mut cluster = new_cluster(0, 3); + // TODO: v2 doesn't support titan. + // Test that the handling of snapshot is correct when there are multiple + // snapshots which have overlapped region ranges arrive at the same + // raftstore. + // cluster.cfg.rocksdb.titan.enabled = true; + // Disable raft log gc in this test case. + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); + // For raftstore v2, after split, follower delays first messages (see + // is_first_message() for details), so leader does not send snapshot to + // follower and CollectSnapshotFilter holds parent region snapshot forever. + // We need to set a short wait duration so that leader can send snapshot + // in time and thus CollectSnapshotFilter can send parent region snapshot. + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::millis(100); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. + pd_client.disable_default_operator(); + + let r1 = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + pd_client.must_add_peer(r1, new_peer(2, 2)); + // Force peer 2 to be followers all the way. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(r1, 2) + .msg_type(MessageType::MsgRequestVote) + .direction(Direction::Send), + )); + cluster.must_transfer_leader(r1, new_peer(1, 1)); + cluster.must_put(b"k3", b"v3"); + // Pile up snapshots of overlapped region ranges and deliver them all at once. + let (tx, rx) = mpsc::channel(); + cluster.add_recv_filter_on_node(3, Box::new(CollectSnapshotFilter::new(tx))); + pd_client.must_add_peer(r1, new_peer(3, 3)); + let region = cluster.get_region(b"k1"); + // Ensure the snapshot of range ("", "") is sent and piled in filter. + if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { + panic!("the snapshot is not sent before split, e: {:?}", e); + } + // Split the region range and then there should be another snapshot for the + // split ranges. + cluster.must_split(®ion, b"k2"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Ensure the regions work after split. 
+ cluster.must_put(b"k11", b"v11"); + must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); + cluster.must_put(b"k4", b"v4"); + must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); +} + #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] From 3ae1fb4320737c71a1c9d3f8ee6a3b7a9af6f6ea Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 24 Aug 2023 16:36:35 +0800 Subject: [PATCH 003/203] scheduler: not panic in the case of unexepected dropped channel when shutting dowm (#15426) ref tikv/tikv#15202 not panic in the case of unexepected dropped channel when shutting dowm Signed-off-by: SpadeA-Tang --- src/storage/txn/scheduler.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 4df7033c21a4..3c6a66c3941c 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1665,10 +1665,15 @@ impl TxnScheduler { // it may break correctness. // However, not release latch will cause deadlock which may ultimately block all // following txns, so we panic here. - panic!( - "response channel is unexpectedly dropped, tag {:?}, cid {}", - tag, cid - ); + // + // todo(spadea): Now, we only panic if it's not shutting down, although even in + // close, this behavior is not acceptable. + if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { + panic!( + "response channel is unexpectedly dropped, tag {:?}, cid {}", + tag, cid + ); + } } /// Returns whether it succeeds to write pessimistic locks to the in-memory From 8a44a2c4c11b3da9d776d2877f631922d3833933 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 24 Aug 2023 19:50:06 +0800 Subject: [PATCH 004/203] raftstore: disable duplicated mvcc key compaction check by default (#15427) close tikv/tikv#15282 disable duplicated mvcc key check compaction by default Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/operation/misc.rs | 2 +- components/raftstore/src/store/config.rs | 27 ++++++++++++++++--- components/raftstore/src/store/fsm/store.rs | 2 +- etc/config-template.toml | 9 +++++++ src/config/mod.rs | 3 +++ tests/integrations/config/mod.rs | 2 +- .../raftstore/test_compact_after_delete.rs | 4 ++- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs index 867b4192dac1..fafca29ea852 100644 --- a/components/raftstore-v2/src/operation/misc.rs +++ b/components/raftstore-v2/src/operation/misc.rs @@ -102,7 +102,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { self.store_ctx.cfg.region_compact_min_tombstones, self.store_ctx.cfg.region_compact_tombstones_percent, self.store_ctx.cfg.region_compact_min_redundant_rows, - self.store_ctx.cfg.region_compact_redundant_rows_percent, + self.store_ctx.cfg.region_compact_redundant_rows_percent(), ), })) { diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 817be7eb9695..257480b4c250 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -140,7 +140,7 @@ pub struct Config { pub region_compact_min_redundant_rows: u64, /// Minimum percentage of redundant rows to trigger manual compaction. /// Should between 1 and 100. 
- pub region_compact_redundant_rows_percent: u64, + pub region_compact_redundant_rows_percent: Option, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, @@ -429,7 +429,7 @@ impl Default for Config { region_compact_min_tombstones: 10000, region_compact_tombstones_percent: 30, region_compact_min_redundant_rows: 50000, - region_compact_redundant_rows_percent: 20, + region_compact_redundant_rows_percent: None, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), notify_capacity: 40960, @@ -581,6 +581,10 @@ impl Config { self.region_compact_check_step.unwrap() } + pub fn region_compact_redundant_rows_percent(&self) -> u64 { + self.region_compact_redundant_rows_percent.unwrap() + } + #[inline] pub fn warmup_entry_cache_enabled(&self) -> bool { self.max_entry_cache_warmup_duration.0 != Duration::from_secs(0) @@ -604,8 +608,11 @@ impl Config { if self.region_compact_check_step.is_none() { if raft_kv_v2 { self.region_compact_check_step = Some(5); + self.region_compact_redundant_rows_percent = Some(20); } else { self.region_compact_check_step = Some(100); + // Disable redundant rows check in default for v1. + self.region_compact_redundant_rows_percent = Some(100); } } @@ -766,6 +773,15 @@ impl Config { )); } + let region_compact_redundant_rows_percent = + self.region_compact_redundant_rows_percent.unwrap(); + if !(1..=100).contains(®ion_compact_redundant_rows_percent) { + return Err(box_err!( + "region-compact-redundant-rows-percent must between 1 and 100, current value is {}", + region_compact_redundant_rows_percent + )); + } + if self.local_read_batch_size == 0 { return Err(box_err!("local-read-batch-size must be greater than 0")); } @@ -992,8 +1008,11 @@ impl Config { .with_label_values(&["region_compact_min_redundant_rows"]) .set(self.region_compact_min_redundant_rows as f64); CONFIG_RAFTSTORE_GAUGE - .with_label_values(&["region_compact_tombstones_percent"]) - .set(self.region_compact_tombstones_percent as f64); + .with_label_values(&["region_compact_redundant_rows_percent"]) + .set( + self.region_compact_redundant_rows_percent + .unwrap_or_default() as f64, + ); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_heartbeat_tick_interval"]) .set(self.pd_heartbeat_tick_interval.as_secs_f64()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c21ea65a5894..df11ba51fc89 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2525,7 +2525,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.ctx.cfg.region_compact_min_tombstones, self.ctx.cfg.region_compact_tombstones_percent, self.ctx.cfg.region_compact_min_redundant_rows, - self.ctx.cfg.region_compact_redundant_rows_percent, + self.ctx.cfg.region_compact_redundant_rows_percent(), ), }, )) { diff --git a/etc/config-template.toml b/etc/config-template.toml index 36d8d25d883e..3c8a60159105 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -437,6 +437,15 @@ ## exceeds `region-compact-tombstones-percent`. # region-compact-tombstones-percent = 30 +## The minimum number of duplicated MVCC keys to trigger manual compaction. +# region-compact-min-redundant-rows = 50000 + +## The minimum percentage of duplicated MVCC keys to trigger manual compaction. +## It should be set between 1 and 100. 
Manual compaction is only triggered when the number of +## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys +## exceeds `region-compact-redundant-rows-percent`. +# region-compact-redundant-rows-percent = 100 + ## Interval to check whether to start a manual compaction for Lock Column Family. ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will ## trigger a manual compaction for Lock Column Family. diff --git a/src/config/mod.rs b/src/config/mod.rs index 5c7f1424c38a..ecb31c8aec61 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5966,6 +5966,9 @@ mod tests { default_cfg .server .optimize_for(default_cfg.coprocessor.region_split_size()); + default_cfg + .raft_store + .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); default_cfg.security.redact_info_log = Some(false); default_cfg.coprocessor.region_max_size = Some(default_cfg.coprocessor.region_max_size()); default_cfg.coprocessor.region_max_keys = Some(default_cfg.coprocessor.region_max_keys()); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a65d4cfb46c3..8fdbaa00f253 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -200,7 +200,7 @@ fn test_serde_custom_tikv_config() { region_compact_min_tombstones: 999, region_compact_tombstones_percent: 33, region_compact_min_redundant_rows: 999, - region_compact_redundant_rows_percent: 33, + region_compact_redundant_rows_percent: Some(33), pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), notify_capacity: 12_345, diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index a79fdfd44259..24034c831924 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -36,6 +36,7 @@ fn test_compact_after_delete(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(1); cluster.cfg.raft_store.region_compact_check_step = Some(1); cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); @@ -97,6 +98,7 @@ fn test_node_compact_after_delete_v2() { cluster.cfg.raft_store.region_compact_tombstones_percent = 50; // disable it cluster.cfg.raft_store.region_compact_min_redundant_rows = 10000000; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(100); cluster.cfg.raft_store.region_compact_check_step = Some(2); // TODO: v2 doesn't support titan. // cluster.cfg.rocksdb.titan.enabled = true; @@ -167,7 +169,7 @@ fn test_node_compact_after_update_v2() { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); // disable it cluster.cfg.raft_store.region_compact_min_tombstones = 1000000; - cluster.cfg.raft_store.region_compact_redundant_rows_percent = 40; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(40); cluster.cfg.raft_store.region_compact_min_redundant_rows = 50; cluster.cfg.raft_store.region_compact_check_step = Some(2); // TODO: titan is not supported in v2. 
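Editor's note on the trigger rule documented in the template text above: the check combines an absolute and a relative threshold, and the new Option-typed setting defaults to 100 on raft-kv v1 (effectively opt-in) and 20 on partitioned-raft-kv. The stand-alone Rust sketch below only restates that rule for clarity; the function name and signature are invented for this note, are not TiKV's actual API, and the exact comparison operators in the real check may differ.

// Illustrative only: mirrors the documented rule that a redundant-rows
// compaction is triggered when BOTH thresholds are exceeded.
fn should_trigger_redundant_rows_compaction(
    redundant_rows: u64,
    total_rows: u64,
    min_redundant_rows: u64,     // region-compact-min-redundant-rows (default 50000)
    redundant_rows_percent: u64, // region-compact-redundant-rows-percent (100 on v1, 20 on v2)
) -> bool {
    if total_rows == 0 {
        return false;
    }
    redundant_rows >= min_redundant_rows
        && (redundant_rows as u128) * 100
            >= (total_rows as u128) * (redundant_rows_percent as u128)
}

With the v1 default of 100, a region qualifies only when essentially every row is a duplicated MVCC key, which is why the patch comment describes the redundant-rows check as disabled by default for raft-kv v1.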
From 25959655f33ac27985962887d25a0da593fd62c8 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 24 Aug 2023 22:48:35 +0800 Subject: [PATCH 005/203] server: fix memory trace's leak metrics (#15353) close tikv/tikv#15357 Correct the raft_router/apply_router's alive and leak metrics. Signed-off-by: tonyxuqqi --- components/server/src/memory.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/components/server/src/memory.rs b/components/server/src/memory.rs index 303ff257a785..fadf18f7534f 100644 --- a/components/server/src/memory.rs +++ b/components/server/src/memory.rs @@ -19,9 +19,24 @@ impl MemoryTraceManager { for id in ids { let sub_trace = provider.sub_trace(id); let sub_trace_name = sub_trace.name(); - MEM_TRACE_SUM_GAUGE - .with_label_values(&[&format!("{}-{}", provider_name, sub_trace_name)]) - .set(sub_trace.sum() as i64) + let leaf_ids = sub_trace.get_children_ids(); + if leaf_ids.is_empty() { + MEM_TRACE_SUM_GAUGE + .with_label_values(&[&format!("{}-{}", provider_name, sub_trace_name)]) + .set(sub_trace.sum() as i64); + } else { + for leaf_id in leaf_ids { + let leaf = sub_trace.sub_trace(leaf_id); + MEM_TRACE_SUM_GAUGE + .with_label_values(&[&format!( + "{}-{}-{}", + provider_name, + sub_trace_name, + leaf.name(), + )]) + .set(leaf.sum() as i64); + } + } } MEM_TRACE_SUM_GAUGE From bea230d98c61de9847121a0f0bb9c4588b20e4de Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 25 Aug 2023 12:35:07 +0800 Subject: [PATCH 006/203] raftstore: fix unwrap panic of region_compact_redundant_rows_percent (#15440) close tikv/tikv#15438 fix unwrap panic of region_compact_redundant_rows_percent Signed-off-by: SpadeA-Tang --- components/raftstore/src/store/config.rs | 8 ++- src/config/mod.rs | 63 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 257480b4c250..f96ed2b7a45a 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -608,9 +608,15 @@ impl Config { if self.region_compact_check_step.is_none() { if raft_kv_v2 { self.region_compact_check_step = Some(5); - self.region_compact_redundant_rows_percent = Some(20); } else { self.region_compact_check_step = Some(100); + } + } + + if self.region_compact_redundant_rows_percent.is_none() { + if raft_kv_v2 { + self.region_compact_redundant_rows_percent = Some(20); + } else { // Disable redundant rows check in default for v1. 
self.region_compact_redundant_rows_percent = Some(100); } diff --git a/src/config/mod.rs b/src/config/mod.rs index ecb31c8aec61..f7c338379ef2 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6428,4 +6428,67 @@ mod tests { Some(ReadableSize::gb(1)) ); } + + #[test] + fn test_compact_check_default() { + let content = r#" + [raftstore] + region-compact-check-step = 50 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 50); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 100 + ); + + let content = r#" + [raftstore] + region-compact-check-step = 50 + [storage] + engine = "partitioned-raft-kv" + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 50); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 20 + ); + + let content = r#" + [raftstore] + region-compact-redundant-rows-percent = 50 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 100); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 50 + ); + + let content = r#" + [raftstore] + region-compact-redundant-rows-percent = 50 + [storage] + engine = "partitioned-raft-kv" + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 5); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 50 + ); + } } From 40440210d81ea1770d5921475a51350f0bee50cd Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 25 Aug 2023 00:14:05 -0700 Subject: [PATCH 007/203] batch-system: use concurrent hashmap to avoid router cache (#15431) close tikv/tikv#15430 Use concurrent hashmap to avoid router cache occupying too much memory Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 5 +- components/batch-system/Cargo.toml | 1 + components/batch-system/src/router.rs | 171 +++++------------- components/batch-system/tests/cases/router.rs | 20 +- components/raftstore/src/store/fsm/store.rs | 6 - components/tikv_util/src/mpsc/mod.rs | 25 ++- 6 files changed, 65 insertions(+), 163 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abe174e638f3..3c44a639e384 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -652,6 +652,7 @@ dependencies = [ "collections", "criterion", "crossbeam", + "dashmap", "derive_more", "fail", "file_system", @@ -1449,9 +1450,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.1.0" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0834a35a3fce649144119e18da2a4d8ed12ef3862f47183fd46f625d072d96c" +checksum = "4c8858831f7781322e539ea39e72449c46b059638250c14344fec8d0aa6e539c" dependencies = [ "cfg-if 1.0.0", "num_cpus", diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index ac69d544a212..bd1ae6c56b40 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -10,6 +10,7 @@ test-runner = ["derive_more"] [dependencies] collections = { workspace = true } crossbeam = "0.8" +dashmap = "5.2" derive_more = { version = "0.99", optional = true } fail = "0.5" file_system = { workspace = true } diff --git 
a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 119b7875506f..4f886fe3b3d7 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -1,21 +1,17 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::{ - cell::Cell, - mem, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, Mutex, - }, +use std::sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, }; -use collections::HashMap; use crossbeam::channel::{SendError, TrySendError}; -use tikv_util::{debug, info, lru::LruCache, time::Instant, Either}; +use dashmap::DashMap; +use tikv_util::{debug, info, time::Instant, Either}; use crate::{ - fsm::{Fsm, FsmScheduler, FsmState}, + fsm::{Fsm, FsmScheduler}, mailbox::{BasicMailbox, Mailbox}, metrics::*, }; @@ -27,18 +23,14 @@ pub struct RouterTrace { pub leak: usize, } -struct NormalMailMap { - map: HashMap>, - // Count of Mailboxes that is stored in `map`. - alive_cnt: Arc, -} - enum CheckDoResult { NotExist, Invalid, Valid(T), } +const ROUTER_SHRINK_SIZE: usize = 1000; + /// Router routes messages to its target FSM's mailbox. /// /// In our abstract model, every batch system has two different kind of @@ -54,8 +46,7 @@ enum CheckDoResult { /// Normal FSM and control FSM can have different scheduler, but this is not /// required. pub struct Router { - normals: Arc>>, - caches: Cell>>, + normals: Arc>>, pub(super) control_box: BasicMailbox, // TODO: These two schedulers should be unified as single one. However // it's not possible to write FsmScheduler + FsmScheduler @@ -85,11 +76,7 @@ where state_cnt: Arc, ) -> Router { Router { - normals: Arc::new(Mutex::new(NormalMailMap { - map: HashMap::default(), - alive_cnt: Arc::default(), - })), - caches: Cell::new(LruCache::with_capacity_and_sample(1024, 7)), + normals: Arc::new(DashMap::default()), control_box, normal_scheduler, control_scheduler, @@ -106,72 +93,32 @@ where /// A helper function that tries to unify a common access pattern to /// mailbox. /// - /// Generally, when sending a message to a mailbox, cache should be - /// check first, if not found, lock should be acquired. - /// /// Returns None means there is no mailbox inside the normal registry. /// Some(None) means there is expected mailbox inside the normal registry /// but it returns None after apply the given function. Some(Some) means - /// the given function returns Some and cache is updated if it's invalid. + /// the given function returns Some. 
#[inline] fn check_do(&self, addr: u64, mut f: F) -> CheckDoResult where F: FnMut(&BasicMailbox) -> Option, { - let caches = unsafe { &mut *self.caches.as_ptr() }; - let mut connected = true; - if let Some(mailbox) = caches.get(&addr) { - match f(mailbox) { - Some(r) => return CheckDoResult::Valid(r), - None => { - connected = false; - } - } - } - - let (cnt, mailbox) = { - let mut boxes = self.normals.lock().unwrap(); - let cnt = boxes.map.len(); - let b = match boxes.map.get_mut(&addr) { - Some(mailbox) => mailbox.clone(), - None => { - drop(boxes); - if !connected { - caches.remove(&addr); - } - return CheckDoResult::NotExist; - } - }; - (cnt, b) - }; - if cnt > caches.capacity() || cnt < caches.capacity() / 2 { - caches.resize(cnt); - } - - let res = f(&mailbox); - match res { - Some(r) => { - caches.insert(addr, mailbox); - CheckDoResult::Valid(r) - } + let mailbox = match self.normals.get_mut(&addr) { + Some(mailbox) => mailbox, None => { - if !connected { - caches.remove(&addr); - } - CheckDoResult::Invalid + return CheckDoResult::NotExist; } + }; + match f(&mailbox) { + Some(r) => CheckDoResult::Valid(r), + None => CheckDoResult::Invalid, } } /// Register a mailbox with given address. pub fn register(&self, addr: u64, mailbox: BasicMailbox) { - let mut normals = self.normals.lock().unwrap(); - if let Some(mailbox) = normals.map.insert(addr, mailbox) { + if let Some(mailbox) = self.normals.insert(addr, mailbox) { mailbox.close(); } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); } /// Same as send a message and then register the mailbox. @@ -183,32 +130,22 @@ where mailbox: BasicMailbox, msg: N::Message, ) -> Result<(), (BasicMailbox, N::Message)> { - let mut normals = self.normals.lock().unwrap(); - // Send has to be done within lock, otherwise the message may be handled - // before the mailbox is register. + if let Some(mailbox) = self.normals.insert(addr, mailbox.clone()) { + mailbox.close(); + } if let Err(SendError(m)) = mailbox.force_send(msg, &self.normal_scheduler) { + self.normals.remove(&addr); return Err((mailbox, m)); } - if let Some(mailbox) = normals.map.insert(addr, mailbox) { - mailbox.close(); - } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); Ok(()) } pub fn register_all(&self, mailboxes: Vec<(u64, BasicMailbox)>) { - let mut normals = self.normals.lock().unwrap(); - normals.map.reserve(mailboxes.len()); for (addr, mailbox) in mailboxes { - if let Some(m) = normals.map.insert(addr, mailbox) { + if let Some(m) = self.normals.insert(addr, mailbox) { m.close(); } } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); } /// Get the mailbox of specified address. @@ -280,13 +217,11 @@ where pub fn force_send(&self, addr: u64, msg: N::Message) -> Result<(), SendError> { match self.send(addr, msg) { Ok(()) => Ok(()), - Err(TrySendError::Full(m)) => { - let caches = unsafe { &mut *self.caches.as_ptr() }; - caches - .get(&addr) - .unwrap() - .force_send(m, &self.normal_scheduler) - } + Err(TrySendError::Full(m)) => self + .normals + .get(&addr) + .unwrap() + .force_send(m, &self.normal_scheduler), Err(TrySendError::Disconnected(m)) => { if self.is_shutdown() { Ok(()) @@ -321,10 +256,9 @@ where /// Try to notify all normal FSMs a message. 
pub fn broadcast_normal(&self, mut msg_gen: impl FnMut() -> N::Message) { let timer = Instant::now_coarse(); - let mailboxes = self.normals.lock().unwrap(); - for mailbox in mailboxes.map.values() { + self.normals.iter().for_each(|mailbox| { let _ = mailbox.force_send(msg_gen(), &self.normal_scheduler); - } + }); BROADCAST_NORMAL_DURATION.observe(timer.saturating_elapsed_secs()); } @@ -332,12 +266,13 @@ where pub fn broadcast_shutdown(&self) { info!("broadcasting shutdown"); self.shutdown.store(true, Ordering::SeqCst); - unsafe { &mut *self.caches.as_ptr() }.clear(); - let mut mailboxes = self.normals.lock().unwrap(); - for (addr, mailbox) in mailboxes.map.drain() { + for e in self.normals.iter() { + let addr = e.key(); + let mailbox = e.value(); debug!("[region {}] shutdown mailbox", addr); mailbox.close(); } + self.normals.clear(); self.control_box.close(); self.normal_scheduler.shutdown(); self.control_scheduler.shutdown(); @@ -346,51 +281,32 @@ where /// Close the mailbox of address. pub fn close(&self, addr: u64) { info!("shutdown mailbox"; "region_id" => addr); - unsafe { &mut *self.caches.as_ptr() }.remove(&addr); - let mut mailboxes = self.normals.lock().unwrap(); - if let Some(mb) = mailboxes.map.remove(&addr) { + if let Some((_, mb)) = self.normals.remove(&addr) { mb.close(); } - mailboxes - .alive_cnt - .store(mailboxes.map.len(), Ordering::Relaxed); - } - - pub fn clear_cache(&self) { - unsafe { &mut *self.caches.as_ptr() }.clear(); + if self.normals.capacity() - self.normals.len() > ROUTER_SHRINK_SIZE { + self.normals.shrink_to_fit(); + } } pub fn state_cnt(&self) -> &Arc { &self.state_cnt } - pub fn alive_cnt(&self) -> Arc { - self.normals.lock().unwrap().alive_cnt.clone() + pub fn alive_cnt(&self) -> usize { + self.normals.len() } pub fn trace(&self) -> RouterTrace { - let alive = self.normals.lock().unwrap().alive_cnt.clone(); + let alive = self.alive_cnt(); let total = self.state_cnt.load(Ordering::Relaxed); - let alive = alive.load(Ordering::Relaxed); // 1 represents the control fsm. let leak = if total > alive + 1 { total - alive - 1 } else { 0 }; - let mailbox_unit = mem::size_of::<(u64, BasicMailbox)>(); - let state_unit = mem::size_of::>(); - // Every message in crossbeam sender needs 8 bytes to store state. - let message_unit = mem::size_of::() + 8; - // crossbeam unbounded channel sender has a list of blocks. Every block has 31 - // unit and every sender has at least one sender. - let sender_block_unit = 31; - RouterTrace { - alive: (mailbox_unit * 8 / 7 // hashmap uses 7/8 of allocated memory. - + state_unit + message_unit * sender_block_unit) - * alive, - leak: (state_unit + message_unit * sender_block_unit) * leak, - } + RouterTrace { alive, leak } } } @@ -398,7 +314,6 @@ impl Clone for Router { fn clone(&self) -> Router { Router { normals: self.normals.clone(), - caches: Cell::new(LruCache::with_capacity_and_sample(1024, 7)), control_box: self.control_box.clone(), // These two schedulers should be unified as single one. However // it's not possible to write FsmScheduler + FsmScheduler diff --git a/components/batch-system/tests/cases/router.rs b/components/batch-system/tests/cases/router.rs index d746dfad5cb8..66d0770d544e 100644 --- a/components/batch-system/tests/cases/router.rs +++ b/components/batch-system/tests/cases/router.rs @@ -143,25 +143,19 @@ fn test_router_trace() { router.close(addr); }; - let router_clone = router.clone(); + let mut mailboxes = vec![]; for i in 0..10 { register_runner(i); - // Read mailbox to cache. 
- router_clone.mailbox(i).unwrap(); + mailboxes.push(router.mailbox(i).unwrap()); } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 10); + assert_eq!(router.alive_cnt(), 10); assert_eq!(router.state_cnt().load(Ordering::Relaxed), 11); - // Routers closed but exist in the cache. for i in 0..10 { close_runner(i); } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 0); + assert_eq!(router.alive_cnt(), 0); assert_eq!(router.state_cnt().load(Ordering::Relaxed), 11); - for i in 0..1024 { - register_runner(i); - // Read mailbox to cache, closed routers should be evicted. - router_clone.mailbox(i).unwrap(); - } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 1024); - assert_eq!(router.state_cnt().load(Ordering::Relaxed), 1025); + drop(mailboxes); + assert_eq!(router.alive_cnt(), 0); + assert_eq!(router.state_cnt().load(Ordering::Relaxed), 1); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index df11ba51fc89..11167a4c395f 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -468,10 +468,6 @@ where self.update_trace(); } - pub fn clear_cache(&self) { - self.router.clear_cache(); - } - fn update_trace(&self) { let router_trace = self.router.trace(); MEMTRACE_RAFT_ROUTER_ALIVE.trace(TraceEvent::Reset(router_trace.alive)); @@ -1847,8 +1843,6 @@ impl RaftBatchSystem { warn!("set thread priority for raftstore failed"; "error" => ?e); } self.workers = Some(workers); - // This router will not be accessed again, free all caches. - self.router.clear_cache(); Ok(()) } diff --git a/components/tikv_util/src/mpsc/mod.rs b/components/tikv_util/src/mpsc/mod.rs index 700691f1189f..9a71dbc0c5e0 100644 --- a/components/tikv_util/src/mpsc/mod.rs +++ b/components/tikv_util/src/mpsc/mod.rs @@ -8,9 +8,8 @@ pub mod future; pub mod priority_queue; use std::{ - cell::Cell, sync::{ - atomic::{AtomicBool, AtomicIsize, Ordering}, + atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering}, Arc, }, time::Duration, @@ -208,7 +207,7 @@ const CHECK_INTERVAL: usize = 8; /// A sender of channel that limits the maximun pending messages count loosely. pub struct LooseBoundedSender { sender: Sender, - tried_cnt: Cell, + tried_cnt: AtomicUsize, limit: usize, } @@ -230,25 +229,23 @@ impl LooseBoundedSender { /// Send a message regardless its capacity limit. #[inline] pub fn force_send(&self, t: T) -> Result<(), SendError> { - let cnt = self.tried_cnt.get(); - self.tried_cnt.set(cnt + 1); + self.tried_cnt.fetch_add(1, Ordering::AcqRel); self.sender.send(t) } /// Attempts to send a message into the channel without blocking. 
#[inline] pub fn try_send(&self, t: T) -> Result<(), TrySendError> { - let cnt = self.tried_cnt.get(); let check_interval = || { fail_point!("loose_bounded_sender_check_interval", |_| 0); CHECK_INTERVAL }; - if cnt < check_interval() { - self.tried_cnt.set(cnt + 1); - } else if self.len() < self.limit { - self.tried_cnt.set(1); - } else { - return Err(TrySendError::Full(t)); + if self.tried_cnt.fetch_add(1, Ordering::AcqRel) >= check_interval() { + if self.len() < self.limit { + self.tried_cnt.store(1, Ordering::Release); + } else { + return Err(TrySendError::Full(t)); + } } match self.sender.send(t) { @@ -275,7 +272,7 @@ impl Clone for LooseBoundedSender { fn clone(&self) -> LooseBoundedSender { LooseBoundedSender { sender: self.sender.clone(), - tried_cnt: self.tried_cnt.clone(), + tried_cnt: AtomicUsize::new(0), limit: self.limit, } } @@ -287,7 +284,7 @@ pub fn loose_bounded(cap: usize) -> (LooseBoundedSender, Receiver) { ( LooseBoundedSender { sender, - tried_cnt: Cell::new(0), + tried_cnt: AtomicUsize::new(0), limit: cap, }, receiver, From 40b225f70c92db96baae7b85891c193c1674d2d4 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Fri, 25 Aug 2023 15:29:05 +0800 Subject: [PATCH 008/203] raftstore: fix meta inconsistency issue (#15423) close tikv/tikv#13311 Fix the possible meta inconsistency issue. Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 62 ++++++++------ components/raftstore/src/store/fsm/store.rs | 3 +- .../raftstore/src/store/peer_storage.rs | 3 + tests/failpoints/cases/test_split_region.rs | 80 ++++++++++++++++++- 4 files changed, 121 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index da91e26eb09e..62a3a2650de5 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -97,7 +97,7 @@ use crate::{ UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, }, util, - util::{is_region_initialized, KeysInfoFormatter, LeaseState}, + util::{KeysInfoFormatter, LeaseState}, worker::{ Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, @@ -322,6 +322,7 @@ where "replicate peer"; "region_id" => region_id, "peer_id" => peer.get_id(), + "store_id" => store_id, ); let mut region = metapb::Region::default(); @@ -2460,6 +2461,7 @@ where } }); + let is_initialized_peer = self.fsm.peer.is_initialized(); debug!( "handle raft message"; "region_id" => self.region_id(), @@ -2467,6 +2469,7 @@ where "message_type" => %util::MsgType(&msg), "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), + "is_initialized_peer" => is_initialized_peer, ); if self.fsm.peer.pending_remove || self.fsm.stopped { @@ -3664,14 +3667,7 @@ where } let region_id = self.region_id(); - let is_initialized = self.fsm.peer.is_initialized(); - info!( - "starts destroy"; - "region_id" => region_id, - "peer_id" => self.fsm.peer_id(), - "merged_by_target" => merged_by_target, - "is_initialized" => is_initialized, - ); + let is_peer_initialized = self.fsm.peer.is_initialized(); // We can't destroy a peer which is handling snapshot. 
assert!(!self.fsm.peer.is_handling_snapshot()); @@ -3688,27 +3684,40 @@ where .snapshot_recovery_maybe_finish_wait_apply(/* force= */ true); } + (|| { + fail_point!( + "before_destroy_peer_on_peer_1003", + self.fsm.peer.peer_id() == 1003, + |_| {} + ); + })(); let mut meta = self.ctx.store_meta.lock().unwrap(); - let is_region_initialized_in_meta = meta - .regions - .get(®ion_id) - .map_or(false, |region| is_region_initialized(region)); - if !is_initialized && is_region_initialized_in_meta { - let region_in_meta = meta.regions.get(®ion_id).unwrap(); - error!( - "peer is destroyed inconsistently"; - "region_id" => region_id, + let is_latest_initialized = { + if let Some(latest_region_info) = meta.regions.get(®ion_id) { + util::is_region_initialized(latest_region_info) + } else { + false + } + }; + + if !is_peer_initialized && is_latest_initialized { + info!("skip destroy uninitialized peer as it's already initialized in meta"; + "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), - "peers" => ?self.region().get_peers(), "merged_by_target" => merged_by_target, - "is_initialized" => is_initialized, - "is_region_initialized_in_meta" => is_region_initialized_in_meta, - "start_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_start_key()), - "end_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_end_key()), - "peers_in_meta" => ?region_in_meta.get_peers(), ); + return false; } + info!( + "starts destroy"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "merged_by_target" => merged_by_target, + "is_peer_initialized" => is_peer_initialized, + "is_latest_initialized" => is_latest_initialized, + ); + if meta.atomic_snap_regions.contains_key(&self.region_id()) { drop(meta); panic!( @@ -3764,7 +3773,7 @@ where self.ctx.router.close(region_id); self.fsm.stop(); - if is_initialized + if is_peer_initialized && !merged_by_target && meta .region_ranges @@ -3773,6 +3782,7 @@ where { panic!("{} meta corruption detected", self.fsm.peer.tag); } + if meta.regions.remove(®ion_id).is_none() && !merged_by_target { panic!("{} meta corruption detected", self.fsm.peer.tag) } @@ -4139,6 +4149,7 @@ where // Insert new regions and validation let mut is_uninitialized_peer_exist = false; + let self_store_id = self.ctx.store.get_id(); if let Some(r) = meta.regions.get(&new_region_id) { // Suppose a new node is added by conf change and the snapshot comes slowly. 
// Then, the region splits and the first vote message comes to the new node @@ -4160,6 +4171,7 @@ where "region_id" => new_region_id, "region" => ?new_region, "is_uninitialized_peer_exist" => is_uninitialized_peer_exist, + "store_id" => self_store_id, ); let (sender, mut new_peer) = match PeerFsm::create( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 11167a4c395f..53559bbe1b83 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1955,7 +1955,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } info!( "region doesn't exist yet, wait for it to be split"; - "region_id" => region_id + "region_id" => region_id, + "to_peer_id" => msg.get_to_peer().get_id(), ); return Ok(CheckMsgStatus::FirstRequest); } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index d89eafc3a46d..a888929ca985 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1017,6 +1017,9 @@ where // The `region` is updated after persisting in order to stay consistent with the // one in `StoreMeta::regions` (will be updated soon). // See comments in `apply_snapshot` for more details. + (|| { + fail_point!("before_set_region_on_peer_3", self.peer_id == 3, |_| {}); + })(); self.set_region(res.region.clone()); } } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 3520de4e3adf..dfd7002495ca 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1,5 +1,4 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. - use std::{ sync::{ atomic::{AtomicBool, Ordering}, @@ -41,6 +40,85 @@ use tikv_util::{ }; use txn_types::{Key, LastChange, PessimisticLock, TimeStamp}; +#[test] +fn test_meta_inconsistency() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.apply_batch_system.pool_size = 2; + cluster.cfg.raft_store.apply_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.hibernate_regions = false; + cluster.cfg.raft_store.raft_log_gc_threshold = 1000; + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + cluster.must_transfer_leader(region_id, new_peer(1, 1)); + cluster.must_put(b"k1", b"v1"); + + // Add new peer on node 3, its snapshot apply is paused. + fail::cfg("before_set_region_on_peer_3", "pause").unwrap(); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + + // Let only heartbeat msg to pass so a replicate peer could be created on node 3 + // for peer 1003. + let region_packet_filter_region_1000_peer_1003 = + RegionPacketFilter::new(1000, 3).skip(MessageType::MsgHeartbeat); + cluster + .sim + .wl() + .add_recv_filter(3, Box::new(region_packet_filter_region_1000_peer_1003)); + + // Trigger a region split to create region 1000 with peer 1001, 1002 and 1003. + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k5"); + + // Scheduler a larger peed id heartbeat msg to trigger peer destroy for peer + // 1003, pause it before the meta.lock operation so new region insertions by + // region split could go first. 
+ // Thus a inconsistency could happen because the destroy is handled + // by a uninitialized peer but the new initialized region info is inserted into + // the meta by region split. + fail::cfg("before_destroy_peer_on_peer_1003", "pause").unwrap(); + let new_region = cluster.get_region(b"k4"); + let mut larger_id_msg = Box::::default(); + larger_id_msg.set_region_id(1000); + larger_id_msg.set_to_peer(new_peer(3, 1113)); + larger_id_msg.set_region_epoch(new_region.get_region_epoch().clone()); + larger_id_msg + .mut_region_epoch() + .set_conf_ver(new_region.get_region_epoch().get_conf_ver() + 1); + larger_id_msg.set_from_peer(new_peer(1, 1001)); + let raft_message = larger_id_msg.mut_message(); + raft_message.set_msg_type(MessageType::MsgHeartbeat); + raft_message.set_from(1001); + raft_message.set_to(1113); + raft_message.set_term(6); + cluster.sim.wl().send_raft_msg(*larger_id_msg).unwrap(); + thread::sleep(Duration::from_millis(500)); + + // Let snapshot apply continue on peer 3 from region 0, then region split would + // be applied too. + fail::remove("before_set_region_on_peer_3"); + thread::sleep(Duration::from_millis(2000)); + + // Let self destroy continue after the region split is finished. + fail::remove("before_destroy_peer_on_peer_1003"); + sleep_ms(1000); + + // Clear the network partition nemesis, trigger a new region split, panic would + // be encountered The thread 'raftstore-3-1::test_message_order_3' panicked + // at 'meta corrupted: no region for 1000 7A6B35 when creating 1004 + // region_id: 1004 from_peer { id: 1005 store_id: 1 } to_peer { id: 1007 + // store_id: 3 } message { msg_type: MsgRequestPreVote to: 1007 from: 1005 + // term: 6 log_term: 5 index: 5 commit: 5 commit_term: 5 } region_epoch { + // conf_ver: 3 version: 3 } end_key: 6B32'. + cluster.sim.wl().clear_recv_filters(3); + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + cluster.must_put(b"k1", b"v1"); +} + #[test] fn test_follower_slow_split() { let mut cluster = new_node_cluster(0, 3); From 503648f18312b8978f19b17f4e58b3f011bb3cb0 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 25 Aug 2023 18:42:35 +0800 Subject: [PATCH 009/203] *: add memory quota to resolved_ts::Resolver (#15400) ref tikv/tikv#14864 This is the first PR to fix OOM caused by Resolver tracking large txns. Resolver checks memory quota before tracking a lock, and returns false if it exceeds memory quota. 
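
For illustration, a minimal sketch of the quota-checked tracking pattern this PR introduces is shown below. The `MemoryQuota`, `track_lock`, and `untrack_lock` names mirror the patch, but the types are simplified stand-ins (byte accounting is just the key length), not the actual TiKV implementation:

```rust
use std::{
    collections::HashMap,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    },
};

struct MemoryQuota {
    capacity: AtomicUsize,
    in_use: AtomicUsize,
}

impl MemoryQuota {
    fn new(capacity: usize) -> Self {
        Self {
            capacity: AtomicUsize::new(capacity),
            in_use: AtomicUsize::new(0),
        }
    }

    fn in_use(&self) -> usize {
        self.in_use.load(Ordering::Relaxed)
    }

    // Best-effort allocation: returns false instead of blocking or panicking.
    fn alloc(&self, bytes: usize) -> bool {
        let mut in_use = self.in_use.load(Ordering::Relaxed);
        let capacity = self.capacity.load(Ordering::Acquire);
        loop {
            if in_use + bytes > capacity {
                return false;
            }
            match self.in_use.compare_exchange_weak(
                in_use,
                in_use + bytes,
                Ordering::Acquire,
                Ordering::Relaxed,
            ) {
                Ok(_) => return true,
                Err(current) => in_use = current,
            }
        }
    }

    fn free(&self, bytes: usize) {
        // The real patch uses a saturating compare-and-swap loop here; plain
        // fetch_sub is enough for this sketch as long as frees match allocs.
        self.in_use.fetch_sub(bytes, Ordering::Relaxed);
    }
}

// A toy resolver: key -> start_ts, charging only the key bytes to the quota.
struct Resolver {
    locks: HashMap<Vec<u8>, u64>,
    quota: Arc<MemoryQuota>,
}

impl Resolver {
    fn track_lock(&mut self, start_ts: u64, key: Vec<u8>) -> bool {
        if !self.quota.alloc(key.len()) {
            return false; // caller decides: assert, or deregister and rescan later
        }
        self.locks.insert(key, start_ts);
        true
    }

    fn untrack_lock(&mut self, key: &[u8]) {
        if self.locks.remove(key).is_some() {
            self.quota.free(key.len());
        }
    }
}

fn main() {
    let quota = Arc::new(MemoryQuota::new(16));
    let mut resolver = Resolver {
        locks: HashMap::new(),
        quota: quota.clone(),
    };
    assert!(resolver.track_lock(10, b"key-1".to_vec())); // in_use = 5
    assert!(resolver.track_lock(11, b"key-2".to_vec())); // in_use = 10
    assert!(!resolver.track_lock(12, b"a-very-long-key".to_vec())); // would exceed 16
    resolver.untrack_lock(b"key-1");
    assert_eq!(quota.in_use(), 5);
}
```

Returning false instead of panicking lets each caller pick its own policy: CDC and backup-stream currently pass usize::MAX and assert, while resolved-ts can deregister the region and rescan later.
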
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 19 ++- components/cdc/src/channel.rs | 87 ++------------ components/cdc/src/delegate.rs | 23 ++-- components/cdc/src/endpoint.rs | 49 ++++---- components/cdc/src/initializer.rs | 14 ++- components/cdc/src/lib.rs | 2 +- components/cdc/src/service.rs | 10 +- components/cdc/tests/mod.rs | 7 +- components/resolved_ts/src/endpoint.rs | 100 ++++++++++------ components/resolved_ts/src/resolver.rs | 87 +++++++++++--- components/server/src/server.rs | 9 +- components/server/src/server2.rs | 9 +- components/tikv_util/src/memory.rs | 113 +++++++++++++++++- 13 files changed, 347 insertions(+), 182 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index e92759bc2b22..ef6e24d9d8f0 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -9,7 +9,7 @@ use dashmap::{ use kvproto::metapb::Region; use raftstore::coprocessor::*; use resolved_ts::Resolver; -use tikv_util::{info, warn}; +use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; use crate::{debug, metrics::TRACK_REGION, utils}; @@ -401,7 +401,7 @@ impl<'a> SubscriptionRef<'a> { } } -/// This enhanced version of `Resolver` allow some unordered lock events. +/// This enhanced version of `Resolver` allow some unordered lock events. /// The name "2-phase" means this is used for 2 *concurrency* phases of /// observing a region: /// 1. Doing the initial scanning. @@ -479,7 +479,8 @@ impl TwoPhaseResolver { if !self.in_phase_one() { warn!("backup stream tracking lock as if in phase one"; "start_ts" => %start_ts, "key" => %utils::redact(&key)) } - self.resolver.track_lock(start_ts, key, None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(self.resolver.track_lock(start_ts, key, None)); } pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec) { @@ -487,7 +488,8 @@ impl TwoPhaseResolver { self.future_locks.push(FutureLock::Lock(key, start_ts)); return; } - self.resolver.track_lock(start_ts, key, None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(self.resolver.track_lock(start_ts, key, None)); } pub fn untrack_lock(&mut self, key: &[u8]) { @@ -501,7 +503,10 @@ impl TwoPhaseResolver { fn handle_future_lock(&mut self, lock: FutureLock) { match lock { - FutureLock::Lock(key, ts) => self.resolver.track_lock(ts, key, None), + FutureLock::Lock(key, ts) => { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(self.resolver.track_lock(ts, key, None)); + } FutureLock::Unlock(key) => self.resolver.untrack_lock(&key, None), } } @@ -523,8 +528,10 @@ impl TwoPhaseResolver { } pub fn new(region_id: u64, stable_ts: Option) -> Self { + // TODO: limit the memory usage of the resolver. + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); Self { - resolver: Resolver::new(region_id), + resolver: Resolver::new(region_id, memory_quota), future_locks: Default::default(), stable_ts, } diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index b11799d87c11..6a8c3d5c3aa3 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -1,13 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - fmt, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, - time::Duration, -}; +use std::{fmt, sync::Arc, time::Duration}; use futures::{ channel::mpsc::{ @@ -20,7 +13,9 @@ use futures::{ use grpcio::WriteFlags; use kvproto::cdcpb::{ChangeDataEvent, Event, ResolvedTs}; use protobuf::Message; -use tikv_util::{future::block_on_timeout, impl_display_as_debug, time::Instant, warn}; +use tikv_util::{ + future::block_on_timeout, impl_display_as_debug, memory::MemoryQuota, time::Instant, warn, +}; use crate::metrics::*; @@ -185,71 +180,7 @@ impl EventBatcher { } } -#[derive(Clone)] -pub struct MemoryQuota { - capacity: Arc, - in_use: Arc, -} - -impl MemoryQuota { - pub fn new(capacity: usize) -> MemoryQuota { - MemoryQuota { - capacity: Arc::new(AtomicUsize::new(capacity)), - in_use: Arc::new(AtomicUsize::new(0)), - } - } - - pub fn in_use(&self) -> usize { - self.in_use.load(Ordering::Relaxed) - } - - pub(crate) fn capacity(&self) -> usize { - self.capacity.load(Ordering::Acquire) - } - - pub(crate) fn set_capacity(&self, capacity: usize) { - self.capacity.store(capacity, Ordering::Release) - } - - fn alloc(&self, bytes: usize) -> bool { - let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - let capacity = self.capacity.load(Ordering::Acquire); - loop { - if in_use_bytes + bytes > capacity { - return false; - } - let new_in_use_bytes = in_use_bytes + bytes; - match self.in_use.compare_exchange_weak( - in_use_bytes, - new_in_use_bytes, - Ordering::Acquire, - Ordering::Relaxed, - ) { - Ok(_) => return true, - Err(current) => in_use_bytes = current, - } - } - } - - fn free(&self, bytes: usize) { - let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - loop { - // Saturating at the numeric bounds instead of overflowing. - let new_in_use_bytes = in_use_bytes - std::cmp::min(bytes, in_use_bytes); - match self.in_use.compare_exchange_weak( - in_use_bytes, - new_in_use_bytes, - Ordering::Acquire, - Ordering::Relaxed, - ) { - Ok(_) => return, - Err(current) => in_use_bytes = current, - } - } - } -} - -pub fn channel(buffer: usize, memory_quota: MemoryQuota) -> (Sink, Drain) { +pub fn channel(buffer: usize, memory_quota: Arc) -> (Sink, Drain) { let (unbounded_sender, unbounded_receiver) = unbounded(); let (bounded_sender, bounded_receiver) = bounded(buffer); ( @@ -304,7 +235,7 @@ impl_from_future_send_error! 
{ pub struct Sink { unbounded_sender: UnboundedSender<(CdcEvent, usize)>, bounded_sender: Sender<(CdcEvent, usize)>, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl Sink { @@ -354,7 +285,7 @@ impl Sink { pub struct Drain { unbounded_receiver: UnboundedReceiver<(CdcEvent, usize)>, bounded_receiver: Receiver<(CdcEvent, usize)>, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl<'a> Drain { @@ -451,7 +382,7 @@ mod tests { type Send = Box Result<(), SendError>>; fn new_test_channel(buffer: usize, capacity: usize, force_send: bool) -> (Send, Drain) { - let memory_quota = MemoryQuota::new(capacity); + let memory_quota = Arc::new(MemoryQuota::new(capacity)); let (mut tx, rx) = channel(buffer, memory_quota); let mut flag = true; let send = move |event| { @@ -599,7 +530,7 @@ mod tests { // 1KB let max_pending_bytes = 1024; let buffer = max_pending_bytes / event.size(); - let memory_quota = MemoryQuota::new(max_pending_bytes as _); + let memory_quota = Arc::new(MemoryQuota::new(max_pending_bytes as _)); let (tx, _rx) = channel(buffer as _, memory_quota); for _ in 0..buffer { tx.unbounded_send(CdcEvent::Event(e.clone()), false) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 4c8b2226f49d..da5c26aad30f 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -414,7 +414,10 @@ impl Delegate { for lock in mem::take(&mut pending.locks) { match lock { - PendingLock::Track { key, start_ts } => resolver.track_lock(start_ts, key, None), + PendingLock::Track { key, start_ts } => { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(resolver.track_lock(start_ts, key, None)); + } PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), } } @@ -822,7 +825,8 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. match self.resolver { Some(ref mut resolver) => { - resolver.track_lock(row.start_ts.into(), row.key.clone(), None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
+ assert!(resolver.track_lock(row.start_ts.into(), row.key.clone(), None)); } None => { assert!(self.pending.is_some(), "region resolver not ready"); @@ -1151,9 +1155,10 @@ mod tests { use api_version::RawValue; use futures::{executor::block_on, stream::StreamExt}; use kvproto::{errorpb::Error as ErrorHeader, metapb::Region}; + use tikv_util::memory::MemoryQuota; use super::*; - use crate::channel::{channel, recv_timeout, MemoryQuota}; + use crate::channel::{channel, recv_timeout}; #[test] fn test_error() { @@ -1165,7 +1170,7 @@ mod tests { region.mut_region_epoch().set_conf_ver(2); let region_epoch = region.get_region_epoch().clone(); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (sink, mut drain) = crate::channel::channel(1, quota); let rx = drain.drain(); let request_id = 123; @@ -1182,7 +1187,8 @@ mod tests { let mut delegate = Delegate::new(region_id, Default::default()); delegate.subscribe(downstream).unwrap(); assert!(delegate.handle.is_observing()); - let resolver = Resolver::new(region_id); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(region_id, memory_quota); assert!(delegate.on_region_ready(resolver, region).is_empty()); assert!(delegate.downstreams()[0].observed_range.all_key_covered); @@ -1333,7 +1339,8 @@ mod tests { region.mut_region_epoch().set_conf_ver(1); region.mut_region_epoch().set_version(1); { - let failures = delegate.on_region_ready(Resolver::new(1), region); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let failures = delegate.on_region_ready(Resolver::new(1, memory_quota), region); assert_eq!(failures.len(), 1); let id = failures[0].0.id; delegate.unsubscribe(id, None); @@ -1456,7 +1463,7 @@ mod tests { } assert_eq!(map.len(), 5); - let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let (sink, mut drain) = channel(1, Arc::new(MemoryQuota::new(1024))); let downstream = Downstream { id: DownstreamId::new(), req_id: 1, @@ -1529,7 +1536,7 @@ mod tests { } assert_eq!(map.len(), 5); - let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let (sink, mut drain) = channel(1, Arc::new(MemoryQuota::new(1024))); let downstream = Downstream { id: DownstreamId::new(), req_id: 1, diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 23a3e4104677..72042bb5aecf 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -41,6 +41,7 @@ use tikv::{ }; use tikv_util::{ debug, defer, error, impl_display_as_debug, info, + memory::MemoryQuota, mpsc::bounded, slow_log, sys::thread::ThreadBuildWrapper, @@ -56,7 +57,7 @@ use tokio::{ use txn_types::{TimeStamp, TxnExtra, TxnExtraScheduler}; use crate::{ - channel::{CdcEvent, MemoryQuota, SendError}, + channel::{CdcEvent, SendError}, delegate::{on_init_downstream, Delegate, Downstream, DownstreamId, DownstreamState}, initializer::Initializer, metrics::*, @@ -370,7 +371,7 @@ pub struct Endpoint { scan_speed_limiter: Limiter, max_scan_batch_bytes: usize, max_scan_batch_size: usize, - sink_memory_quota: MemoryQuota, + sink_memory_quota: Arc, old_value_cache: OldValueCache, resolved_region_heap: RefCell, @@ -401,7 +402,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, security_mgr: Arc, - sink_memory_quota: MemoryQuota, + sink_memory_quota: Arc, causal_ts_provider: Option>, ) -> Endpoint { let workers = Builder::new_multi_thread() @@ -1455,7 +1456,7 @@ mod tests { ConcurrencyManager::new(1.into()), env, security_mgr, - 
MemoryQuota::new(usize::MAX), + Arc::new(MemoryQuota::new(usize::MAX)), causal_ts_provider, ); @@ -1476,7 +1477,7 @@ mod tests { let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -1732,7 +1733,7 @@ mod tests { #[test] fn test_raftstore_is_busy() { - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, _rx) = channel::channel(1, quota); let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); @@ -1785,7 +1786,7 @@ mod tests { }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -1966,7 +1967,7 @@ mod tests { let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); let mut region = Region::default(); @@ -1999,7 +2000,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(1); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(1, memory_quota); let observe_id = suite.endpoint.capture_regions[&1].handle.id; suite.on_region_ready(observe_id, resolver, region.clone()); suite.run(Task::MinTs { @@ -2035,7 +2037,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(2); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(2, memory_quota); region.set_id(2); let observe_id = suite.endpoint.capture_regions[&2].handle.id; suite.on_region_ready(observe_id, resolver, region); @@ -2056,7 +2059,7 @@ mod tests { } // Register region 3 to another conn which is not support batch resolved ts. - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx2) = channel::channel(1, quota); let mut rx2 = rx2.drain(); let mut region = Region::default(); @@ -2084,7 +2087,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(3); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(3, memory_quota); region.set_id(3); let observe_id = suite.endpoint.capture_regions[&3].handle.id; suite.on_region_ready(observe_id, resolver, region); @@ -2127,7 +2131,7 @@ mod tests { fn test_deregister() { let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -2279,7 +2283,7 @@ mod tests { // Open two connections a and b, registers region 1, 2 to conn a and // region 3 to conn b. 
let mut conn_rxs = vec![]; - let quota = channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); for region_ids in vec![vec![1, 2], vec![3]] { let (tx, rx) = channel::channel(1, quota.clone()); conn_rxs.push(rx); @@ -2311,7 +2315,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(region_id); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(region_id, memory_quota); let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; let mut region = Region::default(); region.set_id(region_id); @@ -2392,7 +2397,7 @@ mod tests { fn test_deregister_conn_then_delegate() { let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); // Open conn a let (tx1, _rx1) = channel::channel(1, quota.clone()); @@ -2470,10 +2475,11 @@ mod tests { let mut region = Region::default(); region.id = 1; region.set_region_epoch(region_epoch_2); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); suite.run(Task::ResolverReady { observe_id, region: region.clone(), - resolver: Resolver::new(1), + resolver: Resolver::new(1, memory_quota), }); // Deregister deletgate due to epoch not match for conn b. @@ -2557,7 +2563,7 @@ mod tests { ..Default::default() }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -2596,8 +2602,9 @@ mod tests { conn_id, }); - let mut resolver = Resolver::new(id); - resolver.track_lock(TimeStamp::compose(0, id), vec![], None); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(id, memory_quota); + assert!(resolver.track_lock(TimeStamp::compose(0, id), vec![], None)); let mut region = Region::default(); region.id = id; region.set_region_epoch(region_epoch); @@ -2646,7 +2653,7 @@ mod tests { }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 2c0884bb3035..44b564ce6632 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -36,6 +36,7 @@ use tikv_util::{ box_err, codec::number, debug, error, info, + memory::MemoryQuota, sys::inspector::{self_thread_inspector, ThreadInspector}, time::{Instant, Limiter}, warn, @@ -215,7 +216,9 @@ impl Initializer { "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); let mut resolver = if self.build_resolver { - Some(Resolver::new(region_id)) + // TODO: limit the memory usage of the resolver. + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + Some(Resolver::new(region_id, memory_quota)) } else { None }; @@ -418,7 +421,11 @@ impl Initializer { let key = Key::from_encoded_slice(encoded_key).into_raw().unwrap(); let lock = Lock::parse(value)?; match lock.lock_type { - LockType::Put | LockType::Delete => resolver.track_lock(lock.ts, key, None), + LockType::Put | LockType::Delete => { + // TODO: handle memory quota exceed, for now, quota is set to + // usize::MAX. 
+ assert!(resolver.track_lock(lock.ts, key, None)); + } _ => (), }; } @@ -587,6 +594,7 @@ mod tests { TestEngineBuilder, }; use tikv_util::{ + memory::MemoryQuota, sys::thread::ThreadBuildWrapper, worker::{LazyWorker, Runnable}, }; @@ -629,7 +637,7 @@ mod tests { crate::channel::Drain, ) { let (receiver_worker, rx) = new_receiver_worker(); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (sink, drain) = crate::channel::channel(buffer, quota); let pool = Builder::new_multi_thread() diff --git a/components/cdc/src/lib.rs b/components/cdc/src/lib.rs index c913cefb92e1..64f110f5c456 100644 --- a/components/cdc/src/lib.rs +++ b/components/cdc/src/lib.rs @@ -15,7 +15,7 @@ mod old_value; mod service; mod txn_source; -pub use channel::{recv_timeout, CdcEvent, MemoryQuota}; +pub use channel::{recv_timeout, CdcEvent}; pub use config::CdcConfigManager; pub use delegate::Delegate; pub use endpoint::{CdcTxnExtraScheduler, Endpoint, Task, Validate}; diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index d07b5283380f..7478e3afbade 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -16,10 +16,10 @@ use kvproto::{ }, kvrpcpb::ApiVersion, }; -use tikv_util::{error, info, warn, worker::*}; +use tikv_util::{error, info, memory::MemoryQuota, warn, worker::*}; use crate::{ - channel::{channel, MemoryQuota, Sink, CDC_CHANNLE_CAPACITY}, + channel::{channel, Sink, CDC_CHANNLE_CAPACITY}, delegate::{Downstream, DownstreamId, DownstreamState, ObservedRange}, endpoint::{Deregister, Task}, }; @@ -244,14 +244,14 @@ impl EventFeedHeaders { #[derive(Clone)] pub struct Service { scheduler: Scheduler, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl Service { /// Create a ChangeData service. /// /// It requires a scheduler of an `Endpoint` in order to schedule tasks. 
- pub fn new(scheduler: Scheduler, memory_quota: MemoryQuota) -> Service { + pub fn new(scheduler: Scheduler, memory_quota: Arc) -> Service { Service { scheduler, memory_quota, @@ -518,7 +518,7 @@ mod tests { use crate::channel::{recv_timeout, CdcEvent}; fn new_rpc_suite(capacity: usize) -> (Server, ChangeDataClient, ReceiverWrapper) { - let memory_quota = MemoryQuota::new(capacity); + let memory_quota = Arc::new(MemoryQuota::new(capacity)); let (scheduler, rx) = dummy_scheduler(); let cdc_service = Service::new(scheduler, memory_quota); let env = Arc::new(EnvBuilder::new().build()); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index d2c4519d50de..ec479909793d 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -6,7 +6,7 @@ use std::{ }; use causal_ts::CausalTsProvider; -use cdc::{recv_timeout, CdcObserver, Delegate, FeatureGate, MemoryQuota, Task, Validate}; +use cdc::{recv_timeout, CdcObserver, Delegate, FeatureGate, Task, Validate}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; @@ -26,6 +26,7 @@ use test_raftstore::*; use tikv::{config::CdcConfig, server::DEFAULT_CLUSTER_ID, storage::kv::LocalTablets}; use tikv_util::{ config::ReadableDuration, + memory::MemoryQuota, worker::{LazyWorker, Runnable}, HandyRwLock, }; @@ -183,7 +184,7 @@ impl TestSuiteBuilder { .push(Box::new(move || { create_change_data(cdc::Service::new( scheduler.clone(), - MemoryQuota::new(memory_quota), + Arc::new(MemoryQuota::new(memory_quota)), )) })); sim.txn_extra_schedulers.insert( @@ -223,7 +224,7 @@ impl TestSuiteBuilder { cm.clone(), env, sim.security_mgr.clone(), - MemoryQuota::new(usize::MAX), + Arc::new(MemoryQuota::new(usize::MAX)), sim.get_causal_ts_provider(*id), ); let mut updated_cfg = cfg.clone(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 5d0dbdcd689a..36cd3030d2a2 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -28,6 +28,7 @@ use raftstore::{ use security::SecurityManager; use tikv::config::ResolvedTsConfig; use tikv_util::{ + memory::MemoryQuota, warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; @@ -80,9 +81,9 @@ struct ObserveRegion { } impl ObserveRegion { - fn new(meta: Region, rrp: Arc) -> Self { + fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { ObserveRegion { - resolver: Resolver::with_read_progress(meta.id, Some(rrp)), + resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota), meta, handle: ObserveHandle::new(), resolver_status: ResolverStatus::Pending { @@ -93,8 +94,8 @@ impl ObserveRegion { } } - fn read_progress(&self) -> &RegionReadProgress { - self.resolver.read_progress.as_ref().unwrap() + fn read_progress(&self) -> &Arc { + self.resolver.read_progress().unwrap() } fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> std::result::Result<(), String> { @@ -192,21 +193,29 @@ impl ObserveRegion { } }, ChangeLog::Rows { rows, index } => { - rows.iter().for_each(|row| match row { - ChangeRow::Prewrite { key, start_ts, .. } => self - .resolver - .track_lock(*start_ts, key.to_raw().unwrap(), Some(*index)), - ChangeRow::Commit { key, .. } => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(*index)), - // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. 
} => { - self.resolver.update_tracked_index(*index); - } - ChangeRow::IngestSsT => { - self.resolver.update_tracked_index(*index); + for row in rows { + match row { + ChangeRow::Prewrite { key, start_ts, .. } => { + if !self.resolver.track_lock( + *start_ts, + key.to_raw().unwrap(), + Some(*index), + ) { + return Err("memory quota exceed".to_owned()); + } + } + ChangeRow::Commit { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(*index)), + // One pc command do not contains any lock, so just skip it + ChangeRow::OnePc { .. } => { + self.resolver.update_tracked_index(*index); + } + ChangeRow::IngestSsT => { + self.resolver.update_tracked_index(*index); + } } - }); + } } } } @@ -215,7 +224,10 @@ impl ObserveRegion { Ok(()) } - fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) { + /// Track locks in incoming scan entries. + /// Return false if resolver exceeds memory quota. + #[must_use] + fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> bool { for es in entries { match es { ScanEntry::Lock(locks) => { @@ -223,8 +235,13 @@ impl ObserveRegion { panic!("region {:?} resolver has ready", self.meta.id) } for (key, lock) in locks { - self.resolver - .track_lock(lock.ts, key.to_raw().unwrap(), Some(apply_index)); + if !self.resolver.track_lock( + lock.ts, + key.to_raw().unwrap(), + Some(apply_index), + ) { + return false; + } } } ScanEntry::None => { @@ -237,18 +254,25 @@ impl ObserveRegion { tracked_index, .. } => { - locks.into_iter().for_each(|lock| match lock { - PendingLock::Track { key, start_ts } => { - self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(tracked_index), - ) + for lock in locks { + match lock { + PendingLock::Track { key, start_ts } => { + if !self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(tracked_index), + ) { + return false; + } + } + PendingLock::Untrack { key, .. } => { + self.resolver.untrack_lock( + &key.to_raw().unwrap(), + Some(tracked_index), + ) + } } - PendingLock::Untrack { key, .. } => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(tracked_index)), - }); + } tracked_index } ResolverStatus::Ready => { @@ -266,12 +290,14 @@ impl ObserveRegion { ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), } } + true } } pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, + memory_quota: Arc, advance_notify: Arc, store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, @@ -321,6 +347,8 @@ where let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), + // TODO: add memory quota to config. + memory_quota: Arc::new(MemoryQuota::new(std::usize::MAX)), advance_notify: Arc::new(Notify::new()), scheduler, store_meta, @@ -343,7 +371,7 @@ where "register observe region"; "region" => ?region ); - ObserveRegion::new(region.clone(), read_progress) + ObserveRegion::new(region.clone(), read_progress, self.memory_quota.clone()) } else { warn!( "try register unexit region"; @@ -537,6 +565,7 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
drop(observe_region); self.re_register_region(region_id, observe_id, e); } @@ -561,7 +590,8 @@ where match self.regions.get_mut(®ion_id) { Some(observe_region) => { if observe_region.handle.id == observe_id { - observe_region.track_scan_locks(entries, apply_index); + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(observe_region.track_scan_locks(entries, apply_index)); } } None => { @@ -904,7 +934,7 @@ where .next() .cloned() .map(TimeStamp::into_inner); - lock_num = Some(ob.resolver.locks_by_key.len()); + lock_num = Some(ob.resolver.num_locks()); } info!( "the max gap of safe-ts is large"; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 799c5584723b..4b04bf02322e 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -4,7 +4,10 @@ use std::{cmp, collections::BTreeMap, sync::Arc}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; -use tikv_util::time::Instant; +use tikv_util::{ + memory::{HeapSize, MemoryQuota}, + time::Instant, +}; use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; @@ -16,7 +19,7 @@ const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub struct Resolver { region_id: u64, // key -> start_ts - pub(crate) locks_by_key: HashMap, TimeStamp>, + locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, // The timestamps that guarantees no more commit will happen before. @@ -24,11 +27,14 @@ pub struct Resolver { // The highest index `Resolver` had been tracked tracked_index: u64, // The region read progress used to utilize `resolved_ts` to serve stale read request - pub(crate) read_progress: Option>, + read_progress: Option>, // The timestamps that advance the resolved_ts when there is no more write. min_ts: TimeStamp, // Whether the `Resolver` is stopped stopped: bool, + + // The memory quota for the `Resolver` and its lock keys and timestamps. + memory_quota: Arc, } impl std::fmt::Debug for Resolver { @@ -39,27 +45,38 @@ impl std::fmt::Debug for Resolver { if let Some((ts, keys)) = far_lock { dt.field(&format_args!( - "far_lock={:?}", + "oldest_lock={:?}", keys.iter() // We must use Display format here or the redact won't take effect. .map(|k| format!("{}", log_wrappers::Value::key(k))) .collect::>() )); - dt.field(&format_args!("far_lock_ts={:?}", ts)); + dt.field(&format_args!("oldest_lock_ts={:?}", ts)); } dt.finish() } } +impl Drop for Resolver { + fn drop(&mut self) { + // Free memory quota used by locks_by_key. 
+ for key in self.locks_by_key.keys() { + let bytes = key.heap_size(); + self.memory_quota.free(bytes); + } + } +} + impl Resolver { - pub fn new(region_id: u64) -> Resolver { - Resolver::with_read_progress(region_id, None) + pub fn new(region_id: u64, memory_quota: Arc) -> Resolver { + Resolver::with_read_progress(region_id, None, memory_quota) } pub fn with_read_progress( region_id: u64, read_progress: Option>, + memory_quota: Arc, ) -> Resolver { Resolver { region_id, @@ -70,6 +87,7 @@ impl Resolver { tracked_index: 0, min_ts: TimeStamp::zero(), stopped: false, + memory_quota, } } @@ -87,11 +105,9 @@ impl Resolver { pub fn size(&self) -> usize { self.locks_by_key.keys().map(|k| k.len()).sum::() - + self - .lock_ts_heap - .values() - .map(|h| h.iter().map(|k| k.len()).sum::()) - .sum::() + + self.locks_by_key.len() * std::mem::size_of::() + + self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) } pub fn locks(&self) -> &BTreeMap>> { @@ -115,7 +131,8 @@ impl Resolver { self.tracked_index = index; } - pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) { + #[must_use] + pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { self.update_tracked_index(index); } @@ -125,9 +142,14 @@ impl Resolver { start_ts, self.region_id ); + let bytes = key.as_slice().heap_size(); + if !self.memory_quota.alloc(bytes) { + return false; + } let key: Arc<[u8]> = key.into_boxed_slice().into(); self.locks_by_key.insert(key.clone(), start_ts); self.lock_ts_heap.entry(start_ts).or_default().insert(key); + true } pub fn untrack_lock(&mut self, key: &[u8], index: Option) { @@ -135,6 +157,8 @@ impl Resolver { self.update_tracked_index(index); } let start_ts = if let Some(start_ts) = self.locks_by_key.remove(key) { + let bytes = key.heap_size(); + self.memory_quota.free(bytes); start_ts } else { debug!("untrack a lock that was not tracked before"; "key" => &log_wrappers::Value::key(key)); @@ -230,6 +254,10 @@ impl Resolver { pub(crate) fn num_transactions(&self) -> u64 { self.lock_ts_heap.len() as u64 } + + pub(crate) fn read_progress(&self) -> Option<&Arc> { + self.read_progress.as_ref() + } } #[cfg(test)] @@ -300,11 +328,16 @@ mod tests { ]; for (i, case) in cases.into_iter().enumerate() { - let mut resolver = Resolver::new(1); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); for e in case.clone() { match e { Event::Lock(start_ts, key) => { - resolver.track_lock(start_ts.into(), key.into_raw().unwrap(), None) + assert!(resolver.track_lock( + start_ts.into(), + key.into_raw().unwrap(), + None + )); } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { @@ -319,4 +352,28 @@ mod tests { } } } + + #[test] + fn test_memory_quota() { + let memory_quota = Arc::new(MemoryQuota::new(1024)); + let mut resolver = Resolver::new(1, memory_quota.clone()); + let mut key = vec![0; 77]; + let mut ts = TimeStamp::default(); + while resolver.track_lock(ts, key.clone(), None) { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + } + let remain = 1024 % key.len(); + assert_eq!(memory_quota.in_use(), 1024 - remain); + + let mut ts = TimeStamp::default(); + for _ in 0..5 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + assert_eq!(memory_quota.in_use(), 1024 - 5 * key.len() - remain); + drop(resolver); + 
assert_eq!(memory_quota.in_use(), 0); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 7ff51474d7dc..57afb85d5b57 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -28,7 +28,7 @@ use backup_stream::{ BackupStreamResolver, }; use causal_ts::CausalTsProviderImpl; -use cdc::{CdcConfigManager, MemoryQuota}; +use cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; @@ -108,6 +108,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, @@ -266,7 +267,7 @@ struct Servers { node: Node, importer: Arc, cdc_scheduler: tikv_util::worker::Scheduler, - cdc_memory_quota: MemoryQuota, + cdc_memory_quota: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, backup_stream_scheduler: Option>, debugger: DebuggerImpl>, LockManager, F>, @@ -986,7 +987,9 @@ where } // Start CDC. - let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index fe2b685313e0..32d7ab14da99 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -32,7 +32,7 @@ use backup_stream::{ BackupStreamResolver, }; use causal_ts::CausalTsProviderImpl; -use cdc::{CdcConfigManager, MemoryQuota}; +use cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_DEFAULT, CF_WRITE}; @@ -106,6 +106,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, @@ -243,7 +244,7 @@ struct TikvServer { env: Arc, cdc_worker: Option>>, cdc_scheduler: Option>, - cdc_memory_quota: Option, + cdc_memory_quota: Option>, backup_stream_scheduler: Option>, sst_worker: Option>>, quota_limiter: Arc, @@ -637,7 +638,9 @@ where Box::new(CdcConfigManager(cdc_scheduler.clone())), ); // Start cdc endpoint. - let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 0a2f49461c5c..17b6b23cf788 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -1,6 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::mem; +use std::{ + mem, + sync::atomic::{AtomicUsize, Ordering}, +}; use kvproto::{ encryptionpb::EncryptionMeta, @@ -28,6 +31,12 @@ pub trait HeapSize { } } +impl HeapSize for [u8] { + fn heap_size(&self) -> usize { + self.len() * mem::size_of::() + } +} + impl HeapSize for Region { fn heap_size(&self) -> usize { let mut size = self.start_key.capacity() + self.end_key.capacity(); @@ -65,3 +74,105 @@ impl HeapSize for RaftCmdRequest { + mem::size_of_val(&self.status_request) } } + +pub struct MemoryQuota { + capacity: AtomicUsize, + in_use: AtomicUsize, +} + +impl MemoryQuota { + pub fn new(capacity: usize) -> MemoryQuota { + MemoryQuota { + capacity: AtomicUsize::new(capacity), + in_use: AtomicUsize::new(0), + } + } + + pub fn in_use(&self) -> usize { + self.in_use.load(Ordering::Relaxed) + } + + pub fn capacity(&self) -> usize { + self.capacity.load(Ordering::Acquire) + } + + pub fn set_capacity(&self, capacity: usize) { + self.capacity.store(capacity, Ordering::Release) + } + + pub fn alloc(&self, bytes: usize) -> bool { + let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); + let capacity = self.capacity.load(Ordering::Acquire); + loop { + if in_use_bytes + bytes > capacity { + return false; + } + let new_in_use_bytes = in_use_bytes + bytes; + match self.in_use.compare_exchange_weak( + in_use_bytes, + new_in_use_bytes, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => return true, + Err(current) => in_use_bytes = current, + } + } + } + + pub fn free(&self, bytes: usize) { + let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); + loop { + // Saturating at the numeric bounds instead of overflowing. + let new_in_use_bytes = in_use_bytes - std::cmp::min(bytes, in_use_bytes); + match self.in_use.compare_exchange_weak( + in_use_bytes, + new_in_use_bytes, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => return, + Err(current) => in_use_bytes = current, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memory_quota() { + let quota = MemoryQuota::new(100); + assert!(quota.alloc(10)); + assert_eq!(quota.in_use(), 10); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 10); + quota.free(5); + assert_eq!(quota.in_use(), 5); + assert!(quota.alloc(95)); + assert_eq!(quota.in_use(), 100); + quota.free(95); + assert_eq!(quota.in_use(), 5); + } + + #[test] + fn test_resize_memory_quota() { + let quota = MemoryQuota::new(100); + assert!(quota.alloc(10)); + assert_eq!(quota.in_use(), 10); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 10); + quota.set_capacity(200); + assert!(quota.alloc(100)); + assert_eq!(quota.in_use(), 110); + quota.set_capacity(50); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 110); + quota.free(100); + assert_eq!(quota.in_use(), 10); + assert!(quota.alloc(40)); + assert_eq!(quota.in_use(), 50); + } +} From f3b5bf51e9105fb5685ef23e454301f48fd27caf Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 28 Aug 2023 11:30:36 +0800 Subject: [PATCH 010/203] config: support changed adjust max-background-compactions dynamically (#15425) close tikv/tikv#15424 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/common.rs | 6 ++++- components/server/src/server.rs | 6 ++++- components/server/src/server2.rs | 6 ++++- src/config/mod.rs | 42 +++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 
165a1c8509ec..c8cf879d9052 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -762,7 +762,11 @@ impl ConfiguredRaftEngine for RocksEngine { fn register_config(&self, cfg_controller: &mut ConfigController) { cfg_controller.register( tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + self.clone(), + DbType::Raft, + )), ); } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 57afb85d5b57..72f7b9369568 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1552,7 +1552,11 @@ impl TikvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new(kv_engine.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + kv_engine.clone(), + DbType::Kv, + )), ); let reg = TabletRegistry::new( Box::new(SingletonFactory::new(kv_engine)), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 32d7ab14da99..1289ffe848d6 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1485,7 +1485,11 @@ impl TikvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new(registry.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + registry.clone(), + DbType::Kv, + )), ); self.tablet_registry = Some(registry.clone()); raft_engine.register_config(cfg_controller); diff --git a/src/config/mod.rs b/src/config/mod.rs index f7c338379ef2..38369b3ee93a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1903,13 +1903,14 @@ pub enum DbType { } pub struct DbConfigManger { + cfg: DbConfig, db: D, db_type: DbType, } impl DbConfigManger { - pub fn new(db: D, db_type: DbType) -> Self { - DbConfigManger { db, db_type } + pub fn new(cfg: DbConfig, db: D, db_type: DbType) -> Self { + DbConfigManger { cfg, db, db_type } } } @@ -1944,10 +1945,31 @@ impl DbConfigManger { _ => Err(format!("invalid cf {:?} for db {:?}", cf, self.db_type).into()), } } + + fn update_background_cfg( + &self, + max_background_jobs: i32, + max_background_flushes: i32, + ) -> Result<(), Box> { + assert!(max_background_jobs > 0 && max_background_flushes > 0); + let max_background_compacts = + std::cmp::max(max_background_jobs - max_background_flushes, 1); + self.db + .set_db_config(&[("max_background_jobs", &max_background_jobs.to_string())])?; + self.db.set_db_config(&[( + "max_background_flushes", + &max_background_flushes.to_string(), + )])?; + self.db.set_db_config(&[( + "max_background_compactions", + &max_background_compacts.to_string(), + )]) + } } impl ConfigManager for DbConfigManger { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { + self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); @@ -2011,8 +2033,7 @@ impl ConfigManager for DbConfigManger { .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); - self.db - .set_db_config(&[("max_background_jobs", &max_background_jobs.to_string())])?; + self.update_background_cfg(max_background_jobs, self.cfg.max_background_flushes)?; } if let 
Some(background_subcompactions_config) = change @@ -2029,10 +2050,7 @@ impl ConfigManager for DbConfigManger { .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); - self.db.set_db_config(&[( - "max_background_flushes", - &max_background_flushes.to_string(), - )])?; + self.update_background_cfg(self.cfg.max_background_jobs, max_background_flushes)?; } if !change.is_empty() { @@ -4958,7 +4976,11 @@ mod tests { let cfg_controller = ConfigController::new(cfg); cfg_controller.register( Module::Rocksdb, - Box::new(DbConfigManger::new(engine.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + engine.clone(), + DbType::Kv, + )), ); let (scheduler, receiver) = dummy_scheduler(); cfg_controller.register( @@ -5108,6 +5130,7 @@ mod tests { .update_config("rocksdb.max-background-jobs", "8") .unwrap(); assert_eq!(db.get_db_options().get_max_background_jobs(), 8); + assert_eq!(db.get_db_options().get_max_background_compactions(), 6); // update max_background_flushes, set to a bigger value assert_eq!(db.get_db_options().get_max_background_flushes(), 2); @@ -5116,6 +5139,7 @@ mod tests { .update_config("rocksdb.max-background-flushes", "5") .unwrap(); assert_eq!(db.get_db_options().get_max_background_flushes(), 5); + assert_eq!(db.get_db_options().get_max_background_compactions(), 3); // update rate_bytes_per_sec assert_eq!( From e5efbe697455bd7814c6979df06a8ccf0189909a Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 28 Aug 2023 15:53:06 +0800 Subject: [PATCH 011/203] raftstore-v2: enable failpoint for raftstore v2 in stale-peer (#15421) ref tikv/tikv#15409 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- .../src/operation/command/admin/conf_change.rs | 9 +++++++++ components/raftstore/src/store/fsm/apply.rs | 4 ++-- tests/failpoints/cases/test_stale_peer.rs | 18 +++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 2bd06fca6c24..c7b8481aa7cf 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -10,6 +10,7 @@ use std::time::Instant; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use fail::fail_point; use kvproto::{ metapb::{self, PeerRole}, raft_cmdpb::{AdminRequest, AdminResponse, ChangePeerRequest, RaftCmdRequest}, @@ -392,6 +393,14 @@ impl Apply { match change_type { ConfChangeType::AddNode => { + let add_node_fp = || { + fail_point!( + "apply_on_add_node_1_2", + self.peer_id() == 2 && self.region_id() == 1, + |_| {} + ) + }; + add_node_fp(); PEER_ADMIN_CMD_COUNTER_VEC .with_label_values(&["add_peer", "all"]) .inc(); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e2b1cedc88d9..0bc1ccf7d85d 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2114,14 +2114,14 @@ where match change_type { ConfChangeType::AddNode => { - let add_ndoe_fp = || { + let add_node_fp = || { fail_point!( "apply_on_add_node_1_2", self.id() == 2 && self.region_id() == 1, |_| {} ) }; - add_ndoe_fp(); + add_node_fp(); PEER_ADMIN_CMD_COUNTER_VEC .with_label_values(&["add_peer", "all"]) diff --git a/tests/failpoints/cases/test_stale_peer.rs 
b/tests/failpoints/cases/test_stale_peer.rs index 39fa09ef014d..80c73f03a163 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -12,6 +12,7 @@ use kvproto::raft_serverpb::{PeerState, RaftLocalState, RaftMessage}; use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; #[test] @@ -44,7 +45,8 @@ fn test_one_node_leader_missing() { fail::remove(check_stale_state); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_update_localreader_after_removed() { let mut cluster = new_node_cluster(0, 6); let pd_client = cluster.pd_client.clone(); @@ -90,7 +92,8 @@ fn test_node_update_localreader_after_removed() { cluster.must_region_not_exist(r1, 2); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_stale_learner_restart() { let mut cluster = new_node_cluster(0, 2); cluster.pd_client.disable_default_operator(); @@ -133,9 +136,11 @@ fn test_stale_learner_restart() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } +/// pass /// Test if a peer can be destroyed through tombstone msg when applying /// snapshot. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_stale_peer_destroy_when_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); @@ -210,9 +215,11 @@ fn test_stale_peer_destroy_when_apply_snapshot() { must_get_none(&cluster.get_engine(3), b"k1"); } +/// pass /// Test if destroy a uninitialized peer through tombstone msg would allow a /// staled peer be created again. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { // 4 stores cluster. let mut cluster = new_node_cluster(0, 4); @@ -291,7 +298,8 @@ fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { /// Logs scan are now moved to raftlog gc threads. The case is to test if logs /// are still cleaned up when there is stale logs before first index during /// destroy. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_destroy_clean_up_logs_with_unfinished_log_gc() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(15); From c66bfe87c17a2892c5d7440cd30d17147b3fff15 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 29 Aug 2023 17:03:38 +0800 Subject: [PATCH 012/203] resolved_ts: re-register region if memory quota exceeded (#15411) close tikv/tikv#14864 Fix resolved ts OOM caused by Resolver tracking large txns. `ObserveRegion` is deregistered if it exceeds memory quota. It may cause higher CPU usage because of scanning locks, but it's better than OOM. 
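
As a rough sketch of the recovery policy (not the actual endpoint code), the error handling introduced here boils down to choosing a re-registration backoff per error kind. The 30-second `MEMORY_QUOTA_EXCEEDED_BACKOFF` and the `Error` variants below mirror the patch, while `Endpoint` is a simplified placeholder:

```rust
use std::time::Duration;

const MEMORY_QUOTA_EXCEEDED_BACKOFF: Duration = Duration::from_secs(30);

#[derive(Debug)]
enum Error {
    MemoryQuotaExceeded,
    Other(String),
}

struct Endpoint;

impl Endpoint {
    // Decide how to recover an observed region after tracking locks failed.
    fn re_register_backoff(&self, err: &Error) -> Option<Duration> {
        match err {
            // Backing off gives the advance loop a chance to resolve and
            // untrack locks before the region's locks are scanned again.
            Error::MemoryQuotaExceeded => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF),
            // Other errors (e.g. an admin command seen while initializing the
            // resolver) are retried immediately.
            Error::Other(_) => None,
        }
    }

    fn handle_track_error(&self, region_id: u64, err: Error) {
        let backoff = self.re_register_backoff(&err);
        println!("deregister region {region_id} due to {err:?}; re-register after {backoff:?}");
        // Real code: deregister the ObserveRegion, then register it again with
        // `backoff`, which the scan task waits out before scanning locks.
    }
}

fn main() {
    let ep = Endpoint;
    ep.handle_track_error(1, Error::MemoryQuotaExceeded);
    ep.handle_track_error(2, Error::Other("admin command during init".into()));
}
```

With this policy a region that keeps exceeding the quota is rescanned at most roughly once every 30 seconds instead of letting its resolver grow without bound.
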
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/endpoint.rs | 98 ++++++++++++------- components/resolved_ts/src/errors.rs | 53 +--------- components/resolved_ts/src/resolver.rs | 57 ++++++++--- components/resolved_ts/src/scanner.rs | 61 +++++++----- .../resolved_ts/tests/integrations/mod.rs | 92 ++++++++++++++++- components/resolved_ts/tests/mod.rs | 15 ++- src/config/mod.rs | 2 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 9 files changed, 254 insertions(+), 126 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 36cd3030d2a2..3c1ad9d8c8d2 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -41,10 +41,12 @@ use crate::{ metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, + Error, Result, }; /// grace period for logging safe-ts and resolved-ts gap in slow log const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; +const MEMORY_QUOTA_EXCEEDED_BACKOFF: Duration = Duration::from_secs(30); enum ResolverStatus { Pending { @@ -98,7 +100,7 @@ impl ObserveRegion { self.resolver.read_progress().unwrap() } - fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> std::result::Result<(), String> { + fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> Result<()> { match &mut self.resolver_status { ResolverStatus::Pending { locks, @@ -119,7 +121,7 @@ impl ObserveRegion { // TODO: for admin cmd that won't change the region meta like peer list // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to // return error - return Err(format!( + return Err(box_err!( "region met admin command {:?} while initializing resolver", req_type )); @@ -201,7 +203,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(*index), ) { - return Err("memory quota exceed".to_owned()); + return Err(Error::MemoryQuotaExceeded); } } ChangeRow::Commit { key, .. } => self @@ -225,9 +227,7 @@ impl ObserveRegion { } /// Track locks in incoming scan entries. - /// Return false if resolver exceeds memory quota. - #[must_use] - fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> bool { + fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> Result<()> { for es in entries { match es { ScanEntry::Lock(locks) => { @@ -240,7 +240,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(apply_index), ) { - return false; + return Err(Error::MemoryQuotaExceeded); } } } @@ -262,7 +262,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(tracked_index), ) { - return false; + return Err(Error::MemoryQuotaExceeded); } } PendingLock::Untrack { key, .. } => { @@ -290,7 +290,7 @@ impl ObserveRegion { ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), } } - true + Ok(()) } } @@ -347,8 +347,7 @@ where let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), - // TODO: add memory quota to config. 
- memory_quota: Arc::new(MemoryQuota::new(std::usize::MAX)), + memory_quota: Arc::new(MemoryQuota::new(cfg.memory_quota.0 as usize)), advance_notify: Arc::new(Notify::new()), scheduler, store_meta, @@ -362,7 +361,7 @@ where ep } - fn register_region(&mut self, region: Region) { + fn register_region(&mut self, region: Region, backoff: Option) { let region_id = region.get_id(); assert!(self.regions.get(®ion_id).is_none()); let observe_region = { @@ -390,7 +389,7 @@ where .update_advance_resolved_ts_notify(self.advance_notify.clone()); self.regions.insert(region_id, observe_region); - let scan_task = self.build_scan_task(region, observe_handle, cancelled); + let scan_task = self.build_scan_task(region, observe_handle, cancelled, backoff); self.scanner_pool.spawn_task(scan_task); RTS_SCAN_TASKS.with_label_values(&["total"]).inc(); } @@ -400,6 +399,7 @@ where region: Region, observe_handle: ObserveHandle, cancelled: Arc, + backoff: Option, ) -> ScanTask { let scheduler = self.scheduler.clone(); let scheduler_error = self.scheduler.clone(); @@ -411,6 +411,7 @@ where mode: ScanMode::LockOnly, region, checkpoint_ts: TimeStamp::zero(), + backoff, is_cancelled: Box::new(move || cancelled.load(Ordering::Acquire)), send_entries: Box::new(move |entries, apply_index| { scheduler @@ -424,13 +425,16 @@ where RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); }), on_error: Some(Box::new(move |observe_id, _region, e| { - scheduler_error - .schedule(Task::ReRegisterRegion { - region_id, - observe_id, - cause: format!("met error while handle scan task {:?}", e), - }) - .unwrap_or_else(|schedule_err| warn!("schedule re-register task failed"; "err" => ?schedule_err, "re_register_cause" => ?e)); + if let Err(e) = scheduler_error.schedule(Task::ReRegisterRegion { + region_id, + observe_id, + cause: e, + }) { + warn!("schedule re-register task failed"; + "region_id" => region_id, + "observe_id" => ?observe_id, + "error" => ?e); + } RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); })), } @@ -476,7 +480,7 @@ where // the `Resolver`'s lock heap // - `PrepareMerge` and `RollbackMerge`, the key range is unchanged self.deregister_region(region_id); - self.register_region(incoming_region); + self.register_region(incoming_region, None); } } @@ -507,7 +511,13 @@ where } // Deregister current observed region and try to register it again. - fn re_register_region(&mut self, region_id: u64, observe_id: ObserveId, cause: String) { + fn re_register_region( + &mut self, + region_id: u64, + observe_id: ObserveId, + cause: Error, + backoff: Option, + ) { if let Some(observe_region) = self.regions.get(®ion_id) { if observe_region.handle.id != observe_id { warn!("resolved ts deregister region failed due to observe_id not match"); @@ -518,7 +528,7 @@ where "register region again"; "region_id" => region_id, "observe_id" => ?observe_id, - "cause" => cause + "cause" => ?cause ); self.deregister_region(region_id); let region; @@ -529,7 +539,7 @@ where None => return, } } - self.register_region(region); + self.register_region(region, backoff); } } @@ -565,9 +575,12 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
drop(observe_region); - self.re_register_region(region_id, observe_id, e); + let backoff = match e { + Error::MemoryQuotaExceeded => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), + Error::Other(_) => None, + }; + self.re_register_region(region_id, observe_id, e, backoff); } } else { debug!("resolved ts CmdBatch discarded"; @@ -587,16 +600,23 @@ where entries: Vec, apply_index: u64, ) { - match self.regions.get_mut(®ion_id) { - Some(observe_region) => { - if observe_region.handle.id == observe_id { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(observe_region.track_scan_locks(entries, apply_index)); + let mut is_memory_quota_exceeded = false; + if let Some(observe_region) = self.regions.get_mut(®ion_id) { + if observe_region.handle.id == observe_id { + if let Err(Error::MemoryQuotaExceeded) = + observe_region.track_scan_locks(entries, apply_index) + { + is_memory_quota_exceeded = true; } } - None => { - debug!("scan locks region not exist"; "region_id" => region_id, "observe_id" => ?observe_id); - } + } else { + debug!("scan locks region not exist"; + "region_id" => region_id, + "observe_id" => ?observe_id); + } + if is_memory_quota_exceeded { + let backoff = Some(MEMORY_QUOTA_EXCEEDED_BACKOFF); + self.re_register_region(region_id, observe_id, Error::MemoryQuotaExceeded, backoff); } } @@ -616,6 +636,8 @@ where warn!("resolved-ts config fails"; "error" => ?e); } else { self.advance_notify.notify_waiters(); + self.memory_quota + .set_capacity(self.cfg.memory_quota.0 as usize); info!( "resolved-ts config changed"; "prev" => prev, @@ -668,7 +690,7 @@ pub enum Task { ReRegisterRegion { region_id: u64, observe_id: ObserveId, - cause: String, + cause: Error, }, AdvanceResolvedTs { leader_resolver: LeadershipResolver, @@ -780,13 +802,13 @@ where match task { Task::RegionDestroyed(region) => self.region_destroyed(region), Task::RegionUpdated(region) => self.region_updated(region), - Task::RegisterRegion { region } => self.register_region(region), + Task::RegisterRegion { region } => self.register_region(region, None), Task::DeRegisterRegion { region_id } => self.deregister_region(region_id), Task::ReRegisterRegion { region_id, observe_id, cause, - } => self.re_register_region(region_id, observe_id, cause), + } => self.re_register_region(region_id, observe_id, cause, None), Task::AdvanceResolvedTs { leader_resolver } => { self.handle_advance_resolved_ts(leader_resolver) } @@ -897,7 +919,7 @@ where unresolved_count += 1; } ResolverStatus::Ready { .. } => { - lock_heap_size += observe_region.resolver.size(); + lock_heap_size += observe_region.resolver.approximate_heap_bytes(); resolved_count += 1; } } diff --git a/components/resolved_ts/src/errors.rs b/components/resolved_ts/src/errors.rs index d9845440c079..b4a59a2c7a0b 100644 --- a/components/resolved_ts/src/errors.rs +++ b/components/resolved_ts/src/errors.rs @@ -1,62 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::io::Error as IoError; - -use engine_traits::Error as EngineTraitsError; -use kvproto::errorpb::Error as ErrorHeader; -use raftstore::Error as RaftstoreError; use thiserror::Error; -use tikv::storage::{ - kv::{Error as KvError, ErrorInner as EngineErrorInner}, - mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, - txn::{Error as TxnError, ErrorInner as TxnErrorInner}, -}; -use txn_types::Error as TxnTypesError; #[derive(Debug, Error)] pub enum Error { - #[error("IO error {0}")] - Io(#[from] IoError), - #[error("Engine error {0}")] - Kv(#[from] KvError), - #[error("Transaction error {0}")] - Txn(#[from] TxnError), - #[error("Mvcc error {0}")] - Mvcc(#[from] MvccError), - #[error("Request error {0:?}")] - Request(Box), - #[error("Engine traits error {0}")] - EngineTraits(#[from] EngineTraitsError), - #[error("Txn types error {0}")] - TxnTypes(#[from] TxnTypesError), - #[error("Raftstore error {0}")] - Raftstore(#[from] RaftstoreError), + #[error("Memory quota exceeded")] + MemoryQuotaExceeded, #[error("Other error {0}")] Other(#[from] Box), } -impl Error { - pub fn request(err: ErrorHeader) -> Error { - Error::Request(Box::new(err)) - } - - pub fn extract_error_header(self) -> ErrorHeader { - match self { - Error::Kv(KvError(box EngineErrorInner::Request(e))) - | Error::Txn(TxnError(box TxnErrorInner::Engine(KvError( - box EngineErrorInner::Request(e), - )))) - | Error::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::Kv( - KvError(box EngineErrorInner::Request(e)), - ))))) - | Error::Request(box e) => e, - other => { - let mut e = ErrorHeader::default(); - e.set_message(format!("{:?}", other)); - e - } - } - } -} - pub type Result = std::result::Result; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 4b04bf02322e..1b0a07bf8e2e 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,6 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; +const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. @@ -61,10 +62,19 @@ impl std::fmt::Debug for Resolver { impl Drop for Resolver { fn drop(&mut self) { // Free memory quota used by locks_by_key. + let mut bytes = 0; + let num_locks = self.num_locks(); for key in self.locks_by_key.keys() { - let bytes = key.heap_size(); - self.memory_quota.free(bytes); + bytes += self.lock_heap_size(key); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("drop huge resolver"; + "region_id" => self.region_id, + "bytes" => bytes, + "num_locks" => num_locks, + ); } + self.memory_quota.free(bytes); } } @@ -103,13 +113,6 @@ impl Resolver { self.stopped } - pub fn size(&self) -> usize { - self.locks_by_key.keys().map(|k| k.len()).sum::() - + self.locks_by_key.len() * std::mem::size_of::() - + self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) - } - pub fn locks(&self) -> &BTreeMap>> { &self.lock_ts_heap } @@ -131,6 +134,33 @@ impl Resolver { self.tracked_index = index; } + // Return an approximate heap memory usage in bytes. + pub fn approximate_heap_bytes(&self) -> usize { + // memory used by locks_by_key. + let memory_quota_in_use = self.memory_quota.in_use(); + + // memory used by lock_ts_heap. 
+ let memory_lock_ts_heap = self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) + // memory used by HashSet> + + self.locks_by_key.len() * std::mem::size_of::>(); + + memory_quota_in_use + memory_lock_ts_heap + } + + fn lock_heap_size(&self, key: &[u8]) -> usize { + // A resolver has + // * locks_by_key: HashMap, TimeStamp> + // * lock_ts_heap: BTreeMap>> + // + // We only count memory used by locks_by_key. Because the majority of + // memory is consumed by keys, locks_by_key and lock_ts_heap shares + // the same Arc<[u8]>, so lock_ts_heap is negligible. Also, it's hard to + // track accurate memory usage of lock_ts_heap as a timestamp may have + // many keys. + key.heap_size() + std::mem::size_of::() + } + #[must_use] pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { @@ -142,7 +172,7 @@ impl Resolver { start_ts, self.region_id ); - let bytes = key.as_slice().heap_size(); + let bytes = self.lock_heap_size(&key); if !self.memory_quota.alloc(bytes) { return false; } @@ -157,7 +187,7 @@ impl Resolver { self.update_tracked_index(index); } let start_ts = if let Some(start_ts) = self.locks_by_key.remove(key) { - let bytes = key.heap_size(); + let bytes = self.lock_heap_size(key); self.memory_quota.free(bytes); start_ts } else { @@ -358,12 +388,13 @@ mod tests { let memory_quota = Arc::new(MemoryQuota::new(1024)); let mut resolver = Resolver::new(1, memory_quota.clone()); let mut key = vec![0; 77]; + let lock_size = resolver.lock_heap_size(&key); let mut ts = TimeStamp::default(); while resolver.track_lock(ts, key.clone(), None) { ts.incr(); key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); } - let remain = 1024 % key.len(); + let remain = 1024 % lock_size; assert_eq!(memory_quota.in_use(), 1024 - remain); let mut ts = TimeStamp::default(); @@ -372,7 +403,7 @@ mod tests { key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); resolver.untrack_lock(&key, None); } - assert_eq!(memory_quota.in_use(), 1024 - 5 * key.len() - remain); + assert_eq!(memory_quota.in_use(), 1024 - 5 * lock_size - remain); drop(resolver); assert_eq!(memory_quota.in_use(), 0); } diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 0ca74bda29da..e8665e9d8609 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -45,6 +45,7 @@ pub struct ScanTask { pub mode: ScanMode, pub region: Region, pub checkpoint_ts: TimeStamp, + pub backoff: Option, pub is_cancelled: IsCancelledCallback, pub send_entries: OnEntriesCallback, pub on_error: Option, @@ -84,6 +85,18 @@ impl, E: KvEngine> ScannerPool { pub fn spawn_task(&self, mut task: ScanTask) { let cdc_handle = self.cdc_handle.clone(); let fut = async move { + if let Some(backoff) = task.backoff { + if let Err(e) = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + backoff) + .compat() + .await + { + error!("failed to backoff"; "err" => ?e); + } + if (task.is_cancelled)() { + return; + } + } let snap = match Self::get_snapshot(&mut task, cdc_handle).await { Ok(snap) => snap, Err(e) => { @@ -193,37 +206,36 @@ impl, E: KvEngine> ScannerPool { error!("failed to backoff"; "err" => ?e); } if (task.is_cancelled)() { - return Err(Error::Other("scan task cancelled".into())); + return Err(box_err!("scan task cancelled")); } } let (cb, fut) = tikv_util::future::paired_future_callback(); let change_cmd = ChangeObserver::from_rts(task.region.id, task.handle.clone()); - cdc_handle.capture_change( - 
task.region.id, - task.region.get_region_epoch().clone(), - change_cmd, - Callback::read(Box::new(cb)), - )?; + cdc_handle + .capture_change( + task.region.id, + task.region.get_region_epoch().clone(), + change_cmd, + Callback::read(Box::new(cb)), + ) + .map_err(|e| Error::Other(box_err!("{:?}", e)))?; let mut resp = box_try!(fut.await); if resp.response.get_header().has_error() { let err = resp.response.take_header().take_error(); // These two errors can't handled by retrying since the epoch and observe id is // unchanged if err.has_epoch_not_match() || err.get_message().contains("stale observe id") { - return Err(Error::request(err)); + return Err(box_err!("get snapshot failed: {:?}", err)); } last_err = Some(err) } else { return Ok(resp.snapshot.unwrap()); } } - Err(Error::Other( - format!( - "backoff timeout after {} try, last error: {:?}", - GET_SNAPSHOT_RETRY_TIME, - last_err.unwrap() - ) - .into(), + Err(box_err!( + "backoff timeout after {} try, last error: {:?}", + GET_SNAPSHOT_RETRY_TIME, + last_err.unwrap() )) } @@ -232,12 +244,14 @@ impl, E: KvEngine> ScannerPool { start: Option<&Key>, _checkpoint_ts: TimeStamp, ) -> Result<(Vec<(Key, Lock)>, bool)> { - let (locks, has_remaining) = reader.scan_locks( - start, - None, - |lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), - DEFAULT_SCAN_BATCH_SIZE, - )?; + let (locks, has_remaining) = reader + .scan_locks( + start, + None, + |lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), + DEFAULT_SCAN_BATCH_SIZE, + ) + .map_err(|e| Error::Other(box_err!("{:?}", e)))?; Ok((locks, has_remaining)) } @@ -245,7 +259,10 @@ impl, E: KvEngine> ScannerPool { let mut entries = Vec::with_capacity(DEFAULT_SCAN_BATCH_SIZE); let mut has_remaining = true; while entries.len() < entries.capacity() { - match scanner.next_entry()? { + match scanner + .next_entry() + .map_err(|e| Error::Other(box_err!("{:?}", e)))? + { Some(entry) => { entries.push(entry); } diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 7802108b92b1..634aa66c6014 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -2,11 +2,12 @@ #[path = "../mod.rs"] mod testsuite; -use std::time::Duration; +use std::{sync::mpsc::channel, time::Duration}; use futures::executor::block_on; use kvproto::{kvrpcpb::*, metapb::RegionEpoch}; use pd_client::PdClient; +use resolved_ts::Task; use tempfile::Builder; use test_raftstore::sleep_ms; use test_sst_importer::*; @@ -141,3 +142,92 @@ fn test_dynamic_change_advance_ts_interval() { suite.stop(); } + +#[test] +fn test_change_log_memory_quota_exceeded() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + suite.must_get_rts_ge( + region.id, + block_on(suite.cluster.pd_client.get_tso()).unwrap(), + ); + + // Set a small memory quota to trigger memory quota exceeded. + suite.must_change_memory_quota(1, 1); + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Resolved ts should not advance. 
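+    // (With the quota set to a single byte, the lock written above cannot be
+    // tracked: the resolver hits MemoryQuotaExceeded and keeps re-registering
+    // the region with backoff.)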
+ let (tx, rx) = channel(); + suite.must_schedule_task( + 1, + Task::GetDiagnosisInfo { + region_id: 1, + log_locks: false, + min_start_ts: u64::MAX, + callback: Box::new(move |res| { + tx.send(res).unwrap(); + }), + }, + ); + let res = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + assert_eq!(res.unwrap().1, 0, "{:?}", res); + + suite.stop(); +} + +#[test] +fn test_scan_log_memory_quota_exceeded() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + suite.must_get_rts_ge( + region.id, + block_on(suite.cluster.pd_client.get_tso()).unwrap(), + ); + + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Set a small memory quota to trigger memory quota exceeded. + suite.must_change_memory_quota(1, 1); + // Split region + suite.cluster.must_split(®ion, k); + + let r1 = suite.cluster.get_region(&[]); + let r2 = suite.cluster.get_region(k); + let current_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + // Wait for scan log. + sleep_ms(500); + // Resolved ts of region1 should be advanced + suite.must_get_rts_ge(r1.id, current_ts); + + // Resolved ts should not advance. + let (tx, rx) = channel(); + suite.must_schedule_task( + r2.id, + Task::GetDiagnosisInfo { + region_id: r2.id, + log_locks: false, + min_start_ts: u64::MAX, + callback: Box::new(move |res| { + tx.send(res).unwrap(); + }), + }, + ); + let res = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + assert_eq!(res.unwrap().1, 0, "{:?}", res); + + suite.stop(); +} diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 4e6226f89351..830e2156e9f5 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -122,8 +122,21 @@ impl TestSuite { ); c }; + self.must_schedule_task(store_id, Task::ChangeConfig { change }); + } + + pub fn must_change_memory_quota(&self, store_id: u64, bytes: u64) { + let change = { + let mut c = std::collections::HashMap::default(); + c.insert("memory_quota".to_owned(), ConfigValue::Size(bytes)); + c + }; + self.must_schedule_task(store_id, Task::ChangeConfig { change }); + } + + pub fn must_schedule_task(&self, store_id: u64, task: Task) { let scheduler = self.endpoints.get(&store_id).unwrap().scheduler(); - scheduler.schedule(Task::ChangeConfig { change }).unwrap(); + scheduler.schedule(task).unwrap(); } pub fn must_kv_prewrite( diff --git a/src/config/mod.rs b/src/config/mod.rs index 38369b3ee93a..d9b9263e9286 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2955,6 +2955,7 @@ pub struct ResolvedTsConfig { pub advance_ts_interval: ReadableDuration, #[online_config(skip)] pub scan_lock_pool_size: usize, + pub memory_quota: ReadableSize, } impl ResolvedTsConfig { @@ -2975,6 +2976,7 @@ impl Default for ResolvedTsConfig { enable: true, advance_ts_interval: ReadableDuration::secs(20), scan_lock_pool_size: 2, + memory_quota: ReadableSize::mb(256), } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8fdbaa00f253..87b1830e4f65 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -851,6 +851,7 @@ fn test_serde_custom_tikv_config() { enable: true, advance_ts_interval: ReadableDuration::secs(5), scan_lock_pool_size: 1, + memory_quota: ReadableSize::mb(1), 
}; value.causal_ts = CausalTsConfig { renew_interval: ReadableDuration::millis(100), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 053e7c459396..94f9ef1ecf12 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -705,6 +705,7 @@ sink-memory-quota = "7MB" enable = true advance-ts-interval = "5s" scan-lock-pool-size = 1 +memory-quota = "1MB" [split] detect-times = 10 From 517522b5e77b8e0aae667790b2961d88fb61a23b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 29 Aug 2023 18:57:37 +0800 Subject: [PATCH 013/203] raftstore-v2: support column family based write buffer manager (#15453) ref tikv/tikv#12842 support column family based write buffer manager Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +-- components/engine_traits/src/flush.rs | 5 ++ src/config/mod.rs | 49 ++++++++++++++++++-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_engine.rs | 53 ++++++++++++++++++++++ tests/integrations/config/mod.rs | 5 ++ tests/integrations/config/test-custom.toml | 1 + 7 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 tests/failpoints/cases/test_engine.rs diff --git a/Cargo.lock b/Cargo.lock index 3c44a639e384..162d1f3ae073 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index eebf0e7c32ae..d0f9f892f349 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -204,6 +204,11 @@ impl PersistenceListener { /// /// `smallest_seqno` should be the smallest seqno of the memtable. pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64) { + (|| { + fail_point!("on_memtable_sealed", |t| { + assert_eq!(t.unwrap().as_str(), cf); + }) + })(); // The correctness relies on the assumption that there will be only one // thread writting to the DB and increasing apply index. // Apply index will be set within DB lock, so it's correct even with manual diff --git a/src/config/mod.rs b/src/config/mod.rs index d9b9263e9286..2494e84dfbdc 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,6 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. 
const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: u64 = ReadableSize::mb(32).0; /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -311,6 +312,7 @@ macro_rules! cf_config { #[online_config(skip)] pub compression_per_level: [DBCompressionType; 7], pub write_buffer_size: Option, + pub write_buffer_limit: Option, pub max_write_buffer_number: i32, #[online_config(skip)] pub min_write_buffer_number_to_merge: i32, @@ -668,6 +670,7 @@ macro_rules! build_cf_opt { pub struct CfResources { pub cache: Cache, pub compaction_thread_limiters: HashMap<&'static str, ConcurrentTaskLimiter>, + pub write_buffer_managers: HashMap<&'static str, Arc>, } cf_config!(DefaultCfConfig); @@ -734,6 +737,7 @@ impl Default for DefaultCfConfig { ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), + write_buffer_limit: None, } } } @@ -832,6 +836,9 @@ impl DefaultCfConfig { } } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_DEFAULT) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -906,6 +913,7 @@ impl Default for WriteCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -962,6 +970,9 @@ impl WriteCfConfig { .unwrap(); } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_WRITE) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -1028,6 +1039,7 @@ impl Default for LockCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -1062,6 +1074,9 @@ impl LockCfConfig { .unwrap(); } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_LOCK) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -1127,6 +1142,7 @@ impl Default for RaftCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -1385,9 +1401,12 @@ impl DbConfig { // strategy is consistent with single RocksDB. 
self.defaultcf.max_compactions.get_or_insert(1); self.writecf.max_compactions.get_or_insert(1); - if self.lockcf.write_buffer_size.is_none() { - self.lockcf.write_buffer_size = Some(ReadableSize::mb(4)); - } + self.lockcf + .write_buffer_size + .get_or_insert(ReadableSize::mb(4)); + self.lockcf + .write_buffer_limit + .get_or_insert(ReadableSize::mb(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT)); } } } @@ -1510,9 +1529,29 @@ impl DbConfig { ConcurrentTaskLimiter::new(CF_RAFT, n), ); } + let mut write_buffer_managers = HashMap::default(); + self.lockcf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_LOCK, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); + self.defaultcf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_DEFAULT, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); + self.writecf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_WRITE, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); CfResources { cache, compaction_thread_limiters, + write_buffer_managers, } } @@ -1556,6 +1595,9 @@ impl DbConfig { self.writecf.validate()?; self.raftcf.validate()?; self.titan.validate()?; + if self.raftcf.write_buffer_limit.is_some() { + return Err("raftcf does not support cf based write buffer manager".into()); + } if self.enable_unordered_write { if self.titan.enabled { return Err("RocksDB.unordered_write does not support Titan".into()); @@ -1660,6 +1702,7 @@ impl Default for RaftDefaultCfConfig { ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), + write_buffer_limit: None, } } } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 9c90211c073d..9baa04d0b4f1 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -10,6 +10,7 @@ mod test_coprocessor; mod test_disk_full; mod test_early_apply; mod test_encryption; +mod test_engine; mod test_gc_metrics; mod test_gc_worker; mod test_hibernate; diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs new file mode 100644 index 000000000000..93d1c96597b1 --- /dev/null +++ b/tests/failpoints/cases/test_engine.rs @@ -0,0 +1,53 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use tikv_util::config::ReadableSize; + +fn dummy_string(len: usize) -> String { + String::from_utf8(vec![0; len]).unwrap() +} + +#[test] +fn test_write_buffer_manager() { + use test_raftstore_v2::*; + let count = 1; + let mut cluster = new_node_cluster(0, count); + cluster.cfg.rocksdb.lockcf.write_buffer_limit = Some(ReadableSize::kb(10)); + cluster.cfg.rocksdb.defaultcf.write_buffer_limit = Some(ReadableSize::kb(10)); + cluster.cfg.rocksdb.write_buffer_limit = Some(ReadableSize::kb(30)); + + // Let write buffer size small to make memtable request fewer memories. + // Otherwise, one single memory request can exceeds the write buffer limit set + // above. 
+ cluster.cfg.rocksdb.lockcf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.cfg.rocksdb.writecf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.cfg.rocksdb.defaultcf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.run(); + + let dummy = dummy_string(500); + let fp = "on_memtable_sealed"; + fail::cfg(fp, "return(lock)").unwrap(); + + for i in 0..10 { + let key = format!("key-{:03}", i); + for cf in &[CF_WRITE, CF_LOCK] { + cluster.must_put_cf(cf, key.as_bytes(), dummy.as_bytes()); + } + } + + fail::cfg(fp, "return(default)").unwrap(); + + for i in 0..10 { + let key = format!("key-{:03}", i); + for cf in &[CF_WRITE, CF_DEFAULT] { + cluster.must_put_cf(cf, key.as_bytes(), dummy.as_bytes()); + } + } + + fail::cfg(fp, "return(write)").unwrap(); + let dummy = dummy_string(1000); + for i in 0..10 { + let key = format!("key-{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), dummy.as_bytes()); + } +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 87b1830e4f65..d3091e30eeda 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -388,6 +388,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), @@ -461,6 +462,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), @@ -534,6 +536,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: Some(ReadableSize::mb(16)), }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), @@ -607,6 +610,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, titan: titan_db_config.clone(), }; @@ -695,6 +699,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, + write_buffer_limit: None, }, titan: titan_db_config, }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 94f9ef1ecf12..653c3d2daef7 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -435,6 +435,7 @@ compression-per-level = [ "lz4", ] write-buffer-size = "1MB" +write-buffer-limit = "16MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" From 4b3e33e6c208e445388c43a99a5707d03421f7bd Mon Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 30 Aug 2023 10:21:37 +0800 Subject: [PATCH 014/203] pd_client: add backoff for the reconnect retries (#15429) ref tikv/pd#6556, close tikv/tikv#15428 pc_client: add store-level backoff for the reconnect retries Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client_v2.rs | 14 ++-- components/pd_client/src/metrics.rs | 27 +++++-- components/pd_client/src/util.rs | 100 +++++++++++++++++++------- 3 files changed, 99 insertions(+), 42 deletions(-) diff --git 
a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 5b0d563f2b8a..97b2702fc39d 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -117,7 +117,7 @@ impl RawClient { /// Returns Ok(true) when a new connection is established. async fn maybe_reconnect(&mut self, ctx: &ConnectContext, force: bool) -> Result { - PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); + PD_RECONNECT_COUNTER_VEC.try_connect.inc(); let start = Instant::now(); let members = self.members.clone(); @@ -135,21 +135,15 @@ impl RawClient { .await { Err(e) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["failure"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.failure.inc(); return Err(e); } Ok(None) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["no-need"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.no_need.inc(); return Ok(false); } Ok(Some(tuple)) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["success"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.success.inc(); tuple } }; diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index d92e334396a1..4e185658f156 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -2,7 +2,7 @@ use lazy_static::lazy_static; use prometheus::*; -use prometheus_static_metric::{make_static_metric, register_static_histogram_vec}; +use prometheus_static_metric::*; make_static_metric! { pub label_enum PDRequestEventType { @@ -40,9 +40,20 @@ make_static_metric! { meta_storage_watch, } + pub label_enum PDReconnectEventKind { + success, + failure, + no_need, + cancel, + try_connect, + } + pub struct PDRequestEventHistogramVec: Histogram { "type" => PDRequestEventType, } + pub struct PDReconnectEventCounterVec: IntCounter { + "type" => PDReconnectEventKind, + } } lazy_static! { @@ -66,12 +77,14 @@ lazy_static! { &["type"] ) .unwrap(); - pub static ref PD_RECONNECT_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( - "tikv_pd_reconnect_total", - "Total number of PD reconnections.", - &["type"] - ) - .unwrap(); + pub static ref PD_RECONNECT_COUNTER_VEC: PDReconnectEventCounterVec = + register_static_int_counter_vec!( + PDReconnectEventCounterVec, + "tikv_pd_reconnect_total", + "Total number of PD reconnections.", + &["type"] + ) + .unwrap(); pub static ref PD_PENDING_HEARTBEAT_GAUGE: IntGauge = register_int_gauge!( "tikv_pd_pending_heartbeat_total", "Total number of pending region heartbeat" diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 5491a51c0471..66b084d4998b 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -50,6 +50,7 @@ const MAX_RETRY_TIMES: u64 = 5; // The max duration when retrying to connect to leader. No matter if the // MAX_RETRY_TIMES is reached. const MAX_RETRY_DURATION: Duration = Duration::from_secs(10); +const MAX_BACKOFF: Duration = Duration::from_secs(3); // FIXME: Use a request-independent way to handle reconnection. 
pub const REQUEST_RECONNECT_INTERVAL: Duration = Duration::from_secs(1); // 1s @@ -116,6 +117,7 @@ pub struct Inner { pub rg_resp: Option>, last_try_reconnect: Instant, + bo: ExponentialBackoff, } impl Inner { @@ -168,7 +170,6 @@ pub struct Client { pub(crate) inner: RwLock, pub feature_gate: FeatureGate, enable_forwarding: bool, - retry_interval: Duration, } impl Client { @@ -219,6 +220,7 @@ impl Client { pending_heartbeat: Arc::default(), pending_buckets: Arc::default(), last_try_reconnect: Instant::now(), + bo: ExponentialBackoff::new(retry_interval), tso, meta_storage, rg_sender: Either::Left(Some(rg_sender)), @@ -226,7 +228,6 @@ impl Client { }), feature_gate: FeatureGate::default(), enable_forwarding, - retry_interval, } } @@ -363,17 +364,15 @@ impl Client { /// Note: Retrying too quickly will return an error due to cancellation. /// Please always try to reconnect after sending the request first. pub async fn reconnect(&self, force: bool) -> Result<()> { - PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); + PD_RECONNECT_COUNTER_VEC.try_connect.inc(); let start = Instant::now(); let future = { let inner = self.inner.rl(); - if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { + if start.saturating_duration_since(inner.last_try_reconnect) < inner.bo.get_interval() { // Avoid unnecessary updating. // Prevent a large number of reconnections in a short time. - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["cancel"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.cancel.inc(); return Err(box_err!("cancel reconnection due to too small interval")); } let connector = PdConnector::new(inner.env.clone(), inner.security_mgr.clone()); @@ -394,36 +393,38 @@ impl Client { { let mut inner = self.inner.wl(); - if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { + if start.saturating_duration_since(inner.last_try_reconnect) < inner.bo.get_interval() { // There may be multiple reconnections that pass the read lock at the same time. // Check again in the write lock to avoid unnecessary updating. - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["cancel"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.cancel.inc(); return Err(box_err!("cancel reconnection due to too small interval")); } inner.last_try_reconnect = start; + inner.bo.next_backoff(); } slow_log!(start.saturating_elapsed(), "try reconnect pd"); let (client, target_info, members, tso) = match future.await { Err(e) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["failure"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.failure.inc(); return Err(e); } - Ok(None) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["no-need"]) - .inc(); - return Ok(()); - } - Ok(Some(tuple)) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["success"]) - .inc(); - tuple + Ok(res) => { + // Reset the retry count. + { + let mut inner = self.inner.wl(); + inner.bo.reset() + } + match res { + None => { + PD_RECONNECT_COUNTER_VEC.no_need.inc(); + return Ok(()); + } + Some(tuple) => { + PD_RECONNECT_COUNTER_VEC.success.inc(); + tuple + } + } } }; @@ -900,6 +901,33 @@ impl PdConnector { } } +/// Simple backoff strategy. 
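+/// Each retry doubles the interval (`next_backoff`, capped at `MAX_BACKOFF`);
+/// the client cancels reconnect attempts that arrive within the current
+/// interval of the last try, and resets to the base interval once a
+/// reconnect succeeds.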
+struct ExponentialBackoff { + base: Duration, + interval: Duration, +} + +impl ExponentialBackoff { + pub fn new(base: Duration) -> Self { + Self { + base, + interval: base, + } + } + pub fn next_backoff(&mut self) -> Duration { + self.interval = std::cmp::min(self.interval * 2, MAX_BACKOFF); + self.interval + } + + pub fn get_interval(&self) -> Duration { + self.interval + } + + pub fn reset(&mut self) { + self.interval = self.base; + } +} + pub fn trim_http_prefix(s: &str) -> &str { s.trim_start_matches("http://") .trim_start_matches("https://") @@ -1045,8 +1073,11 @@ pub fn merge_bucket_stats, I: AsRef<[u8]>>( mod test { use kvproto::metapb::BucketStats; + use super::*; use crate::{merge_bucket_stats, util::find_bucket_index}; + const BASE_BACKOFF: Duration = Duration::from_millis(100); + #[test] fn test_merge_bucket_stats() { #[allow(clippy::type_complexity)] @@ -1162,4 +1193,23 @@ mod test { assert_eq!(find_bucket_index(b"k7", &keys), Some(4)); assert_eq!(find_bucket_index(b"k8", &keys), Some(4)); } + + #[test] + fn test_exponential_backoff() { + let mut backoff = ExponentialBackoff::new(BASE_BACKOFF); + assert_eq!(backoff.get_interval(), BASE_BACKOFF); + + assert_eq!(backoff.next_backoff(), 2 * BASE_BACKOFF); + assert_eq!(backoff.next_backoff(), Duration::from_millis(400)); + assert_eq!(backoff.get_interval(), Duration::from_millis(400)); + + // Should not exceed MAX_BACKOFF + for _ in 0..20 { + backoff.next_backoff(); + } + assert_eq!(backoff.get_interval(), MAX_BACKOFF); + + backoff.reset(); + assert_eq!(backoff.get_interval(), BASE_BACKOFF); + } } From 0bb270621f6d561560156c38cc21240ceae97c00 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 30 Aug 2023 14:44:08 +0800 Subject: [PATCH 015/203] coprocessor: skip transient read request (#15406) close tikv/tikv#15405 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- src/coprocessor/metrics.rs | 6 ++ src/coprocessor/tracker.rs | 175 +++++++++++++++++++++---------------- 2 files changed, 104 insertions(+), 77 deletions(-) diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index 64905b3dfba9..02f45d353115 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -208,6 +208,12 @@ impl CopLocalMetrics { pub fn local_read_stats(&self) -> &ReadStats { &self.local_read_stats } + + #[cfg(test)] + pub fn clear(&mut self) { + self.local_read_stats.region_infos.clear(); + self.local_read_stats.region_buckets.clear(); + } } thread_local! { diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 18eaa0b6e988..71d84388c3b7 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -350,20 +350,24 @@ impl Tracker { false }; - tls_collect_query( - region_id, - peer, - start_key.as_encoded(), - end_key.as_encoded(), - reverse_scan, - ); - tls_collect_read_flow( - self.req_ctx.context.get_region_id(), - Some(start_key.as_encoded()), - Some(end_key.as_encoded()), - &total_storage_stats, - self.buckets.as_ref(), - ); + // only collect metrics for select and index, exclude transient read flow such + // like analyze and checksum. 
+ if self.req_ctx.tag == ReqTag::select || self.req_ctx.tag == ReqTag::index { + tls_collect_query( + region_id, + peer, + start_key.as_encoded(), + end_key.as_encoded(), + reverse_scan, + ); + tls_collect_read_flow( + self.req_ctx.context.get_region_id(), + Some(start_key.as_encoded()), + Some(end_key.as_encoded()), + &total_storage_stats, + self.buckets.as_ref(), + ); + } self.current_stage = TrackerState::Tracked; } @@ -443,69 +447,86 @@ mod tests { #[test] fn test_track() { - let mut context = kvrpcpb::Context::default(); - context.set_region_id(1); - - let mut req_ctx = ReqContext::new( - ReqTag::test, - context, - vec![], - Duration::from_secs(0), - None, - None, - TimeStamp::max(), - None, - PerfLevel::EnableCount, - ); - req_ctx.lower_bound = vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 67, - ]; - req_ctx.upper_bound = vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 167, - ]; - let mut track: Tracker = Tracker::new(req_ctx, Duration::default()); - let mut bucket = BucketMeta::default(); - bucket.region_id = 1; - bucket.version = 1; - bucket.keys = vec![ - vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 255, 179, 95, 114, 128, 0, 0, 0, 0, 255, 0, 175, 155, - 0, 0, 0, 0, 0, 250, - ], - vec![ - 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, - ], - ]; - bucket.sizes = vec![10]; - track.buckets = Some(Arc::new(bucket)); - - let mut stat = Statistics::default(); - stat.write.flow_stats.read_keys = 10; - track.total_storage_stats = stat; - - track.track(); - drop(track); - TLS_COP_METRICS.with(|m| { - assert_eq!( - 10, - m.borrow() - .local_read_stats() - .region_infos - .get(&1) - .unwrap() - .flow - .read_keys - ); - assert_eq!( - vec![10], - m.borrow() - .local_read_stats() - .region_buckets - .get(&1) - .unwrap() - .stats - .read_keys + let check = move |tag: ReqTag, flow: u64| { + let mut context = kvrpcpb::Context::default(); + context.set_region_id(1); + let mut req_ctx = ReqContext::new( + tag, + context, + vec![], + Duration::from_secs(0), + None, + None, + TimeStamp::max(), + None, + PerfLevel::EnableCount, ); - }); + + req_ctx.lower_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 67, + ]; + req_ctx.upper_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 167, + ]; + let mut track: Tracker = Tracker::new(req_ctx, Duration::default()); + let mut bucket = BucketMeta::default(); + bucket.region_id = 1; + bucket.version = 1; + bucket.keys = vec![ + vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 255, 179, 95, 114, 128, 0, 0, 0, 0, 255, 0, 175, + 155, 0, 0, 0, 0, 0, 250, + ], + vec![ + 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, + ], + ]; + bucket.sizes = vec![10]; + track.buckets = Some(Arc::new(bucket)); + + let mut stat = Statistics::default(); + stat.write.flow_stats.read_keys = 10; + track.total_storage_stats = stat; + + track.track(); + drop(track); + TLS_COP_METRICS.with(|m| { + if flow > 0 { + assert_eq!( + flow as usize, + m.borrow() + .local_read_stats() + .region_infos + .get(&1) + .unwrap() + .flow + .read_keys + ); + assert_eq!( + flow, + m.borrow() + .local_read_stats() + .region_buckets + .get(&1) + .unwrap() + .stats + .read_keys[0] + ); + } else { + assert!(m.borrow().local_read_stats().region_infos.get(&1).is_none()); + assert!( + m.borrow() + .local_read_stats() + .region_buckets + .get(&1) + .is_none() + ); + } + + m.borrow_mut().clear(); + }); + }; + check(ReqTag::select, 10); + 
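+        // Transient reads such as analyze must leave no read-flow statistics.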
check(ReqTag::analyze_full_sampling, 0); } } From fb9a40d20dcfb9ceb7cecba9d471fa8575c05913 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 30 Aug 2023 15:18:38 +0800 Subject: [PATCH 016/203] raftstore-v2: init persisted_tablet_index on startup (#15441) ref tikv/tikv#12842 - Initialize `persisted_apply_index` on startup. Signed-off-by: tabokie --- .../raftstore-v2/src/operation/command/admin/compact_log.rs | 4 ++-- components/raftstore-v2/src/operation/life.rs | 4 ++++ components/raftstore-v2/src/raft/peer.rs | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 93876475f5f6..d054234b46fd 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -63,14 +63,14 @@ pub struct CompactLogContext { } impl CompactLogContext { - pub fn new(last_applying_index: u64) -> CompactLogContext { + pub fn new(last_applying_index: u64, persisted_applied: u64) -> CompactLogContext { CompactLogContext { skipped_ticks: 0, approximate_log_size: 0, last_applying_index, last_compacted_idx: 0, tombstone_tablets_wait_index: vec![], - persisted_tablet_index: AtomicU64::new(0).into(), + persisted_tablet_index: AtomicU64::new(persisted_applied).into(), } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index e0e7f63785d2..8fe1d2a07b3d 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -797,11 +797,15 @@ impl Peer { if self.has_pending_tombstone_tablets() { let applied_index = self.entry_storage().applied_index(); let last_index = self.entry_storage().last_index(); + let persisted = self + .remember_persisted_tablet_index() + .load(std::sync::atomic::Ordering::Relaxed); info!( self.logger, "postpone destroy because there're pending tombstone tablets"; "applied_index" => applied_index, "last_index" => last_index, + "persisted_applied" => persisted, ); return true; } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 2f3a3376fe9d..c3a80e3756c2 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -158,6 +158,7 @@ impl Peer { let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); let merge_context = MergeContext::from_region_state(&logger, storage.region_state()); + let persisted_applied = storage.apply_trace().persisted_apply_index(); let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); @@ -184,7 +185,7 @@ impl Peer { self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), - compact_log_context: CompactLogContext::new(applied_index), + compact_log_context: CompactLogContext::new(applied_index, persisted_applied), merge_context: merge_context.map(|c| Box::new(c)), last_sent_snapshot_index: 0, raw_write_encoder: None, From 69b8ac5717119290ba721fae61edb894440a80fc Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 30 Aug 2023 17:30:39 +0800 Subject: [PATCH 017/203] raftstore-v2: consider unmatch between region range and tablet range for mvcc scan (#15455) ref tikv/tikv#14654 consider unmatch between region range and tablet range for mvcc scan --- 
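For reference, the clamping rule applied before building each per-tablet MVCC scanner can be sketched on its own: the iteration range is the intersection of the requested range and the region's key range, with an empty end key treated as unbounded. The sketch below mirrors the `smaller_key`/`larger_key` helpers in the diff; the key values are only illustrative.

fn clamp<'a>(
    start: &'a [u8],
    end: &'a [u8],
    region_start: &'a [u8],
    region_end: &'a [u8],
) -> (&'a [u8], &'a [u8]) {
    // Start keys: "" is the smallest value, so a plain max is enough.
    let iter_start = std::cmp::max(start, region_start);
    // End keys: "" means unbounded, so it must lose to any concrete key.
    let iter_end = match (end.is_empty(), region_end.is_empty()) {
        (true, _) => region_end,
        (_, true) => end,
        _ => std::cmp::min(end, region_end),
    };
    (iter_start, iter_end)
}

fn main() {
    // Request wider than the region: clamp to the region.
    assert_eq!(clamp(b"", b"", b"k05", b"k10"), (&b"k05"[..], &b"k10"[..]));
    // Request narrower than the region: keep the request.
    assert_eq!(clamp(b"k06", b"k08", b"k05", b"k10"), (&b"k06"[..], &b"k08"[..]));
    // Region end "" is unbounded: the request's end wins.
    assert_eq!(clamp(b"k06", b"k08", b"k05", b""), (&b"k06"[..], &b"k08"[..]));
}

Clamping to the region keeps a tablet that holds more data than its region metadata from contributing keys outside that region, which is what the new failpoint test in this patch exercises.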
components/engine_rocks/src/util.rs | 8 + src/server/debug2.rs | 240 ++++++------------------ tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_debugger.rs | 147 +++++++++++++++ 4 files changed, 216 insertions(+), 180 deletions(-) create mode 100644 tests/failpoints/cases/test_debugger.rs diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 225cd1d7f063..e4991419eed9 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -3,6 +3,7 @@ use std::{ffi::CString, fs, path::Path, str::FromStr, sync::Arc}; use engine_traits::{Engines, Range, Result, CF_DEFAULT}; +use fail::fail_point; use rocksdb::{ load_latest_options, CColumnFamilyDescriptor, CFHandle, ColumnFamilyOptions, CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, @@ -462,6 +463,13 @@ pub struct RangeCompactionFilterFactory(Arc); impl RangeCompactionFilterFactory { pub fn new(start_key: Box<[u8]>, end_key: Box<[u8]>) -> Self { + fail_point!("unlimited_range_compaction_filter", |_| { + let range = OwnedRange { + start_key: keys::data_key(b"").into_boxed_slice(), + end_key: keys::data_end_key(b"").into_boxed_slice(), + }; + Self(Arc::new(range)) + }); let range = OwnedRange { start_key, end_key }; Self(Arc::new(range)) } diff --git a/src/server/debug2.rs b/src/server/debug2.rs index e914b3537604..cf17aea81ebd 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -10,7 +10,7 @@ use engine_traits::{ TabletRegistry, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use futures::future::Future; -use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; +use keys::{data_key, enc_end_key, enc_start_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ debugpb::Db as DbType, kvrpcpb::MvccInfo, @@ -36,6 +36,34 @@ use crate::{ storage::mvcc::{MvccInfoCollector, MvccInfoScanner}, }; +// `key1` and `key2` should both be start_key or end_key. +fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], is_end_key: bool) -> &'a [u8] { + if is_end_key && key1.is_empty() { + return key2; + } + if is_end_key && key2.is_empty() { + return key1; + } + if key1 < key2 { + return key1; + } + key2 +} + +// `key1` and `key2` should both be start_key or end_key. 
+fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], is_end_key: bool) -> &'a [u8] { + if is_end_key && key1.is_empty() { + return key1; + } + if is_end_key && key2.is_empty() { + return key2; + } + if key1 < key2 { + return key2; + } + key1 +} + // return the region containing the seek_key or the next region if not existed fn seek_region( seek_key: &[u8], @@ -98,11 +126,16 @@ impl MvccInfoIteratorV2 { )?; let tablet = tablet_cache.latest().unwrap(); + let region_start_key = enc_start_key(first_region_state.get_region()); + let region_end_key = enc_end_key(first_region_state.get_region()); + let iter_start = larger_key(start, ®ion_start_key, false); + let iter_end = smaller_key(end, ®ion_end_key, true); + assert!(!iter_start.is_empty() && !iter_start.is_empty()); let scanner = Some( MvccInfoScanner::new( |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), - if start.is_empty() { None } else { Some(start) }, - if end.is_empty() { None } else { Some(end) }, + Some(iter_start), + Some(iter_end), MvccInfoCollector::default(), ) .map_err(|e| -> Error { box_err!(e) })?, @@ -171,19 +204,16 @@ impl Iterator for MvccInfoIteratorV2 { ) .unwrap(); let tablet = tablet_cache.latest().unwrap(); + let region_start_key = enc_start_key(&self.cur_region); + let region_end_key = enc_end_key(&self.cur_region); + let iter_start = larger_key(&self.start, ®ion_start_key, false); + let iter_end = smaller_key(&self.end, ®ion_end_key, true); + assert!(!iter_start.is_empty() && !iter_start.is_empty()); self.scanner = Some( MvccInfoScanner::new( |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), - if self.start.is_empty() { - None - } else { - Some(self.start.as_bytes()) - }, - if self.end.is_empty() { - None - } else { - Some(self.end.as_bytes()) - }, + Some(iter_start), + Some(iter_end), MvccInfoCollector::default(), ) .unwrap(), @@ -1154,38 +1184,28 @@ fn deivde_regions_for_concurrency( Ok(regions_groups) } -// `key1` and `key2` should both be start_key or end_key. -fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { - if end_key && key1.is_empty() { - return key2; - } - if end_key && key2.is_empty() { - return key1; - } - if key1 < key2 { - return key1; - } - key2 -} +#[cfg(any(test, feature = "testexport"))] +pub fn new_debugger(path: &std::path::Path) -> DebuggerImplV2 { + use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; -// `key1` and `key2` should both be start_key or end_key. 
-fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { - if end_key && key1.is_empty() { - return key1; - } - if end_key && key2.is_empty() { - return key2; - } - if key1 < key2 { - return key2; - } - key1 + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + let cache = cfg.storage.block_cache.build_shared_cache(); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = + raft_log_engine::RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + + DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) } #[cfg(test)] mod tests { - use std::path::Path; - use collections::HashMap; use engine_traits::{ RaftEngineReadOnly, RaftLogBatch, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, @@ -1196,36 +1216,13 @@ mod tests { raft_serverpb::*, }; use raft::prelude::EntryType; - use raft_log_engine::RaftLogEngine; use raftstore::store::RAFT_INIT_LOG_INDEX; use tikv_util::store::new_peer; use super::*; - use crate::{ - config::TikvConfig, - server::KvEngineFactoryBuilder, - storage::{txn::tests::must_prewrite_put, TestEngineBuilder}, - }; - const INITIAL_TABLET_INDEX: u64 = 5; const INITIAL_APPLY_INDEX: u64 = 5; - fn new_debugger(path: &Path) -> DebuggerImplV2 { - let mut cfg = TikvConfig::default(); - cfg.storage.data_dir = path.to_str().unwrap().to_string(); - cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); - cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); - let cache = cfg.storage.block_cache.build_shared_cache(); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); - let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); - - let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); - - DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) - } - impl DebuggerImplV2 { fn set_store_id(&self, store_id: u64) { let mut ident = self.get_store_ident().unwrap_or_default(); @@ -1458,123 +1455,6 @@ mod tests { debugger.region_size(region_id, cfs.clone()).unwrap_err(); } - // For simplicity, the format of the key is inline with data in - // prepare_data_on_disk - fn extract_key(key: &[u8]) -> &[u8] { - &key[1..4] - } - - // Prepare some data - // Data for each region: - // Region 1: k00 .. k04 - // Region 2: k05 .. k09 - // Region 3: k10 .. k14 - // Region 4: k15 .. k19 - // Region 5: k20 .. k24 - // Region 6: k26 .. 
k28 - fn prepare_data_on_disk(path: &Path) { - let mut cfg = TikvConfig::default(); - cfg.storage.data_dir = path.to_str().unwrap().to_string(); - cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); - cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); - cfg.gc.enable_compaction_filter = false; - let cache = cfg.storage.block_cache.build_shared_cache(); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); - let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); - - let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); - let mut wb = raft_engine.log_batch(5); - for i in 0..6 { - let mut region = metapb::Region::default(); - let start_key = format!("k{:02}", i * 5); - let end_key = format!("k{:02}", (i + 1) * 5); - region.set_id(i + 1); - region.set_start_key(start_key.into_bytes()); - region.set_end_key(end_key.into_bytes()); - let mut region_state = RegionLocalState::default(); - region_state.set_tablet_index(INITIAL_TABLET_INDEX); - if region.get_id() == 4 { - region_state.set_state(PeerState::Tombstone); - } else if region.get_id() == 6 { - region.set_start_key(b"k26".to_vec()); - region.set_end_key(b"k28".to_vec()); - } - region_state.set_region(region); - - let tablet_path = reg.tablet_path(i + 1, INITIAL_TABLET_INDEX); - // Use tikv_kv::RocksEngine instead of loading tablet from registry in order to - // use prewrite method to prepare mvcc data - let mut engine = TestEngineBuilder::new().path(tablet_path).build().unwrap(); - for i in i * 5..(i + 1) * 5 { - let key = format!("zk{:02}", i); - let val = format!("val{:02}", i); - // Use prewrite only is enough for preparing mvcc data - must_prewrite_put( - &mut engine, - key.as_bytes(), - val.as_bytes(), - key.as_bytes(), - 10, - ); - } - - wb.put_region_state(i + 1, INITIAL_APPLY_INDEX, ®ion_state) - .unwrap(); - } - raft_engine.consume(&mut wb, true).unwrap(); - } - - #[test] - fn test_scan_mvcc() { - let dir = test_util::temp_dir("test-debugger", false); - prepare_data_on_disk(dir.path()); - let debugger = new_debugger(dir.path()); - // Test scan with bad start, end or limit. 
- assert!(debugger.scan_mvcc(b"z", b"", 0).is_err()); - assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err()); - - let verify_scanner = - |range, scanner: &mut dyn Iterator, MvccInfo)>>| { - for i in range { - let key = format!("k{:02}", i).into_bytes(); - assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); - } - }; - - // full scann - let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap(); - verify_scanner(0..15, &mut scanner); - verify_scanner(20..25, &mut scanner); - verify_scanner(26..28, &mut scanner); - assert!(scanner.next().is_none()); - - // Range has more elements than limit - let mut scanner = debugger.scan_mvcc(b"zk01", b"zk09", 5).unwrap(); - verify_scanner(1..6, &mut scanner); - assert!(scanner.next().is_none()); - - // Range has less elements than limit - let mut scanner = debugger.scan_mvcc(b"zk07", b"zk10", 10).unwrap(); - verify_scanner(7..10, &mut scanner); - assert!(scanner.next().is_none()); - - // Start from the key where no region contains it - let mut scanner = debugger.scan_mvcc(b"zk16", b"", 100).unwrap(); - verify_scanner(20..25, &mut scanner); - verify_scanner(26..28, &mut scanner); - assert!(scanner.next().is_none()); - - // Scan a range not existed in the cluster - let mut scanner = debugger.scan_mvcc(b"zk16", b"zk19", 100).unwrap(); - assert!(scanner.next().is_none()); - - // The end key is less than the start_key of the first region - let mut scanner = debugger.scan_mvcc(b"", b"zj", 100).unwrap(); - assert!(scanner.next().is_none()); - } - #[test] fn test_compact() { let dir = test_util::temp_dir("test-debugger", false); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 9baa04d0b4f1..a9dbd36a81a6 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -7,6 +7,7 @@ mod test_bootstrap; mod test_cmd_epoch_checker; mod test_conf_change; mod test_coprocessor; +mod test_debugger; mod test_disk_full; mod test_early_apply; mod test_encryption; diff --git a/tests/failpoints/cases/test_debugger.rs b/tests/failpoints/cases/test_debugger.rs new file mode 100644 index 000000000000..f70ebcb6d32f --- /dev/null +++ b/tests/failpoints/cases/test_debugger.rs @@ -0,0 +1,147 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::path::Path; + +use engine_traits::{RaftEngine, RaftLogBatch, TabletRegistry}; +use kvproto::{ + kvrpcpb::MvccInfo, + metapb, + raft_serverpb::{PeerState, RegionLocalState}, +}; +use raft_log_engine::RaftLogEngine; +use test_raftstore::new_peer; +use tikv::{ + config::TikvConfig, + server::{debug::Debugger, debug2::new_debugger, KvEngineFactoryBuilder}, + storage::{txn::tests::must_prewrite_put, TestEngineBuilder}, +}; + +const INITIAL_TABLET_INDEX: u64 = 5; +const INITIAL_APPLY_INDEX: u64 = 5; + +// Prepare some data +// Region meta range and rocksdb range of each region: +// Region 1: k01 .. k04 rocksdb: zk00 .. zk04 +// Region 2: k05 .. k09 rocksdb: zk05 .. zk09 +// Region 3: k10 .. k14 rocksdb: zk10 .. zk14 +// Region 4: k15 .. k19 rocksdb: zk15 .. zk19 +// Region 5: k20 .. k24 rocksdb: zk20 .. zk24 +// Region 6: k26 .. k27 rocksdb: zk25 .. 
zk29 +fn prepare_data_on_disk(path: &Path) { + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + cfg.gc.enable_compaction_filter = false; + let cache = cfg.storage.block_cache.build_shared_cache(); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + let mut wb = raft_engine.log_batch(5); + for i in 0..6 { + let mut region = metapb::Region::default(); + let start_key = if i != 0 { + format!("k{:02}", i * 5) + } else { + String::from("k01") + }; + let end_key = format!("k{:02}", (i + 1) * 5); + region.set_id(i + 1); + region.set_start_key(start_key.into_bytes()); + region.set_end_key(end_key.into_bytes()); + let mut region_state = RegionLocalState::default(); + region_state.set_tablet_index(INITIAL_TABLET_INDEX); + if region.get_id() == 4 { + region_state.set_state(PeerState::Tombstone); + } else if region.get_id() == 6 { + region.set_start_key(b"k26".to_vec()); + region.set_end_key(b"k28".to_vec()); + } + // add dummy peer to pass verification + region.mut_peers().push(new_peer(0, 0)); + region_state.set_region(region); + + let tablet_path = reg.tablet_path(i + 1, INITIAL_TABLET_INDEX); + // Use tikv_kv::RocksEngine instead of loading tablet from registry in order to + // use prewrite method to prepare mvcc data + let mut engine = TestEngineBuilder::new().path(tablet_path).build().unwrap(); + for i in i * 5..(i + 1) * 5 { + let key = format!("zk{:02}", i); + let val = format!("val{:02}", i); + // Use prewrite only is enough for preparing mvcc data + must_prewrite_put( + &mut engine, + key.as_bytes(), + val.as_bytes(), + key.as_bytes(), + 10, + ); + } + + wb.put_region_state(i + 1, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + } + raft_engine.consume(&mut wb, true).unwrap(); +} + +// For simplicity, the format of the key is inline with data in +// prepare_data_on_disk +fn extract_key(key: &[u8]) -> &[u8] { + &key[1..4] +} + +#[test] +fn test_scan_mvcc() { + // We deliberately make region meta not match with rocksdb, set unlimited range + // compaction filter to avoid trim operation. + fail::cfg("unlimited_range_compaction_filter", "return").unwrap(); + + let dir = test_util::temp_dir("test-debugger", false); + prepare_data_on_disk(dir.path()); + let debugger = new_debugger(dir.path()); + // Test scan with bad start, end or limit. 
+ assert!(debugger.scan_mvcc(b"z", b"", 0).is_err());
+ assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err());
+
+ let verify_scanner =
+ |range, scanner: &mut dyn Iterator, MvccInfo)>>| {
+ for i in range {
+ let key = format!("k{:02}", i).into_bytes();
+ assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0));
+ }
+ };
+
+ // Full scan
+ let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap();
+ verify_scanner(1..15, &mut scanner);
+ verify_scanner(20..25, &mut scanner);
+ verify_scanner(26..28, &mut scanner);
+ assert!(scanner.next().is_none());
+
+ // Range has more elements than limit
+ let mut scanner = debugger.scan_mvcc(b"zk01", b"zk09", 5).unwrap();
+ verify_scanner(1..6, &mut scanner);
+ assert!(scanner.next().is_none());
+
+ // Range has fewer elements than limit
+ let mut scanner = debugger.scan_mvcc(b"zk07", b"zk10", 10).unwrap();
+ verify_scanner(7..10, &mut scanner);
+ assert!(scanner.next().is_none());
+
+ // Start from a key that no region contains
+ let mut scanner = debugger.scan_mvcc(b"zk16", b"", 100).unwrap();
+ verify_scanner(20..25, &mut scanner);
+ verify_scanner(26..28, &mut scanner);
+ assert!(scanner.next().is_none());
+
+ // Scan a range that does not exist in the cluster
+ let mut scanner = debugger.scan_mvcc(b"zk16", b"zk19", 100).unwrap();
+ assert!(scanner.next().is_none());
+
+ // The end key is less than the start_key of the first region
+ let mut scanner = debugger.scan_mvcc(b"", b"zj", 100).unwrap();
+ assert!(scanner.next().is_none());
+}

From 1669a72fac8176cc7a2be7fe10f43f1657d4c21f Mon Sep 17 00:00:00 2001
From: ekexium
Date: Wed, 30 Aug 2023 17:45:40 +0800
Subject: [PATCH 018/203] txn: add logs for assertion failure (#12305)

close tikv/tikv#12304

Add logs for assertion failure

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
---
 src/storage/txn/actions/prewrite.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs
index 90f739b87050..64e22a13585a 100644
--- a/src/storage/txn/actions/prewrite.rs
+++ b/src/storage/txn/actions/prewrite.rs
@@ -239,6 +239,7 @@ impl LockStatus {
 }

 /// A single mutation to be prewritten.
+#[derive(Debug)]
 struct PrewriteMutation<'a> {
 key: Key,
 value: Option,
@@ -677,6 +678,12 @@ impl<'a> PrewriteMutation<'a> {
 if self.skip_constraint_check() {
 self.check_for_newer_version(reader)?;
 }
+ let (write, commit_ts) = write
+ .as_ref()
+ .map(|(w, ts)| (Some(w), Some(ts)))
+ .unwrap_or((None, None));
+ error!("assertion failure"; "assertion" => ?self.assertion, "write" => ?write,
+ "commit_ts" => commit_ts, "mutation" => ?self);
 assertion_err?;
 }

From b507aad3be0eaa6c96033ef7300605bda833bf54 Mon Sep 17 00:00:00 2001
From: buffer <1045931706@qq.com>
Date: Thu, 31 Aug 2023 16:05:09 +0800
Subject: [PATCH 019/203] config: make split config can update (#15473)

close tikv/tikv#15403

1. Support updating the split config dynamically. Previously, the
   `optimize_for` function effectively made the config immutable.
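As a rough illustration of the pattern (a standalone sketch, not the
TiKV code; the struct, field, and threshold values below are simplified
placeholders), an `Option`-backed field keeps an explicit user setting
distinct from the size-dependent default, so `optimize_for` only fills
in values that were never set and later dynamic updates still take
effect:

    // Minimal sketch: `None` means "not explicitly configured".
    struct SplitConfig {
        qps_threshold: Option<usize>,
    }

    impl SplitConfig {
        // Reads fall back to a default when the field was never set.
        fn qps_threshold(&self) -> usize {
            self.qps_threshold.unwrap_or(3000) // placeholder default
        }

        // Fill the field only if the user has not set it explicitly,
        // instead of overwriting it unconditionally.
        fn optimize_for(&mut self, big_region: bool) {
            if self.qps_threshold.is_none() {
                self.qps_threshold = Some(if big_region { 7000 } else { 3000 });
            }
        }
    }

    fn main() {
        let mut cfg = SplitConfig { qps_threshold: None };
        cfg.optimize_for(true);
        assert_eq!(cfg.qps_threshold(), 7000); // default for big regions
        cfg.qps_threshold = Some(100); // a later dynamic update still wins
        assert_eq!(cfg.qps_threshold(), 100);
    }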
Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore/src/store/worker/pd.rs | 2 +- .../src/store/worker/split_config.rs | 58 ++++++++++++++----- .../src/store/worker/split_controller.rs | 45 +++++++------- src/config/mod.rs | 12 ++-- tests/integrations/config/mod.rs | 6 +- tests/integrations/raftstore/test_stats.rs | 2 +- 6 files changed, 81 insertions(+), 44 deletions(-) diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index d812830569ac..e8c8e2f575b1 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -658,7 +658,7 @@ where // Register the region CPU records collector. if auto_split_controller .cfg - .region_cpu_overload_threshold_ratio + .region_cpu_overload_threshold_ratio() > 0.0 { region_cpu_records_collector = diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 8fec853bb007..2d29bd21a891 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -68,18 +68,18 @@ pub fn get_sample_num() -> usize { #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct SplitConfig { - pub qps_threshold: usize, + pub qps_threshold: Option, pub split_balance_score: f64, pub split_contained_score: f64, pub detect_times: u64, pub sample_num: usize, pub sample_threshold: u64, - pub byte_threshold: usize, + pub byte_threshold: Option, #[doc(hidden)] pub grpc_thread_cpu_overload_threshold_ratio: f64, #[doc(hidden)] pub unified_read_pool_thread_cpu_overload_threshold_ratio: f64, - pub region_cpu_overload_threshold_ratio: f64, + pub region_cpu_overload_threshold_ratio: Option, // deprecated. #[online_config(skip)] #[doc(hidden)] @@ -95,18 +95,18 @@ pub struct SplitConfig { impl Default for SplitConfig { fn default() -> SplitConfig { SplitConfig { - qps_threshold: DEFAULT_QPS_THRESHOLD, + qps_threshold: None, split_balance_score: DEFAULT_SPLIT_BALANCE_SCORE, split_contained_score: DEFAULT_SPLIT_CONTAINED_SCORE, detect_times: DEFAULT_DETECT_TIMES, sample_num: DEFAULT_SAMPLE_NUM, sample_threshold: DEFAULT_SAMPLE_THRESHOLD, - byte_threshold: DEFAULT_BYTE_THRESHOLD, + byte_threshold: None, grpc_thread_cpu_overload_threshold_ratio: DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, unified_read_pool_thread_cpu_overload_threshold_ratio: DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, - region_cpu_overload_threshold_ratio: REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + region_cpu_overload_threshold_ratio: None, size_threshold: None, // deprecated. key_threshold: None, // deprecated. 
} @@ -124,7 +124,7 @@ impl SplitConfig { ("split_balance_score or split_contained_score should be between 0 and 1.").into(), ); } - if self.sample_num >= self.qps_threshold { + if self.sample_num >= self.qps_threshold() { return Err( ("sample_num should be less than qps_threshold for load-base-split.").into(), ); @@ -133,20 +133,52 @@ impl SplitConfig { || self.grpc_thread_cpu_overload_threshold_ratio < 0.0 || self.unified_read_pool_thread_cpu_overload_threshold_ratio > 1.0 || self.unified_read_pool_thread_cpu_overload_threshold_ratio < 0.0 - || self.region_cpu_overload_threshold_ratio > 1.0 - || self.region_cpu_overload_threshold_ratio < 0.0 + || self.region_cpu_overload_threshold_ratio() > 1.0 + || self.region_cpu_overload_threshold_ratio() < 0.0 { return Err(("threshold ratio should be between 0 and 1.").into()); } Ok(()) } + pub fn qps_threshold(&self) -> usize { + self.qps_threshold.unwrap_or(DEFAULT_QPS_THRESHOLD) + } + + pub fn byte_threshold(&self) -> usize { + self.byte_threshold.unwrap_or(DEFAULT_BYTE_THRESHOLD) + } + + pub fn region_cpu_overload_threshold_ratio(&self) -> f64 { + self.region_cpu_overload_threshold_ratio + .unwrap_or(REGION_CPU_OVERLOAD_THRESHOLD_RATIO) + } + pub fn optimize_for(&mut self, region_size: ReadableSize) { const LARGE_REGION_SIZE_IN_MB: u64 = 4096; - if region_size.as_mb() >= LARGE_REGION_SIZE_IN_MB { - self.qps_threshold = DEFAULT_BIG_REGION_QPS_THRESHOLD; - self.region_cpu_overload_threshold_ratio = BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO; - self.byte_threshold = DEFAULT_BIG_REGION_BYTE_THRESHOLD; + let big_size = region_size.as_mb() >= LARGE_REGION_SIZE_IN_MB; + if self.qps_threshold.is_none() { + self.qps_threshold = Some(if big_size { + DEFAULT_BIG_REGION_QPS_THRESHOLD + } else { + DEFAULT_QPS_THRESHOLD + }); + } + + if self.byte_threshold.is_none() { + self.byte_threshold = Some(if big_size { + DEFAULT_BIG_REGION_BYTE_THRESHOLD + } else { + DEFAULT_BYTE_THRESHOLD + }); + } + + if self.region_cpu_overload_threshold_ratio.is_none() { + self.region_cpu_overload_threshold_ratio = Some(if big_size { + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO + } else { + REGION_CPU_OVERLOAD_THRESHOLD_RATIO + }); } } } diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index d432f264e016..4bbcc7737638 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -608,7 +608,7 @@ impl AutoSplitController { } fn should_check_region_cpu(&self) -> bool { - self.cfg.region_cpu_overload_threshold_ratio > 0.0 + self.cfg.region_cpu_overload_threshold_ratio() > 0.0 } fn is_grpc_poll_busy(&self, avg_grpc_thread_usage: f64) -> bool { @@ -643,7 +643,7 @@ impl AutoSplitController { return false; } region_cpu_usage / unified_read_pool_thread_usage - >= self.cfg.region_cpu_overload_threshold_ratio + >= self.cfg.region_cpu_overload_threshold_ratio() } // collect the read stats from read_stats_vec and dispatch them to a Region @@ -787,9 +787,9 @@ impl AutoSplitController { debug!("load base split params"; "region_id" => region_id, "qps" => qps, - "qps_threshold" => self.cfg.qps_threshold, + "qps_threshold" => self.cfg.qps_threshold(), "byte" => byte, - "byte_threshold" => self.cfg.byte_threshold, + "byte_threshold" => self.cfg.byte_threshold(), "cpu_usage" => cpu_usage, "is_region_busy" => is_region_busy, ); @@ -800,8 +800,8 @@ impl AutoSplitController { // 1. If the QPS or the byte does not meet the threshold, skip. // 2. 
If the Unified Read Pool or the region is not hot enough, skip. - if qps < self.cfg.qps_threshold - && byte < self.cfg.byte_threshold + if qps < self.cfg.qps_threshold() + && byte < self.cfg.byte_threshold() && (!is_unified_read_pool_busy || !is_region_busy) { self.recorders.remove_entry(®ion_id); @@ -917,13 +917,13 @@ impl AutoSplitController { pub fn refresh_and_check_cfg(&mut self) -> SplitConfigChange { let mut cfg_change = SplitConfigChange::Noop; if let Some(incoming) = self.cfg_tracker.any_new() { - if self.cfg.region_cpu_overload_threshold_ratio <= 0.0 - && incoming.region_cpu_overload_threshold_ratio > 0.0 + if self.cfg.region_cpu_overload_threshold_ratio() <= 0.0 + && incoming.region_cpu_overload_threshold_ratio() > 0.0 { cfg_change = SplitConfigChange::UpdateRegionCpuCollector(true); } - if self.cfg.region_cpu_overload_threshold_ratio > 0.0 - && incoming.region_cpu_overload_threshold_ratio <= 0.0 + if self.cfg.region_cpu_overload_threshold_ratio() > 0.0 + && incoming.region_cpu_overload_threshold_ratio() <= 0.0 { cfg_change = SplitConfigChange::UpdateRegionCpuCollector(false); } @@ -943,12 +943,12 @@ impl AutoSplitController { mod tests { use online_config::{ConfigChange, ConfigManager, ConfigValue}; use resource_metering::{RawRecord, TagInfos}; - use tikv_util::config::VersionTrack; + use tikv_util::config::{ReadableSize, VersionTrack}; use txn_types::Key; use super::*; use crate::store::worker::split_config::{ - DEFAULT_SAMPLE_NUM, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_SAMPLE_NUM, }; enum Position { @@ -1193,7 +1193,7 @@ mod tests { fn check_split_key(mode: &[u8], qps_stats: Vec, split_keys: Vec<&[u8]>) { let mode = String::from_utf8(Vec::from(mode)).unwrap(); let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 1; + hub.cfg.qps_threshold = Some(1); hub.cfg.sample_threshold = 0; for i in 0..10 { @@ -1226,7 +1226,7 @@ mod tests { ) { let mode = String::from_utf8(Vec::from(mode)).unwrap(); let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 1; + hub.cfg.qps_threshold = Some(1); hub.cfg.sample_threshold = 0; for i in 0..10 { @@ -1291,7 +1291,7 @@ mod tests { #[test] fn test_sample_key_num() { let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 2000; + hub.cfg.qps_threshold = Some(2000); hub.cfg.sample_num = 2000; hub.cfg.sample_threshold = 0; @@ -1608,7 +1608,8 @@ mod tests { #[test] fn test_refresh_and_check_cfg() { - let split_config = SplitConfig::default(); + let mut split_config = SplitConfig::default(); + split_config.optimize_for(ReadableSize::mb(5000)); let mut split_cfg_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(split_config))); let mut auto_split_controller = @@ -1620,8 +1621,8 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO + .region_cpu_overload_threshold_ratio(), + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); // Set to zero. 
dispatch_split_cfg_change( @@ -1636,7 +1637,7 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, + .region_cpu_overload_threshold_ratio(), 0.0 ); assert_eq!( @@ -1647,7 +1648,7 @@ mod tests { dispatch_split_cfg_change( &mut split_cfg_manager, "region_cpu_overload_threshold_ratio", - ConfigValue::F64(REGION_CPU_OVERLOAD_THRESHOLD_RATIO), + ConfigValue::F64(0.1), ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), @@ -1656,8 +1657,8 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO + .region_cpu_overload_threshold_ratio(), + 0.1 ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), diff --git a/src/config/mod.rs b/src/config/mod.rs index 2494e84dfbdc..8c0c04957b11 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6139,12 +6139,12 @@ mod tests { assert_eq!(default_cfg.coprocessor.region_split_size(), SPLIT_SIZE); assert!(!default_cfg.coprocessor.enable_region_bucket()); - assert_eq!(default_cfg.split.qps_threshold, DEFAULT_QPS_THRESHOLD); + assert_eq!(default_cfg.split.qps_threshold(), DEFAULT_QPS_THRESHOLD); assert_eq!( - default_cfg.split.region_cpu_overload_threshold_ratio, + default_cfg.split.region_cpu_overload_threshold_ratio(), REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); - assert_eq!(default_cfg.split.byte_threshold, DEFAULT_BYTE_THRESHOLD); + assert_eq!(default_cfg.split.byte_threshold(), DEFAULT_BYTE_THRESHOLD); let mut default_cfg = TikvConfig::default(); default_cfg.storage.engine = EngineType::RaftKv2; @@ -6154,15 +6154,15 @@ mod tests { RAFTSTORE_V2_SPLIT_SIZE ); assert_eq!( - default_cfg.split.qps_threshold, + default_cfg.split.qps_threshold(), DEFAULT_BIG_REGION_QPS_THRESHOLD ); assert_eq!( - default_cfg.split.region_cpu_overload_threshold_ratio, + default_cfg.split.region_cpu_overload_threshold_ratio(), BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); assert_eq!( - default_cfg.split.byte_threshold, + default_cfg.split.byte_threshold(), DEFAULT_BIG_REGION_BYTE_THRESHOLD ); assert!(default_cfg.coprocessor.enable_region_bucket()); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d3091e30eeda..c6e98e95c05b 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -864,10 +864,14 @@ fn test_serde_custom_tikv_config() { renew_batch_max_size: 8192, alloc_ahead_buffer: ReadableDuration::millis(3000), }; + value + .split + .optimize_for(value.coprocessor.region_max_size()); value.resource_control = ResourceControlConfig { enabled: false }; let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); - let load = toml::from_str(&custom).unwrap(); + let mut load: TikvConfig = toml::from_str(&custom).unwrap(); + load.split.optimize_for(load.coprocessor.region_max_size()); assert_eq_debug(&value, &load); let dump = toml::to_string_pretty(&load).unwrap(); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 67e5e261dab3..d61d6a59182c 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -575,7 +575,7 @@ pub fn test_rollback() { fn test_query_num(query: Box, is_raw_kv: bool) { let (mut cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); - cluster.cfg.split.qps_threshold = 0; + cluster.cfg.split.qps_threshold = Some(0); 
cluster.cfg.split.split_balance_score = 2.0; cluster.cfg.split.split_contained_score = 2.0; cluster.cfg.split.detect_times = 1; From 251df183b0d089d01e629791124f70c3cbb6fdbf Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 31 Aug 2023 16:24:39 +0800 Subject: [PATCH 020/203] raftstore-v2: reuse failpoint tests in async_io_test.rs (#15476) ref tikv/tikv#15409 reuse failpoint tests in async_io_test Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/command/mod.rs | 1 + components/test_raftstore-v2/src/cluster.rs | 25 ++++++++++++++++++- components/test_raftstore/src/cluster.rs | 22 ++++++++-------- tests/failpoints/cases/test_async_io.rs | 17 ++++++++++--- .../cases/test_cmd_epoch_checker.rs | 9 +++---- tests/failpoints/cases/test_disk_full.rs | 20 +++++++-------- tests/failpoints/cases/test_merge.rs | 6 ++--- .../raftstore/test_joint_consensus.rs | 16 ++++-------- tests/integrations/raftstore/test_merge.rs | 6 ++--- 9 files changed, 74 insertions(+), 48 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0fd88cc987b8..c39f2412f325 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -583,6 +583,7 @@ impl Apply { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); fail::fail_point!("on_handle_apply_1003", self.peer_id() == 1003, |_| {}); fail::fail_point!("on_handle_apply_2", self.peer_id() == 2, |_| {}); + fail::fail_point!("on_handle_apply", |_| {}); fail::fail_point!("on_handle_apply_store_1", self.store_id() == 1, |_| {}); let now = std::time::Instant::now(); let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 08de4cc3aa1d..8ede32901671 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -51,7 +51,7 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{ check_raft_cmd_request, is_error_response, new_admin_request, new_delete_cmd, - new_delete_range_cmd, new_get_cf_cmd, new_peer, new_prepare_merge, new_put_cf_cmd, + new_delete_range_cmd, new_get_cf_cmd, new_peer, new_prepare_merge, new_put_cf_cmd, new_put_cmd, new_region_detail_cmd, new_region_leader_cmd, new_request, new_status_request, new_store, new_tikv_config_with_api_ver, new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, RawEngine, @@ -1263,6 +1263,29 @@ impl, EK: KvEngine> Cluster { panic!("find no region for {}", log_wrappers::hex_encode_upper(key)); } + pub fn async_request( + &mut self, + mut req: RaftCmdRequest, + ) -> BoxFuture<'static, RaftCmdResponse> { + let region_id = req.get_header().get_region_id(); + let leader = self.leader_of_region(region_id).unwrap(); + req.mut_header().set_peer(leader.clone()); + self.sim + .wl() + .async_command_on_node(leader.get_store_id(), req) + } + + pub fn async_put( + &mut self, + key: &[u8], + value: &[u8], + ) -> Result> { + let mut region = self.get_region(key); + let reqs = vec![new_put_cmd(key, value)]; + let put = new_request(region.get_id(), region.take_region_epoch(), reqs, false); + Ok(self.async_request(put)) + } + pub fn must_put(&mut self, key: &[u8], value: &[u8]) { self.must_put_cf(CF_DEFAULT, key, value); } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 
23edf0efab1f..e65028fe9683 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,7 +19,7 @@ use engine_traits::{ WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture}; +use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; use kvproto::{ errorpb::Error as PbError, kvrpcpb::{ApiVersion, Context, DiskFullOpt}, @@ -51,7 +51,6 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::server::Result as ServerResult; use tikv_util::{ - mpsc::future, thread_group::GroupProperties, time::{Instant, ThreadReadId}, worker::LazyWorker, @@ -969,7 +968,7 @@ impl Cluster { pub fn async_request( &mut self, req: RaftCmdRequest, - ) -> Result> { + ) -> Result> { self.async_request_with_opts(req, Default::default()) } @@ -977,21 +976,24 @@ impl Cluster { &mut self, mut req: RaftCmdRequest, opts: RaftCmdExtraOpts, - ) -> Result> { + ) -> Result> { let region_id = req.get_header().get_region_id(); let leader = self.leader_of_region(region_id).unwrap(); req.mut_header().set_peer(leader.clone()); - let (cb, rx) = make_cb(&req); + let (cb, mut rx) = make_cb(&req); self.sim .rl() .async_command_on_node_with_opts(leader.get_store_id(), req, cb, opts)?; - Ok(rx) + Ok(Box::pin(async move { + let fut = rx.next(); + fut.await.unwrap() + })) } pub fn async_exit_joint( &mut self, region_id: u64, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1007,7 +1009,7 @@ impl Cluster { &mut self, key: &[u8], value: &[u8], - ) -> Result> { + ) -> Result> { let mut region = self.get_region(key); let reqs = vec![new_put_cmd(key, value)]; let put = new_request(region.get_id(), region.take_region_epoch(), reqs, false); @@ -1018,7 +1020,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1031,7 +1033,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); diff --git a/tests/failpoints/cases/test_async_io.rs b/tests/failpoints/cases/test_async_io.rs index 3d53b9c5f14a..8ce349805b06 100644 --- a/tests/failpoints/cases/test_async_io.rs +++ b/tests/failpoints/cases/test_async_io.rs @@ -8,13 +8,15 @@ use std::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::HandyRwLock; // Test if the entries can be committed and applied on followers even when // leader's io is paused. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_async_io_commit_without_leader_persist() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.cmd_batch_concurrent_ready_max_count = 0; cluster.cfg.raft_store.store_io_pool_size = 2; let pd_client = Arc::clone(&cluster.pd_client); @@ -49,9 +51,10 @@ fn test_async_io_commit_without_leader_persist() { /// Test if the leader delays its destroy after applying conf change to /// remove itself. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_async_io_delay_destroy_after_conf_change() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.store_io_pool_size = 2; let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -93,6 +96,9 @@ fn test_async_io_delay_destroy_after_conf_change() { /// Test if the peer can be destroyed when it receives a tombstone msg and /// its snapshot is persisting. +/// +/// Note: snapshot flow is changed, so partitioend-raft-kv does not support this +/// test. #[test] fn test_async_io_cannot_destroy_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); @@ -176,6 +182,9 @@ fn test_async_io_cannot_destroy_when_persist_snapshot() { } /// Test if the peer can handle ready when its snapshot is persisting. +/// +/// Note: snapshot flow is changed, so partitioend-raft-kv does not support this +/// test. #[test] fn test_async_io_cannot_handle_ready_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 73bc741d9bbe..8af8e29f3ac7 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -10,7 +10,7 @@ use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; -use tikv_util::{mpsc::future, HandyRwLock}; +use tikv_util::{future::block_on_timeout, mpsc::future, HandyRwLock}; struct CbReceivers { proposed: mpsc::Receiver<()>, @@ -399,9 +399,7 @@ fn test_accept_proposal_during_conf_change() { let conf_change_fp = "apply_on_conf_change_all_1"; fail::cfg(conf_change_fp, "pause").unwrap(); let mut add_peer_rx = cluster.async_add_peer(r, new_peer(2, 2)).unwrap(); - add_peer_rx - .recv_timeout(Duration::from_millis(100)) - .unwrap_err(); + block_on_timeout(add_peer_rx.as_mut(), Duration::from_millis(100)).unwrap_err(); // Conf change doesn't affect proposals. let write_req = make_write_req(&mut cluster, b"k"); @@ -419,8 +417,7 @@ fn test_accept_proposal_during_conf_change() { fail::remove(conf_change_fp); assert!( - !add_peer_rx - .recv_timeout(Duration::from_secs(1)) + !block_on_timeout(add_peer_rx, Duration::from_secs(1)) .unwrap() .get_header() .has_error() diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index bd4271be12d6..217269bb5b85 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -86,8 +86,8 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { // Test new normal proposals won't be allowed when disk is full. let old_last_index = cluster.raft_local_state(1, 1).last_index; - let mut rx = cluster.async_put(b"k2", b"v2").unwrap(); - assert_disk_full(&rx.recv_timeout(Duration::from_secs(2)).unwrap()); + let rx = cluster.async_put(b"k2", b"v2").unwrap(); + assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); @@ -299,8 +299,8 @@ fn test_majority_disk_full() { } // Normal proposals will be rejected because of majority peers' disk full. 
- let mut ch = cluster.async_put(b"k2", b"v2").unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_put(b"k2", b"v2").unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); // Proposals with special `DiskFullOpt`s can be accepted even if all peers are @@ -310,8 +310,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert!(!resp.get_header().has_error()); // Reset disk full status for peer 2 and 3. 2 follower reads must success @@ -335,8 +335,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(10)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(10)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); // Peer 2 disk usage changes from already full to almost full. @@ -354,8 +354,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch, reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![3]); for i in 0..3 { diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 3cc72d44da10..eb6b8a235e1d 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -21,7 +21,7 @@ use raft::eraftpb::MessageType; use raftstore::store::*; use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; -use tikv_util::{config::*, time::Instant, HandyRwLock}; +use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is rollback as expected. @@ -1532,7 +1532,7 @@ fn test_retry_pending_prepare_merge_fail() { let mut rx = cluster.async_put(b"k1", b"v11").unwrap(); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); - rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); + block_on_timeout(rx.as_mut(), Duration::from_millis(200)).unwrap_err(); // Then, start merging. PrepareMerge should become pending because applied_index // is smaller than proposed_index. 
@@ -1546,7 +1546,7 @@ fn test_retry_pending_prepare_merge_fail() { fail::cfg("disk_already_full_peer_1", "return").unwrap(); fail::cfg("disk_already_full_peer_2", "return").unwrap(); fail::remove("on_handle_apply"); - let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + let res = block_on_timeout(rx, Duration::from_secs(1)).unwrap(); assert!(!res.get_header().has_error(), "{:?}", res); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); diff --git a/tests/integrations/raftstore/test_joint_consensus.rs b/tests/integrations/raftstore/test_joint_consensus.rs index 282d0d0525c1..55def7a099b3 100644 --- a/tests/integrations/raftstore/test_joint_consensus.rs +++ b/tests/integrations/raftstore/test_joint_consensus.rs @@ -10,7 +10,7 @@ use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use raftstore::Result; use test_raftstore::*; -use tikv_util::{mpsc::future, store::find_peer}; +use tikv_util::{future::block_on_timeout, store::find_peer}; /// Tests multiple confchange commands can be done by one request #[test] @@ -164,24 +164,18 @@ fn test_request_in_joint_state() { // Isolated peer 2, so the old configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(2)); - let mut rx = cluster + let rx = cluster .async_request(put_request(®ion, 1, b"k3", b"v3")) .unwrap(); - assert_eq!( - rx.recv_timeout(Duration::from_millis(100)), - Err(future::RecvTimeoutError::Timeout) - ); + block_on_timeout(rx, Duration::from_millis(100)).unwrap_err(); cluster.clear_send_filters(); // Isolated peer 3, so the new configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(3)); - let mut rx = cluster + let rx = cluster .async_request(put_request(®ion, 1, b"k4", b"v4")) .unwrap(); - assert_eq!( - rx.recv_timeout(Duration::from_millis(100)), - Err(future::RecvTimeoutError::Timeout) - ); + block_on_timeout(rx, Duration::from_millis(100)).unwrap_err(); cluster.clear_send_filters(); // Leave joint diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index afc0c9afab4f..ceb888a2b22a 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -14,7 +14,7 @@ use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; -use tikv_util::{config::*, HandyRwLock}; +use tikv_util::{config::*, future::block_on_timeout, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is working as expected in a general condition. @@ -1444,10 +1444,10 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { // The gap is too large, so the previous merge should fail. And this new put // request should be allowed. 
- let mut res = cluster.async_put(b"k1", b"new_val").unwrap(); + let res = cluster.async_put(b"k1", b"new_val").unwrap(); cluster.clear_send_filters(); - res.recv_timeout(Duration::from_secs(5)).unwrap(); + block_on_timeout(res, Duration::from_secs(5)).unwrap(); assert_eq!(cluster.must_get(b"k1").unwrap(), b"new_val"); } From 437a68d7daff44ad243d24cb5caeee9fc29b3a5a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:14:09 +0800 Subject: [PATCH 021/203] storage: avoid duplicated Instant:now (#15489) close tikv/tikv#15490 avoid duplicated Instant:now Signed-off-by: SpadeA-Tang --- src/storage/mod.rs | 171 ++++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 72 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0d4679fbe18e..cb4057bfd7e2 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -97,7 +97,7 @@ use tikv_util::{ deadline::Deadline, future::try_poll, quota_limiter::QuotaLimiter, - time::{duration_to_ms, Instant, ThreadReadId}, + time::{duration_to_ms, duration_to_sec, Instant, ThreadReadId}, }; use tracker::{ clear_tls_tracker_token, set_tls_tracker_token, with_tls_tracker, TrackedFuture, TrackerToken, @@ -645,7 +645,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [key.as_encoded()])?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); // The bypass_locks and access_locks set will be checked at most once. // `TsSet::vec` is more efficient here. @@ -697,12 +697,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); let read_bytes = key.len() + result @@ -765,7 +768,7 @@ impl Storage { ids: Vec, trackers: Vec, consumer: P, - begin_instant: tikv_util::time::Instant, + begin_instant: Instant, ) -> impl Future> { const CMD: CommandKind = CommandKind::batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read @@ -805,7 +808,7 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(requests.len() as f64); - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let read_id = Some(ThreadReadId::new()); let mut statistics = Statistics::default(); let mut req_snaps = vec![]; @@ -1019,7 +1022,7 @@ impl Storage { keys.iter().map(Key::as_encoded), )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1086,12 +1089,15 @@ impl Storage { (result, stats) }); metrics::tls_collect_scan_details(CMD, &stats); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + 
now.saturating_duration_since(command_duration), + )); let read_bytes = stats.cf_statistics(CF_DEFAULT).flow_stats.read_bytes + stats.cf_statistics(CF_LOCK).flow_stats.read_bytes @@ -1217,7 +1223,7 @@ impl Storage { if reverse_scan { std::mem::swap(&mut start_key, &mut end_key); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1296,12 +1302,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); res.map_err(Error::from).map(|results| { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC @@ -1383,7 +1392,7 @@ impl Storage { // which resolves locks on regions, and boundary of regions will be out of range // of TiDB keys. - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); concurrency_manager.update_max_ts(max_ts); let begin_instant = Instant::now(); @@ -1455,12 +1464,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(locks) }) @@ -1669,7 +1681,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1704,12 +1716,15 @@ impl Storage { &stats, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); r } } @@ -1776,7 +1791,7 @@ impl Storage { .map_err(Error::from)?; } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let read_id = Some(ThreadReadId::new()); let mut snaps = vec![]; for (mut req, id) in gets.into_iter().zip(ids) { @@ -1845,12 +1860,15 @@ impl Storage { } } + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(()) } .in_resource_metering_tag(resource_tag), @@ -1896,7 +1914,7 @@ impl Storage { 
Self::check_api_version(api_version, ctx.api_version, CMD, &keys)?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1947,12 +1965,15 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(stats.data.flow_stats.read_keys as f64); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(result) } } @@ -2028,7 +2049,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2140,7 +2161,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2205,7 +2226,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2266,7 +2287,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let start_key = F::encode_raw_key_owned(start_key, None); let end_key = F::encode_raw_key_owned(end_key, None); @@ -2314,7 +2335,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2400,7 +2421,7 @@ impl Storage { [(Some(&start_key), end_key.as_ref())], )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2477,12 +2498,15 @@ impl Storage { .get(CMD) .observe(statistics.data.flow_stats.read_keys as f64); metrics::tls_collect_scan_details(CMD, &statistics); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); result } @@ -2542,7 +2566,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2640,12 +2664,15 @@ impl Storage { .get(CMD) 
.observe(statistics.data.flow_stats.read_keys as f64);
 metrics::tls_collect_scan_details(CMD, &statistics);
+ let now = Instant::now();
 SCHED_PROCESSING_READ_HISTOGRAM_STATIC
 .get(CMD)
- .observe(begin_instant.saturating_elapsed_secs());
- SCHED_HISTOGRAM_VEC_STATIC
- .get(CMD)
- .observe(command_duration.saturating_elapsed_secs());
+ .observe(duration_to_sec(
+ now.saturating_duration_since(begin_instant),
+ ));
+ SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec(
+ now.saturating_duration_since(command_duration),
+ ));
 Ok(result)
 }
 }
@@ -2690,7 +2717,7 @@ impl Storage {
 Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?;
- let command_duration = tikv_util::time::Instant::now();
+ let command_duration = Instant::now();
 let snap_ctx = SnapContext {
 pb_ctx: &ctx,
 ..Default::default()
@@ -2725,12 +2752,15 @@ impl Storage {
 &stats,
 buckets.as_ref(),
 );
+ let now = Instant::now();
 SCHED_PROCESSING_READ_HISTOGRAM_STATIC
 .get(CMD)
- .observe(begin_instant.saturating_elapsed_secs());
- SCHED_HISTOGRAM_VEC_STATIC
- .get(CMD)
- .observe(command_duration.saturating_elapsed_secs());
+ .observe(duration_to_sec(
+ now.saturating_duration_since(begin_instant),
+ ));
+ SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec(
+ now.saturating_duration_since(command_duration),
+ ));
 r
 }
 }
@@ -2887,7 +2917,7 @@ impl Storage {
 range.set_end_key(end_key.into_encoded());
 }
- let command_duration = tikv_util::time::Instant::now();
+ let command_duration = Instant::now();
 let snap_ctx = SnapContext {
 pb_ctx: &ctx,
 ..Default::default()
@@ -2898,7 +2928,7 @@ impl Storage {
 let store = RawStore::new(snapshot, api_version);
 let cf = Self::rawkv_cf("", api_version)?;
- let begin_instant = tikv_util::time::Instant::now();
+ let begin_instant = Instant::now();
 let mut stats = Vec::with_capacity(ranges.len());
 let ret = store
 .raw_checksum_ranges(cf, &ranges, &mut stats)
@@ -2913,12 +2943,15 @@ impl Storage {
 buckets.as_ref(),
 );
 });
+ let now = Instant::now();
 SCHED_PROCESSING_READ_HISTOGRAM_STATIC
 .get(CMD)
- .observe(begin_instant.saturating_elapsed().as_secs_f64());
- SCHED_HISTOGRAM_VEC_STATIC
- .get(CMD)
- .observe(command_duration.saturating_elapsed().as_secs_f64());
+ .observe(duration_to_sec(
+ now.saturating_duration_since(begin_instant),
+ ));
+ SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec(
+ now.saturating_duration_since(command_duration),
+ ));
 ret
 }
@@ -3695,7 +3728,7 @@ pub mod test_util {
 &self,
 id: u64,
 res: Result<(Option>, Statistics)>,
- _: tikv_util::time::Instant,
+ _: Instant,
 _source: String,
 ) {
 self.data.lock().unwrap().push(GetResult {
@@ -3706,13 +3739,7 @@ pub mod test_util {
 }

 impl ResponseBatchConsumer>> for GetConsumer {
- fn consume(
- &self,
- id: u64,
- res: Result>>,
- _: tikv_util::time::Instant,
- _source: String,
- ) {
+ fn consume(&self, id: u64, res: Result>>, _: Instant, _source: String) {
 self.data.lock().unwrap().push(GetResult { id, res });
 }
 }

From 87b2fe35aefc0d12e53ea0a471b5d9a7cb8606c9 Mon Sep 17 00:00:00 2001
From: Neil Shen
Date: Fri, 1 Sep 2023 10:29:09 +0800
Subject: [PATCH 022/203] resolved_ts: shrink resolver lock map (#15484)

close tikv/tikv#15458

The Resolver owns a hash map that tracks lock and unlock events in
order to calculate the resolved ts. However, it never shrinks this map,
even after all locks have been removed, which may cause OOM when
transactions modify many rows across many regions. The total memory
usage is proportional to the number of modified rows.
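For illustration only (a standalone sketch, not the resolver code), the
reason an explicit shrink is needed is that a std `HashMap` keeps its
table allocated after removals, so an idle resolver can keep memory
sized for its peak number of tracked locks:

    use std::collections::HashMap;

    fn main() {
        let mut locks: HashMap<u64, Vec<u8>> = HashMap::new();
        for i in 0..100_000u64 {
            locks.insert(i, vec![0u8; 16]);
        }
        for i in 0..100_000u64 {
            locks.remove(&i);
        }
        // The map is empty, but its capacity (and allocation) typically
        // still reflects the peak size.
        println!("after removals: len={} cap={}", locks.len(), locks.capacity());

        // Shrink only when capacity far exceeds the live entries, to
        // amortize the rehash cost (the same idea as the ratio check in
        // this patch).
        if locks.capacity() > locks.len().max(1) * 2 {
            locks.shrink_to_fit();
        }
        println!("after shrink:   len={} cap={}", locks.len(), locks.capacity());
    }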
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/lib.rs | 1 + components/resolved_ts/src/resolver.rs | 129 ++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 3 deletions(-) diff --git a/components/resolved_ts/src/lib.rs b/components/resolved_ts/src/lib.rs index eef1211a580a..f9eeb7c8b706 100644 --- a/components/resolved_ts/src/lib.rs +++ b/components/resolved_ts/src/lib.rs @@ -14,6 +14,7 @@ #![feature(box_patterns)] #![feature(result_flattening)] +#![feature(let_chains)] #[macro_use] extern crate tikv_util; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 1b0a07bf8e2e..6bee5efd2f68 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp, collections::BTreeMap, sync::Arc}; +use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; @@ -23,6 +23,8 @@ pub struct Resolver { locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, + // The last shrink time. + last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. resolved_ts: TimeStamp, // The highest index `Resolver` had been tracked @@ -93,6 +95,7 @@ impl Resolver { resolved_ts: TimeStamp::zero(), locks_by_key: HashMap::default(), lock_ts_heap: BTreeMap::new(), + last_aggressive_shrink_time: Instant::now_coarse(), read_progress, tracked_index: 0, min_ts: TimeStamp::zero(), @@ -161,6 +164,23 @@ impl Resolver { key.heap_size() + std::mem::size_of::() } + fn shrink_ratio(&mut self, ratio: usize, timestamp: Option) { + // HashMap load factor is 87% approximately, leave some margin to avoid + // frequent rehash. + // + // See https://github.com/rust-lang/hashbrown/blob/v0.14.0/src/raw/mod.rs#L208-L220 + const MIN_SHRINK_RATIO: usize = 2; + if self.locks_by_key.capacity() + > self.locks_by_key.len() * cmp::max(MIN_SHRINK_RATIO, ratio) + { + self.locks_by_key.shrink_to_fit(); + } + if let Some(ts) = timestamp && let Some(lock_set) = self.lock_ts_heap.get_mut(&ts) + && lock_set.capacity() > lock_set.len() * cmp::max(MIN_SHRINK_RATIO, ratio) { + lock_set.shrink_to_fit(); + } + } + #[must_use] pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { @@ -201,13 +221,22 @@ impl Resolver { self.region_id, ); - let entry = self.lock_ts_heap.get_mut(&start_ts); - if let Some(locked_keys) = entry { + let mut shrink_ts = None; + if let Some(locked_keys) = self.lock_ts_heap.get_mut(&start_ts) { + // Only shrink large set, because committing a small transaction is + // fast and shrink adds unnecessary overhead. + const SHRINK_SET_CAPACITY: usize = 256; + if locked_keys.capacity() > SHRINK_SET_CAPACITY { + shrink_ts = Some(start_ts); + } locked_keys.remove(key); if locked_keys.is_empty() { self.lock_ts_heap.remove(&start_ts); } } + // Use a large ratio to amortize the cost of rehash. + let shrink_ratio = 8; + self.shrink_ratio(shrink_ratio, shrink_ts); } /// Try to advance resolved ts. @@ -215,11 +244,20 @@ impl Resolver { /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. 
pub fn resolve(&mut self, min_ts: TimeStamp, now: Option) -> TimeStamp { + // Use a small ratio to shrink the memory usage aggressively. + const AGGRESSIVE_SHRINK_RATIO: usize = 2; + const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); + if self.last_aggressive_shrink_time.saturating_elapsed() > AGGRESSIVE_SHRINK_INTERVAL { + self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO, None); + self.last_aggressive_shrink_time = Instant::now_coarse(); + } + // The `Resolver` is stopped, not need to advance, just return the current // `resolved_ts` if self.stopped { return self.resolved_ts; } + // Find the min start ts. let min_lock = self.lock_ts_heap.keys().next().cloned(); let has_lock = min_lock.is_some(); @@ -407,4 +445,89 @@ mod tests { drop(resolver); assert_eq!(memory_quota.in_use(), 0); } + + #[test] + fn test_untrack_lock_shrink_ratio() { + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); + let mut key = vec![0; 16]; + let mut ts = TimeStamp::default(); + for _ in 0..1000 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + assert!( + resolver.locks_by_key.capacity() >= 1000, + "{}", + resolver.locks_by_key.capacity() + ); + + let mut ts = TimeStamp::default(); + for _ in 0..901 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + // shrink_to_fit may reserve some space in accordance with the resize + // policy, but it is expected to be less than 500. + assert!( + resolver.locks_by_key.capacity() < 500, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + + for _ in 0..99 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + assert!( + resolver.locks_by_key.capacity() < 100, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + + // Trigger aggressive shrink. + resolver.last_aggressive_shrink_time = Instant::now_coarse() - Duration::from_secs(600); + resolver.resolve(TimeStamp::new(0), None); + assert!( + resolver.locks_by_key.capacity() == 0, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + } + + #[test] + fn test_untrack_lock_set_shrink_ratio() { + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); + let mut key = vec![0; 16]; + let ts = TimeStamp::new(1); + for i in 0..1000usize { + key[0..8].copy_from_slice(&i.to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + assert!( + resolver.lock_ts_heap[&ts].capacity() >= 1000, + "{}", + resolver.lock_ts_heap[&ts].capacity() + ); + + for i in 0..990usize { + key[0..8].copy_from_slice(&i.to_be_bytes()); + resolver.untrack_lock(&key, None); + } + // shrink_to_fit may reserve some space in accordance with the resize + // policy, but it is expected to be less than 100. 
+ assert!( + resolver.lock_ts_heap[&ts].capacity() < 500, + "{}, {}", + resolver.lock_ts_heap[&ts].capacity(), + resolver.lock_ts_heap[&ts].len(), + ); + } } From 32c030dcdb54e81718bce98b79f056a38cde9a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:45:39 +0800 Subject: [PATCH 023/203] raftstore: don't return is_witness while region not found (#15475) close tikv/tikv#15468 Return `RegionNotFound` while cannot find peer in the current store. Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/worker/read.rs | 19 ++++++-- tests/failpoints/cases/test_witness.rs | 47 +++++++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 0c4641770beb..5d6ede9c1936 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -824,10 +824,21 @@ where return Ok(None); } - // Check witness - if find_peer_by_id(&delegate.region, delegate.peer_id).map_or(true, |p| p.is_witness) { - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); - return Err(Error::IsWitness(region_id)); + match find_peer_by_id(&delegate.region, delegate.peer_id) { + // Check witness + Some(peer) => { + if peer.is_witness { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); + return Err(Error::IsWitness(region_id)); + } + } + // This (rarely) happen in witness disabled clusters while the conf change applied but + // region not removed. We shouldn't return `IsWitness` here because our client back off + // for a long time while encountering that. + None => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); + return Err(Error::RegionNotFound(region_id)); + } } // Check non-witness hasn't finish applying snapshot yet. diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 02411ba1b76b..33a62f0532b7 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -16,6 +16,7 @@ fn test_witness_update_region_in_local_reader() { cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); + assert_eq!(nodes[2], 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -64,6 +65,52 @@ fn test_witness_update_region_in_local_reader() { fail::remove("change_peer_after_update_region_store_3"); } +// This case is almost the same as `test_witness_update_region_in_local_reader`, +// but this omitted changing the peer to witness, for ensuring `peer_is_witness` +// won't be returned in a cluster without witnesses. 
+#[test] +fn test_witness_not_reported_while_disabled() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + assert_eq!(nodes[2], 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + + cluster.must_put(b"k0", b"v0"); + + // update region but the peer is not destroyed yet + fail::cfg("change_peer_after_update_region_store_3", "pause").unwrap(); + + cluster + .pd_client + .must_remove_peer(region.get_id(), peer_on_store3.clone()); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3); + request.mut_header().set_replica_read(true); + + let resp = cluster + .read(None, request.clone(), Duration::from_millis(100)) + .unwrap(); + assert!(resp.get_header().has_error()); + assert!(!resp.get_header().get_error().has_is_witness()); + fail::remove("change_peer_after_update_region_store_3"); +} + // Test the case witness pull voter_replicated_index when has pending compact // cmd. #[test] From fa3892be7ff7acad80cdac19bbe2f5bb1423f8ac Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 1 Sep 2023 11:54:39 +0800 Subject: [PATCH 024/203] server: track grpc threads memory throughput (#15488) ref tikv/tikv#8235 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/server.rs | 8 ++++++++ components/server/src/server2.rs | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 72f7b9369568..8d44890e5a63 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -105,6 +105,7 @@ use tikv::{ Engine, Storage, }, }; +use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -294,6 +295,13 @@ where EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) + .after_start(|| { + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. + unsafe { add_thread_memory_accessor() }; + }) + .before_stop(|| { + remove_thread_memory_accessor(); + }) .build(), ); let pd_client = TikvServerCore::connect_to_pd_cluster( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 1289ffe848d6..2593035618da 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -103,6 +103,7 @@ use tikv::{ Engine, Storage, }, }; +use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -289,6 +290,13 @@ where EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) + .after_start(|| { + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. 
+ unsafe { add_thread_memory_accessor() }; + }) + .before_stop(|| { + remove_thread_memory_accessor(); + }) .build(), ); let pd_client = TikvServerCore::connect_to_pd_cluster( From a56fe6abdccdf98657eb880f1b55792bbabb29ac Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 4 Sep 2023 12:53:13 +0800 Subject: [PATCH 025/203] raftstore-v2: fix panic of dynamic changing write-buffer-limit (#15504) close tikv/tikv#15503 fix panic of dynamic changing write-buffer-limit Signed-off-by: SpadeA-Tang --- components/engine_panic/src/db_options.rs | 4 ++++ components/engine_rocks/src/db_options.rs | 8 ++++++++ components/engine_traits/src/db_options.rs | 1 + src/config/mod.rs | 13 ++++++++++--- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/components/engine_panic/src/db_options.rs b/components/engine_panic/src/db_options.rs index c081a5c1d121..05147ca06fb1 100644 --- a/components/engine_panic/src/db_options.rs +++ b/components/engine_panic/src/db_options.rs @@ -40,6 +40,10 @@ impl DbOptions for PanicDbOptions { panic!() } + fn get_flush_size(&self) -> Result { + panic!() + } + fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index c9ef2cfda985..385876630848 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -100,6 +100,14 @@ impl DbOptions for RocksDbOptions { Ok(()) } + fn get_flush_size(&self) -> Result { + if let Some(m) = self.0.get_write_buffer_manager() { + return Ok(m.flush_size() as u64); + } + + Err(box_err!("write buffer manager not found")) + } + fn set_flush_oldest_first(&mut self, f: bool) -> Result<()> { if let Some(m) = self.0.get_write_buffer_manager() { m.set_flush_oldest_first(f); diff --git a/components/engine_traits/src/db_options.rs b/components/engine_traits/src/db_options.rs index 2c6e9c3d4e89..9713c406978b 100644 --- a/components/engine_traits/src/db_options.rs +++ b/components/engine_traits/src/db_options.rs @@ -21,6 +21,7 @@ pub trait DbOptions { fn get_rate_limiter_auto_tuned(&self) -> Option; fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()>; fn set_flush_size(&mut self, f: usize) -> Result<()>; + fn get_flush_size(&self) -> Result; fn set_flush_oldest_first(&mut self, f: bool) -> Result<()>; fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions); } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8c0c04957b11..be2a52d9b078 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,7 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. 
const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; -const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: u64 = ReadableSize::mb(32).0; +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(32); /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -1406,7 +1406,7 @@ impl DbConfig { .get_or_insert(ReadableSize::mb(4)); self.lockcf .write_buffer_limit - .get_or_insert(ReadableSize::mb(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT)); + .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); } } } @@ -2061,7 +2061,8 @@ impl ConfigManager for DbConfigManger { .drain_filter(|(name, _)| name == "write_buffer_limit") .next() { - self.db.set_flush_size(size.1.into())?; + let size: ReadableSize = size.1.into(); + self.db.set_flush_size(size.0 as usize)?; } if let Some(f) = change @@ -5200,6 +5201,12 @@ mod tests { ReadableSize::mb(128).0 as i64 ); + cfg_controller + .update_config("rocksdb.write-buffer-limit", "10MB") + .unwrap(); + let flush_size = db.get_db_options().get_flush_size().unwrap(); + assert_eq!(flush_size, ReadableSize::mb(10).0); + // update some configs on default cf let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), false); From 280b39c1fa0ec4bf85dae06561f2f792bf826e6a Mon Sep 17 00:00:00 2001 From: qupeng Date: Mon, 4 Sep 2023 15:44:13 +0800 Subject: [PATCH 026/203] cdc: enhance deregister protocol (#15485) close tikv/tikv#15487 Signed-off-by: qupeng --- components/cdc/src/endpoint.rs | 97 ++++++++++++++++++++++++++++++++-- components/cdc/src/service.rs | 23 ++++++-- 2 files changed, 112 insertions(+), 8 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 72042bb5aecf..969d0cba0d9a 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -80,6 +80,11 @@ pub enum Deregister { conn_id: ConnId, request_id: u64, }, + Region { + conn_id: ConnId, + request_id: u64, + region_id: u64, + }, Downstream { conn_id: ConnId, request_id: u64, @@ -112,6 +117,16 @@ impl fmt::Debug for Deregister { .field("conn_id", conn_id) .field("request_id", request_id) .finish(), + Deregister::Region { + ref conn_id, + ref request_id, + ref region_id, + } => de + .field("deregister", &"region") + .field("conn_id", conn_id) + .field("request_id", request_id) + .field("region_id", region_id) + .finish(), Deregister::Downstream { ref conn_id, ref request_id, @@ -583,8 +598,20 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { let conn = self.connections.get_mut(&conn_id).unwrap(); - for (region, downstream) in conn.unsubscribe_request(request_id) { - self.deregister_downstream(region, downstream, None); + for (region_id, downstream) in conn.unsubscribe_request(request_id) { + let err = Some(Error::Other("region not found".into())); + self.deregister_downstream(region_id, downstream, err); + } + } + Deregister::Region { + conn_id, + request_id, + region_id, + } => { + let conn = self.connections.get_mut(&conn_id).unwrap(); + if let Some(downstream) = conn.unsubscribe(request_id, region_id) { + let err = Some(Error::Other("region not found".into())); + self.deregister_downstream(region_id, downstream, err); } } Deregister::Downstream { @@ -1248,13 +1275,12 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable for Endpoint { fn on_timeout(&mut self) { - CDC_ENDPOINT_PENDING_TASKS.set(self.scheduler.pending_tasks() as _); - // Reclaim resolved_region_heap memory. 
self.resolved_region_heap .borrow_mut() .reset_and_shrink_to(self.capture_regions.len()); + CDC_ENDPOINT_PENDING_TASKS.set(self.scheduler.pending_tasks() as _); CDC_CAPTURED_REGION_COUNT.set(self.capture_regions.len() as i64); CDC_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["unresolved"]) @@ -1262,6 +1288,7 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable CDC_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["resolved"]) .set(self.resolved_region_count as _); + if self.min_resolved_ts != TimeStamp::max() { CDC_MIN_RESOLVED_TS_REGION.set(self.min_ts_region_id as i64); CDC_MIN_RESOLVED_TS.set(self.min_resolved_ts.physical() as i64); @@ -2841,5 +2868,67 @@ mod tests { })); assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); assert_eq!(suite.capture_regions.len(), 0); + for _ in 0..2 { + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + } + + // Resubscribe the region. + suite.add_region(2, 100); + for i in 1..=2 { + req.set_request_id(1); + req.set_region_id(i); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), i as usize); + } + + // Deregister regions one by one in the request. + suite.run(Task::Deregister(Deregister::Region { + conn_id, + request_id: 1, + region_id: 1, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 1); + assert_eq!(suite.capture_regions.len(), 1); + + suite.run(Task::Deregister(Deregister::Region { + conn_id, + request_id: 1, + region_id: 2, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); + assert_eq!(suite.capture_regions.len(), 0); + + for _ in 0..2 { + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + } } } diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 7478e3afbade..7cbf268f2b73 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -304,6 +304,13 @@ impl Service { scheduler.schedule(task).map_err(|e| format!("{:?}", e)) } + // ### Command types: + // * Register registers a region. 1) both `request_id` and `region_id` must be + // specified; 2) `request_id` can be 0 but `region_id` can not. + // * Deregister deregisters some regions in one same `request_id` or just one + // region. 1) if both `request_id` and `region_id` are specified, just + // deregister the region; 2) if only `request_id` is specified, all region + // subscriptions with the same `request_id` will be deregistered. 
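// A small illustrative sketch of the dispatch rule described above, written
// with stand-in types rather than the real cdc ones; only `request_id` and
// `region_id` are needed to pick the deregister scope.
enum DeregisterScope {
    // Drop a single region subscribed under `request_id`.
    Region { request_id: u64, region_id: u64 },
    // Drop every region subscribed under `request_id`.
    Request { request_id: u64 },
}

fn deregister_scope(request_id: u64, region_id: u64) -> DeregisterScope {
    if region_id != 0 {
        DeregisterScope::Region { request_id, region_id }
    } else {
        DeregisterScope::Request { request_id }
    }
}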
fn handle_request( scheduler: &Scheduler, peer: &str, @@ -361,10 +368,18 @@ impl Service { request: ChangeDataRequest, conn_id: ConnId, ) -> Result<(), String> { - let task = Task::Deregister(Deregister::Request { - conn_id, - request_id: request.request_id, - }); + let task = if request.region_id != 0 { + Task::Deregister(Deregister::Region { + conn_id, + request_id: request.request_id, + region_id: request.region_id, + }) + } else { + Task::Deregister(Deregister::Request { + conn_id, + request_id: request.request_id, + }) + }; scheduler.schedule(task).map_err(|e| format!("{:?}", e)) } From 1cd6dda7d351ed969811ebdea1a52f30c97d7094 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 4 Sep 2023 16:14:15 +0800 Subject: [PATCH 027/203] raftstore-v2: reuse failpoint tests in test_early_apply.rs (#15501) ref tikv/tikv#15409 reuse failpoint tests in test_early_apply Signed-off-by: SpadeA-Tang --- components/test_raftstore/src/util.rs | 8 +++---- tests/failpoints/cases/test_early_apply.rs | 22 +++++++++++++------ tests/failpoints/cases/test_split_region.rs | 2 +- tests/failpoints/cases/test_stale_read.rs | 2 +- .../raftstore/test_early_apply.rs | 4 ++-- .../integrations/raftstore/test_lease_read.rs | 2 +- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index f63c69f9631c..e88df1fb0ca1 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -668,11 +668,11 @@ pub fn create_test_engine( ) } -pub fn configure_for_request_snapshot(cluster: &mut Cluster) { +pub fn configure_for_request_snapshot(config: &mut Config) { // We don't want to generate snapshots due to compact log. - cluster.cfg.raft_store.raft_log_gc_threshold = 1000; - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); - cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); + config.raft_store.raft_log_gc_threshold = 1000; + config.raft_store.raft_log_gc_count_limit = Some(1000); + config.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); } pub fn configure_for_hibernate(config: &mut Config) { diff --git a/tests/failpoints/cases/test_early_apply.rs b/tests/failpoints/cases/test_early_apply.rs index a194ef74d8fe..bf403fb46684 100644 --- a/tests/failpoints/cases/test_early_apply.rs +++ b/tests/failpoints/cases/test_early_apply.rs @@ -7,14 +7,16 @@ use std::sync::{ use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; // Test if a singleton can apply a log before persisting it. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_singleton_cannot_early_apply() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); // Put one key first to cache leader. 
@@ -33,13 +35,14 @@ fn test_singleton_cannot_early_apply() { must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_multi_early_apply() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.cfg.raft_store.store_batch_system.pool_size = 1; // So compact log will not be triggered automatically. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run_conf_change(); // Check mixed regions can be scheduled correctly. @@ -68,9 +71,11 @@ fn test_multi_early_apply() { })), )); cluster.async_put(b"k4", b"v4").unwrap(); - // Sleep a while so that follower will send append response. + // Sleep a while so that follower will send append response sleep_ms(100); cluster.async_put(b"k11", b"v22").unwrap(); + // Sleep a while so that follower will send append response. + sleep_ms(100); // Now the store thread of store 1 pauses on `store_1_fp`. // Set `store_1_fp` again to make this store thread does not pause on it. // Then leader 1 will receive the append response and commit the log. @@ -92,6 +97,9 @@ fn test_multi_early_apply() { /// the peer to fix this issue. /// For simplicity, this test uses region merge to ensure that the apply state /// will be written to kv db before crash. +/// +/// Note: partitioned-raft-kv does not need this due to change in disk +/// persistence logic #[test] fn test_early_apply_yield_followed_with_many_entries() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index dfd7002495ca..ed01386b5283 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -752,7 +752,7 @@ impl Filter for CollectSnapshotFilter { #[test] fn test_split_duplicated_batch() { let mut cluster = new_node_cluster(0, 3); - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); // Disable raft log gc in this test case. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); // Use one thread to make it more possible to be fetched into one batch. diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 523bb54f7cb0..a9c6fa5d6e6d 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -325,7 +325,7 @@ fn test_read_index_when_transfer_leader_2() { // Increase the election tick to make this test case running reliably. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); // Stop log compaction to transfer leader with filter easier. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); let max_lease = Duration::from_secs(2); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index b30a861e2fea..44537e8b4095 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -109,7 +109,7 @@ fn test_early_apply(mode: DataLost) { let mut cluster = new_node_cluster(0, 3); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. 
- configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); if mode == DataLost::LeaderCommit || mode == DataLost::AllLost { cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -175,7 +175,7 @@ fn test_update_internal_apply_index() { let mut cluster = new_node_cluster(0, 4); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(3, 3)); cluster.must_put(b"k1", b"v1"); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 60c87fd4e00f..abf17e01e9d9 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -481,7 +481,7 @@ fn test_read_index_stale_in_suspect_lease() { configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let max_lease = Duration::from_secs(2); // Stop log compaction to transfer leader with filter easier. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); cluster.pd_client.disable_default_operator(); From 640143a2daba90bfcc9a3848d19887a7a2f39170 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 4 Sep 2023 17:48:43 +0800 Subject: [PATCH 028/203] raftstore: region initial size depends on the split resource . (#15456) close tikv/tikv#15457 there are three triggers will split the regions: 1. load split include sizekeys, load etc. In this cases, the new region should contains the data after split. 2. tidb split tables or partition table, such like `create table test.t1(id int,b int) shard_row_id_bits=4 partition by hash(id) partitions 2000`. In this cases , the new region shouldn't contains any data after split. 
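The effect on the size estimate can be sketched as follows; this is a simplified
model rather than the raftstore code itself, with `approximate_size` standing in
for the peer's split flow control field:

// Simplified model of how the parent region's size estimate is propagated.
// Splits triggered by size/keys/load checks assume each resulting region holds
// an equal share of the data; pre-splits for new tables or partitions start
// the new regions with no estimate at all.
fn size_after_split(
    approximate_size: Option<u64>,
    new_region_count: u64,
    share_source_region_size: bool,
) -> Option<u64> {
    if share_source_region_size {
        approximate_size.map(|v| v / new_region_count)
    } else {
        None
    }
}

For example, a region of roughly 1440 bytes split in two by the size checker
keeps an estimate of about 720 bytes per side, while a pre-split of an empty
partitioned table starts with no size estimate.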
Signed-off-by: bufferflies <1045931706@qq.com> --- Cargo.lock | 2 +- .../src/operation/command/admin/split.rs | 33 ++++++++-- components/raftstore-v2/src/operation/pd.rs | 2 + components/raftstore-v2/src/router/imp.rs | 2 +- components/raftstore-v2/src/router/message.rs | 3 + components/raftstore-v2/src/worker/pd/mod.rs | 11 +++- .../raftstore-v2/src/worker/pd/region.rs | 1 + .../raftstore-v2/src/worker/pd/split.rs | 8 +++ components/raftstore/src/router.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 7 ++ components/raftstore/src/store/fsm/peer.rs | 43 ++++++++++--- components/raftstore/src/store/msg.rs | 1 + components/raftstore/src/store/worker/pd.rs | 19 ++++++ components/test_raftstore/src/cluster.rs | 1 + src/server/raftkv/raft_extension.rs | 1 + src/server/raftkv2/raft_extension.rs | 2 +- tests/failpoints/cases/test_split_region.rs | 64 +++++++++++++++++++ 17 files changed, 181 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 162d1f3ae073..4cd0882628bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2979,7 +2979,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#7b612d935bf96f9daf7a537db379bcc88b4644e0" +source = "git+https://github.com/pingcap/kvproto.git#ecdbf1f8c130089392a9bb5f86f7577deddfbed5" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index c744c1b91610..0f9cae7218df 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -75,6 +75,9 @@ pub struct SplitResult { // The index of the derived region in `regions` pub derived_index: usize, pub tablet_index: u64, + // new regions will share the region size if it's true. + // otherwise, the new region's size will be 0. + pub share_source_region_size: bool, // Hack: in common case we should use generic, but split is an infrequent // event that performance is not critical. And using `Any` can avoid polluting // all existing code. @@ -148,6 +151,9 @@ pub struct RequestSplit { pub epoch: RegionEpoch, pub split_keys: Vec>, pub source: Cow<'static, str>, + // new regions will share the region size if it's true. + // otherwise, the new region's size will be 0. 
+ pub share_source_region_size: bool, } #[derive(Debug)] @@ -235,6 +241,7 @@ impl Peer { { return true; } + fail_point!("on_split_region_check_tick", |_| true); if ctx.schedulers.split_check.is_busy() { return false; } @@ -336,7 +343,7 @@ impl Peer { ch.set_result(cmd_resp::new_error(e)); return; } - self.ask_batch_split_pd(ctx, rs.split_keys, ch); + self.ask_batch_split_pd(ctx, rs.split_keys, rs.share_source_region_size, ch); } pub fn on_request_half_split( @@ -479,6 +486,7 @@ impl Apply { let derived_req = &[derived_req]; let right_derive = split_reqs.get_right_derive(); + let share_source_region_size = split_reqs.get_share_source_region_size(); let reqs = if right_derive { split_reqs.get_requests().iter().chain(derived_req) } else { @@ -615,6 +623,7 @@ impl Apply { derived_index, tablet_index: log_index, tablet: Box::new(tablet), + share_source_region_size, }), )) } @@ -665,6 +674,7 @@ impl Peer { fail_point!("on_split", self.peer().get_store_id() == 3, |_| {}); let derived = &res.regions[res.derived_index]; + let share_source_region_size = res.share_source_region_size; let region_id = derived.get_id(); let region_locks = self.txn_context().split(&res.regions, derived); @@ -695,8 +705,14 @@ impl Peer { let new_region_count = res.regions.len() as u64; let control = self.split_flow_control_mut(); - let estimated_size = control.approximate_size.map(|v| v / new_region_count); - let estimated_keys = control.approximate_keys.map(|v| v / new_region_count); + // if share_source_region_size is true, it means the new region contains any + // data from the origin region. + let mut share_size = None; + let mut share_keys = None; + if share_source_region_size { + share_size = control.approximate_size.map(|v| v / new_region_count); + share_keys = control.approximate_keys.map(|v| v / new_region_count); + } self.post_split(); @@ -714,8 +730,11 @@ impl Peer { // After split, the peer may need to update its metrics. 
let control = self.split_flow_control_mut(); control.may_skip_split_check = false; - control.approximate_size = estimated_size; - control.approximate_keys = estimated_keys; + if share_source_region_size { + control.approximate_size = share_size; + control.approximate_keys = share_keys; + } + self.add_pending_tick(PeerTick::SplitRegionCheck); } self.storage_mut().set_has_dirty_data(true); @@ -760,8 +779,8 @@ impl Peer { derived_region_id: region_id, check_split: last_region_id == new_region_id, scheduled: false, - approximate_size: estimated_size, - approximate_keys: estimated_keys, + approximate_size: share_size, + approximate_keys: share_keys, locks, })); diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 817b3aa6eb68..9bce8f3ba02b 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -215,6 +215,7 @@ impl Peer { &self, ctx: &StoreContext, split_keys: Vec>, + share_source_region_size: bool, ch: CmdResChannel, ) { let task = pd::Task::AskBatchSplit { @@ -222,6 +223,7 @@ impl Peer { split_keys, peer: self.peer().clone(), right_derive: ctx.cfg.right_derive_when_split, + share_source_region_size, ch, }; if let Err(e) = ctx.schedulers.pd.schedule(task) { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 9c6cca96ae48..23a8a3c7d4e9 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -58,7 +58,7 @@ impl raftstore::coprocessor::StoreHandle for Store split_keys: Vec>, source: Cow<'static, str>, ) { - let (msg, _) = PeerMsg::request_split(region_epoch, split_keys, source.to_string()); + let (msg, _) = PeerMsg::request_split(region_epoch, split_keys, source.to_string(), true); let res = self.send(region_id, msg); if let Err(e) = res { warn!( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index f09314b4f174..2d364af44e19 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -315,6 +315,7 @@ impl PeerMsg { epoch: metapb::RegionEpoch, split_keys: Vec>, source: String, + share_source_region_size: bool, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -323,6 +324,7 @@ impl PeerMsg { epoch, split_keys, source: source.into(), + share_source_region_size, }, ch, }, @@ -344,6 +346,7 @@ impl PeerMsg { epoch, split_keys, source: source.into(), + share_source_region_size: false, }, ch, }, diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index f89ea75b604f..061a5ad51262 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -70,6 +70,7 @@ pub enum Task { split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: CmdResChannel, }, ReportBatchSplit { @@ -324,7 +325,15 @@ where peer, right_derive, ch, - } => self.handle_ask_batch_split(region, split_keys, peer, right_derive, ch), + share_source_region_size, + } => self.handle_ask_batch_split( + region, + split_keys, + peer, + right_derive, + share_source_region_size, + ch, + ), Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), Task::AutoSplit { split_infos } => self.handle_auto_split(split_infos), Task::UpdateMaxTimestamp { diff --git a/components/raftstore-v2/src/worker/pd/region.rs 
b/components/raftstore-v2/src/worker/pd/region.rs index e825dd54c327..763e12fff072 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -288,6 +288,7 @@ where epoch, split_keys: split_region.take_keys().into(), source: "pd".into(), + share_source_region_size: false, }, ch, } diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index bf13e01120a0..7fec5a31bb60 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -17,10 +17,13 @@ fn new_batch_split_region_request( split_keys: Vec>, ids: Vec, right_derive: bool, + share_source_region_size: bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::BatchSplit); req.mut_splits().set_right_derive(right_derive); + req.mut_splits() + .set_share_source_region_size(share_source_region_size); let mut requests = Vec::with_capacity(ids.len()); for (mut id, key) in ids.into_iter().zip(split_keys) { let mut split = SplitRequest::default(); @@ -46,6 +49,7 @@ where split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: CmdResChannel, ) { Self::ask_batch_split_imp( @@ -57,6 +61,7 @@ where split_keys, peer, right_derive, + share_source_region_size, Some(ch), ); } @@ -70,6 +75,7 @@ where split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: Option, ) { if split_keys.is_empty() { @@ -98,6 +104,7 @@ where split_keys, resp.take_ids().into(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -148,6 +155,7 @@ where vec![split_key], split_info.peer, true, + false, None, ); // Try to split the region on half within the given key diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 3a76a5ad26f6..09f389a22307 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -331,6 +331,7 @@ impl crate::coprocessor::StoreHandle for RaftRoute split_keys, callback: Callback::None, source, + share_source_region_size: true, }, ) { warn!( diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 0bc1ccf7d85d..c170e5a35f98 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -272,6 +272,7 @@ pub enum ExecResult { regions: Vec, derived: Region, new_split_regions: HashMap, + share_source_region_size: bool, }, PrepareMerge { region: Region, @@ -2516,6 +2517,9 @@ where admin_req .mut_splits() .set_right_derive(split.get_right_derive()); + admin_req + .mut_split() + .set_share_source_region_size(split.get_share_source_region_size()); admin_req.mut_splits().mut_requests().push(split); // This method is executed only when there are unapplied entries after being // restarted. So there will be no callback, it's OK to return a response @@ -2560,6 +2564,7 @@ where derived.mut_region_epoch().set_version(new_version); let right_derive = split_reqs.get_right_derive(); + let share_source_region_size = split_reqs.get_share_source_region_size(); let mut regions = Vec::with_capacity(new_region_cnt + 1); // Note that the split requests only contain ids for new regions, so we need // to handle new regions and old region separately. 
@@ -2724,6 +2729,7 @@ where regions, derived, new_split_regions, + share_source_region_size, }), )) } @@ -7088,6 +7094,7 @@ mod tests { regions, derived: _, new_split_regions: _, + share_source_region_size: _, } = apply_res.exec_res.front().unwrap() { let r8 = regions.get(0).unwrap(); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 62a3a2650de5..9f7934e806e4 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1049,8 +1049,15 @@ where split_keys, callback, source, + share_source_region_size, } => { - self.on_prepare_split_region(region_epoch, split_keys, callback, &source); + self.on_prepare_split_region( + region_epoch, + split_keys, + callback, + &source, + share_source_region_size, + ); } CasualMessage::ComputeHashResult { index, @@ -4042,6 +4049,7 @@ where derived: metapb::Region, regions: Vec, new_split_regions: HashMap, + share_source_region_size: bool, ) { fail_point!("on_split", self.ctx.store_id() == 3, |_| {}); @@ -4063,8 +4071,15 @@ where // Roughly estimate the size and keys for new regions. let new_region_count = regions.len() as u64; - let estimated_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); - let estimated_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + let mut share_size = None; + let mut share_keys = None; + // if share_source_region_size is true, it means the new region contains any + // data from the origin region + if share_source_region_size { + share_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); + share_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + } + let mut meta = self.ctx.store_meta.lock().unwrap(); meta.set_region( &self.ctx.coprocessor_host, @@ -4079,8 +4094,10 @@ where let is_leader = self.fsm.peer.is_leader(); if is_leader { - self.fsm.peer.approximate_size = estimated_size; - self.fsm.peer.approximate_keys = estimated_keys; + if share_source_region_size { + self.fsm.peer.approximate_size = share_size; + self.fsm.peer.approximate_keys = share_keys; + } self.fsm.peer.heartbeat_pd(self.ctx); // Notify pd immediately to let it update the region meta. info!( @@ -4215,8 +4232,8 @@ where new_peer.has_ready |= campaigned; if is_leader { - new_peer.peer.approximate_size = estimated_size; - new_peer.peer.approximate_keys = estimated_keys; + new_peer.peer.approximate_size = share_size; + new_peer.peer.approximate_keys = share_keys; *new_peer.peer.txn_ext.pessimistic_locks.write() = locks; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. 
@@ -5043,7 +5060,13 @@ where derived, regions, new_split_regions, - } => self.on_ready_split_region(derived, regions, new_split_regions), + share_source_region_size, + } => self.on_ready_split_region( + derived, + regions, + new_split_regions, + share_source_region_size, + ), ExecResult::PrepareMerge { region, state } => { self.on_ready_prepare_merge(region, state) } @@ -5768,7 +5791,7 @@ where return; } - fail_point!("on_split_region_check_tick"); + fail_point!("on_split_region_check_tick", |_| {}); self.register_split_region_check_tick(); // To avoid frequent scan, we only add new scan tasks if all previous tasks @@ -5828,6 +5851,7 @@ where split_keys: Vec>, cb: Callback, source: &str, + share_source_region_size: bool, ) { info!( "on split"; @@ -5873,6 +5897,7 @@ where split_keys, peer: self.fsm.peer.peer.clone(), right_derive: self.ctx.cfg.right_derive_when_split, + share_source_region_size, callback: cb, }; if let Err(ScheduleError::Stopped(t)) = self.ctx.pd_scheduler.schedule(task) { diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 1ed8934e0f0a..64c5be6d7e15 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -545,6 +545,7 @@ pub enum CasualMessage { split_keys: Vec>, callback: Callback, source: Cow<'static, str>, + share_source_region_size: bool, }, /// Hash result of ComputeHash command. diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index e8c8e2f575b1..32fbdbc31452 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -141,6 +141,7 @@ where peer: metapb::Peer, // If true, right Region derives origin region_id. right_derive: bool, + share_source_region_size: bool, callback: Callback, }, AskBatchSplit { @@ -149,6 +150,7 @@ where peer: metapb::Peer, // If true, right Region derives origin region_id. 
right_derive: bool, + share_source_region_size: bool, callback: Callback, }, AutoSplit { @@ -1066,6 +1068,7 @@ where split_key: Vec, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, callback: Callback, task: String, ) { @@ -1087,6 +1090,7 @@ where resp.get_new_region_id(), resp.take_new_peer_ids(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -1121,6 +1125,7 @@ where mut split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, callback: Callback, task: String, remote: Remote, @@ -1146,6 +1151,7 @@ where split_keys, resp.take_ids().into(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -1174,6 +1180,7 @@ where split_key: split_keys.pop().unwrap(), peer, right_derive, + share_source_region_size, callback, }; if let Err(ScheduleError::Stopped(t)) = scheduler.schedule(task) { @@ -1645,6 +1652,7 @@ where split_keys: split_region.take_keys().into(), callback: Callback::None, source: "pd".into(), + share_source_region_size: false, } } else { CasualMessage::HalfSplitRegion { @@ -2048,12 +2056,14 @@ where split_key, peer, right_derive, + share_source_region_size, callback, } => self.handle_ask_split( region, split_key, peer, right_derive, + share_source_region_size, callback, String::from("ask_split"), ), @@ -2062,6 +2072,7 @@ where split_keys, peer, right_derive, + share_source_region_size, callback, } => Self::handle_ask_batch_split( self.router.clone(), @@ -2071,6 +2082,7 @@ where split_keys, peer, right_derive, + share_source_region_size, callback, String::from("batch_split"), self.remote.clone(), @@ -2095,6 +2107,7 @@ where vec![split_key], split_info.peer, true, + false, Callback::None, String::from("auto_split"), remote.clone(), @@ -2385,6 +2398,7 @@ fn new_split_region_request( new_region_id: u64, peer_ids: Vec, right_derive: bool, + share_source_region_size: bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::Split); @@ -2392,6 +2406,8 @@ fn new_split_region_request( req.mut_split().set_new_region_id(new_region_id); req.mut_split().set_new_peer_ids(peer_ids); req.mut_split().set_right_derive(right_derive); + req.mut_split() + .set_share_source_region_size(share_source_region_size); req } @@ -2399,10 +2415,13 @@ fn new_batch_split_region_request( split_keys: Vec>, ids: Vec, right_derive: bool, + share_source_region_size: bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::BatchSplit); req.mut_splits().set_right_derive(right_derive); + req.mut_splits() + .set_share_source_region_size(share_source_region_size); let mut requests = Vec::with_capacity(ids.len()); for (mut id, key) in ids.into_iter().zip(split_keys) { let mut split = SplitRequest::default(); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index e65028fe9683..26fa2a47d5f5 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1463,6 +1463,7 @@ impl Cluster { split_keys: vec![split_key], callback: cb, source: "test".into(), + share_source_region_size: false, }, ) .unwrap(); diff --git a/src/server/raftkv/raft_extension.rs b/src/server/raftkv/raft_extension.rs index d31788424890..733d60c838cf 100644 --- a/src/server/raftkv/raft_extension.rs +++ b/src/server/raftkv/raft_extension.rs @@ -121,6 +121,7 @@ where split_keys, callback: 
raftstore::store::Callback::write(cb), source: source.into(), + share_source_region_size: false, }; let res = self.router.send_casual_msg(region_id, req); Box::pin(async move { diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs index f2f433999b92..f6bb66e9e118 100644 --- a/src/server/raftkv2/raft_extension.rs +++ b/src/server/raftkv2/raft_extension.rs @@ -71,7 +71,7 @@ impl tikv_kv::RaftExtension for Extension split_keys: Vec>, source: String, ) -> futures::future::BoxFuture<'static, tikv_kv::Result>> { - let (msg, sub) = PeerMsg::request_split(region_epoch, split_keys, source); + let (msg, sub) = PeerMsg::request_split(region_epoch, split_keys, source, true); let res = self.router.check_send(region_id, msg); Box::pin(async move { res?; diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index ed01386b5283..65c50793d7a6 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -17,6 +17,7 @@ use kvproto::{ Mutation, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, }, metapb::Region, + pdpb::CheckPolicy, raft_serverpb::{PeerState, RaftMessage}, tikvpb::TikvClient, }; @@ -31,6 +32,7 @@ use raftstore::{ Result, }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{ config::{ReadableDuration, ReadableSize}, @@ -346,6 +348,68 @@ impl Filter for PrevoteRangeFilter { } } +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_region_size_after_split() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_size = 1440; + let region_split_size = 960; + cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // insert 20 key value pairs into the cluster. + // from 000000001 to 000000020 + let mut range = 1..; + put_till_size(&mut cluster, region_max_size - 100, &mut range); + sleep_ms(100); + // disable check split. + fail::cfg("on_split_region_check_tick", "return").unwrap(); + let max_key = put_till_size(&mut cluster, region_max_size, &mut range); + // split by use key, split region 1 to region 1 and region 2. + // region 1: ["000000010",""] + // region 2: ["","000000010") + let region = pd_client.get_region(&max_key).unwrap(); + cluster.must_split(®ion, b"000000010"); + let size = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size >= region_max_size - 100, "{}", size); + + let region = pd_client.get_region(b"000000009").unwrap(); + let size1 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert_eq!(0, size1, "{}", size1); + + // split region by size check, the region 1 will be split to region 1 and region + // 3. and the region3 will contains one half region size data. 
+ let region = pd_client.get_region(&max_key).unwrap(); + pd_client.split_region(region.clone(), CheckPolicy::Scan, vec![]); + sleep_ms(200); + let size2 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size > size2, "{}:{}", size, size2); + fail::remove("on_split_region_check_tick"); + + let region = pd_client.get_region(b"000000010").unwrap(); + let size3 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size3 > 0, "{}", size3); +} + // Test if a peer is created from splitting when another initialized peer with // the same region id has already existed. In previous implementation, it can be // created and panic will happen because there are two initialized peer with the From 02061bec4b8c2520eb2d5b003c064e3cd1a76a21 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 5 Sep 2023 15:09:43 +0800 Subject: [PATCH 029/203] raftstore-v2: limit the flush times during server stop (#15511) ref tikv/tikv#15461 limit the flush times during server stop Signed-off-by: SpadeA-Tang --- components/engine_traits/src/flush.rs | 2 +- .../src/operation/ready/apply_trace.rs | 15 ++++++++-- .../integrations/raftstore/test_bootstrap.rs | 30 +++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index d0f9f892f349..9344e84bb4e7 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -234,7 +234,7 @@ impl PersistenceListener { /// /// `largest_seqno` should be the largest seqno of the generated file. pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64, file_no: u64) { - fail_point!("on_flush_completed"); + fail_point!("on_flush_completed", |_| {}); // Maybe we should hook the compaction to avoid the file is compacted before // being recorded. let offset = data_cf_offset(cf); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d4743448d07d..1601e1f01dd1 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -46,7 +46,7 @@ use kvproto::{ use raftstore::store::{ util, ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use slog::{info, trace, Logger}; +use slog::{info, trace, warn, Logger}; use tikv_util::{box_err, slog_panic, worker::Scheduler}; use crate::{ @@ -619,7 +619,18 @@ impl Peer { // flush the oldest cf one by one until we are under the replay count threshold loop { let replay_count = self.storage().estimate_replay_count(); - if replay_count < flush_threshold { + if replay_count < flush_threshold || tried_count == 3 { + // Ideally, the replay count should be 0 after three flush_oldest_cf. If not, + // there may exist bug, but it's not desireable to block here, so we at most try + // three times. 
+ if replay_count >= flush_threshold && tried_count == 3 { + warn!( + self.logger, + "after three flush_oldest_cf, the expected replay count still exceeds the threshold"; + "replay_count" => replay_count, + "threshold" => flush_threshold, + ); + } if flushed { let admin_flush = self.storage_mut().apply_trace_mut().admin.flushed; let (_, _, tablet_index) = ctx diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index b43a3d00d16e..056641e1e3f8 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -252,6 +252,36 @@ fn test_flush_before_stop() { .unwrap(); } +// test flush_before_close will not flush forever +#[test] +fn test_flush_before_stop2() { + use test_raftstore_v2::*; + + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("on_flush_completed", "return").unwrap(); + + for i in 0..20 { + let key = format!("k{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), b"val"); + cluster.must_put_cf(CF_LOCK, key.as_bytes(), b"val"); + } + + let router = cluster.get_router(1).unwrap(); + let raft_engine = cluster.get_raft_engine(1); + + let (tx, rx) = sync_channel(1); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(1, msg).unwrap(); + + rx.recv().unwrap(); + + let admin_flush = raft_engine.get_flushed_index(1, CF_RAFT).unwrap().unwrap(); + assert!(admin_flush < 10); +} + // We cannot use a flushed index to call `maybe_advance_admin_flushed` // consider a case: // 1. lock `k` with index 6 From 1c21d07f2bfb181993838f2ae3ed34dceff1b6cb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Sep 2023 15:41:11 +0800 Subject: [PATCH 030/203] resolved_ts: track pending lock memory usage (#15452) ref tikv/tikv#14864 * Fix resolved ts OOM caused by adding large txns locks to `ResolverStatus`. * Add initial scan backoff duration metrics. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Connor --- components/resolved_ts/src/endpoint.rs | 340 +++++++++++------- components/resolved_ts/src/metrics.rs | 6 + components/resolved_ts/src/resolver.rs | 4 +- components/resolved_ts/src/scanner.rs | 4 +- .../resolved_ts/tests/failpoints/mod.rs | 45 +++ metrics/grafana/tikv_details.json | 73 ++++ 6 files changed, 339 insertions(+), 133 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 3c1ad9d8c8d2..fc3e24de1e4e 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -28,7 +28,7 @@ use raftstore::{ use security::SecurityManager; use tikv::config::ResolvedTsConfig; use tikv_util::{ - memory::MemoryQuota, + memory::{HeapSize, MemoryQuota}, warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; @@ -41,7 +41,7 @@ use crate::{ metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, - Error, Result, + Error, Result, ON_DROP_WARN_HEAP_SIZE, }; /// grace period for logging safe-ts and resolved-ts gap in slow log @@ -53,10 +53,102 @@ enum ResolverStatus { tracked_index: u64, locks: Vec, cancelled: Arc, + memory_quota: Arc, }, Ready, } +impl Drop for ResolverStatus { + fn drop(&mut self) { + let ResolverStatus::Pending { + locks, + memory_quota, + .. + } = self else { + return; + }; + if locks.is_empty() { + return; + } + + // Free memory quota used by pending locks and unlocks. 
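// A minimal sketch (using only the MemoryQuota calls visible in this patch) of
// the accounting discipline applied to pending locks: each buffered lock
// charges its heap size against the quota when it is queued, and the same
// number of bytes is released when the lock is drained or the whole pending
// status is dropped, as the Drop impl here does.
fn sketch_quota_accounting(quota: &tikv_util::memory::MemoryQuota, key: &[u8]) -> bool {
    let bytes = key.len(); // stand-in for `lock.heap_size()`
    if !quota.alloc(bytes) {
        // Over budget: the caller reports Error::MemoryQuotaExceeded instead of
        // buffering the lock.
        return false;
    }
    // ... later, when the pending lock is drained or dropped:
    quota.free(bytes);
    true
}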
+ let mut bytes = 0; + let num_locks = locks.len(); + for lock in locks { + bytes += lock.heap_size(); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("drop huge ResolverStatus"; + "bytes" => bytes, + "num_locks" => num_locks, + "memory_quota_in_use" => memory_quota.in_use(), + "memory_quota_capacity" => memory_quota.capacity(), + ); + } + memory_quota.free(bytes); + } +} + +impl ResolverStatus { + fn push_pending_lock(&mut self, lock: PendingLock, region_id: u64) -> Result<()> { + let ResolverStatus::Pending { + locks, + memory_quota, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + // Check if adding a new lock or unlock will exceed the memory + // quota. + if !memory_quota.alloc(lock.heap_size()) { + fail::fail_point!("resolved_ts_on_pending_locks_memory_quota_exceeded"); + return Err(Error::MemoryQuotaExceeded); + } + locks.push(lock); + Ok(()) + } + + fn update_tracked_index(&mut self, index: u64, region_id: u64) { + let ResolverStatus::Pending { + tracked_index, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + assert!( + *tracked_index < index, + "region {}, tracked_index: {}, incoming index: {}", + region_id, + *tracked_index, + index + ); + *tracked_index = index; + } + + fn drain_pending_locks( + &mut self, + region_id: u64, + ) -> (u64, impl Iterator + '_) { + let ResolverStatus::Pending { + locks, + memory_quota, + tracked_index, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + // Must take locks, otherwise it may double free memory quota on drop. + let locks = std::mem::take(locks); + ( + *tracked_index, + locks.into_iter().map(|lock| { + memory_quota.free(lock.heap_size()); + lock + }), + ) + } +} + #[allow(dead_code)] enum PendingLock { Track { @@ -70,6 +162,16 @@ enum PendingLock { }, } +impl HeapSize for PendingLock { + fn heap_size(&self) -> usize { + match self { + PendingLock::Track { key, .. } | PendingLock::Untrack { key, .. } => { + key.as_encoded().heap_size() + } + } + } +} + // Records information related to observed region. // observe_id is used for avoiding ABA problems in incremental scan task, // advance resolved ts task, and command observing. @@ -85,13 +187,14 @@ struct ObserveRegion { impl ObserveRegion { fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { ObserveRegion { - resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota), + resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota.clone()), meta, handle: ObserveHandle::new(), resolver_status: ResolverStatus::Pending { tracked_index: 0, locks: vec![], cancelled: Arc::new(AtomicBool::new(false)), + memory_quota, }, } } @@ -101,122 +204,109 @@ impl ObserveRegion { } fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> Result<()> { - match &mut self.resolver_status { - ResolverStatus::Pending { - locks, - tracked_index, - .. - } => { - for log in change_logs { - match log { - ChangeLog::Error(e) => { - debug!( - "skip change log error"; - "region" => self.meta.id, - "error" => ?e, - ); - continue; - } - ChangeLog::Admin(req_type) => { - // TODO: for admin cmd that won't change the region meta like peer list - // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to - // return error - return Err(box_err!( - "region met admin command {:?} while initializing resolver", - req_type - )); - } - ChangeLog::Rows { rows, index } => { - rows.iter().for_each(|row| match row { - ChangeRow::Prewrite { key, start_ts, .. 
} => { - locks.push(PendingLock::Track { - key: key.clone(), - start_ts: *start_ts, - }) - } + if matches!(self.resolver_status, ResolverStatus::Pending { .. }) { + for log in change_logs { + match log { + ChangeLog::Error(e) => { + debug!( + "skip change log error"; + "region" => self.meta.id, + "error" => ?e, + ); + continue; + } + ChangeLog::Admin(req_type) => { + // TODO: for admin cmd that won't change the region meta like peer list + // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to + // return error + return Err(box_err!( + "region met admin command {:?} while initializing resolver", + req_type + )); + } + ChangeLog::Rows { rows, index } => { + for row in rows { + let lock = match row { + ChangeRow::Prewrite { key, start_ts, .. } => PendingLock::Track { + key: key.clone(), + start_ts: *start_ts, + }, ChangeRow::Commit { key, start_ts, commit_ts, .. - } => locks.push(PendingLock::Untrack { + } => PendingLock::Untrack { key: key.clone(), start_ts: *start_ts, commit_ts: *commit_ts, - }), + }, // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => {} - ChangeRow::IngestSsT => {} - }); - assert!( - *tracked_index < *index, - "region {}, tracked_index: {}, incoming index: {}", - self.meta.id, - *tracked_index, - *index - ); - *tracked_index = *index; + ChangeRow::OnePc { .. } | ChangeRow::IngestSsT => continue, + }; + self.resolver_status.push_pending_lock(lock, self.meta.id)?; } + self.resolver_status + .update_tracked_index(*index, self.meta.id); } } } - ResolverStatus::Ready => { - for log in change_logs { - match log { - ChangeLog::Error(e) => { + } else { + for log in change_logs { + match log { + ChangeLog::Error(e) => { + debug!( + "skip change log error"; + "region" => self.meta.id, + "error" => ?e, + ); + continue; + } + ChangeLog::Admin(req_type) => match req_type { + AdminCmdType::Split + | AdminCmdType::BatchSplit + | AdminCmdType::PrepareMerge + | AdminCmdType::RollbackMerge + | AdminCmdType::CommitMerge => { + info!( + "region met split/merge command, stop tracking since key range changed, wait for re-register"; + "req_type" => ?req_type, + ); + // Stop tracking so that `tracked_index` larger than the split/merge + // command index won't be published until `RegionUpdate` event + // trigger the region re-register and re-scan the new key range + self.resolver.stop_tracking(); + } + _ => { debug!( - "skip change log error"; + "skip change log admin"; "region" => self.meta.id, - "error" => ?e, + "req_type" => ?req_type, ); - continue; } - ChangeLog::Admin(req_type) => match req_type { - AdminCmdType::Split - | AdminCmdType::BatchSplit - | AdminCmdType::PrepareMerge - | AdminCmdType::RollbackMerge - | AdminCmdType::CommitMerge => { - info!( - "region met split/merge command, stop tracking since key range changed, wait for re-register"; - "req_type" => ?req_type, - ); - // Stop tracking so that `tracked_index` larger than the split/merge - // command index won't be published until `RegionUpdate` event - // trigger the region re-register and re-scan the new key range - self.resolver.stop_tracking(); - } - _ => { - debug!( - "skip change log admin"; - "region" => self.meta.id, - "req_type" => ?req_type, - ); - } - }, - ChangeLog::Rows { rows, index } => { - for row in rows { - match row { - ChangeRow::Prewrite { key, start_ts, .. } => { - if !self.resolver.track_lock( - *start_ts, - key.to_raw().unwrap(), - Some(*index), - ) { - return Err(Error::MemoryQuotaExceeded); - } - } - ChangeRow::Commit { key, .. 
} => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(*index)), - // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => { - self.resolver.update_tracked_index(*index); - } - ChangeRow::IngestSsT => { - self.resolver.update_tracked_index(*index); + }, + ChangeLog::Rows { rows, index } => { + for row in rows { + match row { + ChangeRow::Prewrite { key, start_ts, .. } => { + if !self.resolver.track_lock( + *start_ts, + key.to_raw().unwrap(), + Some(*index), + ) { + return Err(Error::MemoryQuotaExceeded); } } + ChangeRow::Commit { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(*index)), + // One pc command do not contains any lock, so just skip it + ChangeRow::OnePc { .. } => { + self.resolver.update_tracked_index(*index); + } + ChangeRow::IngestSsT => { + self.resolver.update_tracked_index(*index); + } } } } @@ -247,38 +337,26 @@ impl ObserveRegion { ScanEntry::None => { // Update the `tracked_index` to the snapshot's `apply_index` self.resolver.update_tracked_index(apply_index); - let pending_tracked_index = - match std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready) { - ResolverStatus::Pending { - locks, - tracked_index, - .. - } => { - for lock in locks { - match lock { - PendingLock::Track { key, start_ts } => { - if !self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(tracked_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } - } - PendingLock::Untrack { key, .. } => { - self.resolver.untrack_lock( - &key.to_raw().unwrap(), - Some(tracked_index), - ) - } - } + let mut resolver_status = + std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); + let (pending_tracked_index, pending_locks) = + resolver_status.drain_pending_locks(self.meta.id); + for lock in pending_locks { + match lock { + PendingLock::Track { key, start_ts } => { + if !self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(pending_tracked_index), + ) { + return Err(Error::MemoryQuotaExceeded); } - tracked_index } - ResolverStatus::Ready => { - panic!("region {:?} resolver has ready", self.meta.id) - } - }; + PendingLock::Untrack { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), + } + } info!( "Resolver initialized"; "region" => self.meta.id, @@ -457,7 +535,7 @@ where // Stop observing data handle.stop_observing(); // Stop scanning data - if let ResolverStatus::Pending { cancelled, .. } = resolver_status { + if let ResolverStatus::Pending { ref cancelled, .. } = resolver_status { cancelled.store(true, Ordering::Release); } } else { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 15b3463f70ec..74da743952cd 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -138,4 +138,10 @@ lazy_static! 
{ "The minimal (non-zero) resolved ts gap for observe leader peers" ) .unwrap(); + pub static ref RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM: Histogram = register_histogram!( + "tikv_resolved_ts_initial_scan_backoff_duration_seconds", + "Bucketed histogram of resolved-ts initial scan backoff duration", + exponential_buckets(0.1, 2.0, 16).unwrap(), + ) + .unwrap(); } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 6bee5efd2f68..405138d41cf7 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,7 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; -const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +pub(crate) const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. @@ -74,6 +74,8 @@ impl Drop for Resolver { "region_id" => self.region_id, "bytes" => bytes, "num_locks" => num_locks, + "memory_quota_in_use" => self.memory_quota.in_use(), + "memory_quota_capacity" => self.memory_quota.capacity(), ); } self.memory_quota.free(bytes); diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index e8665e9d8609..615819db7993 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -21,7 +21,7 @@ use txn_types::{Key, Lock, LockType, TimeStamp}; use crate::{ errors::{Error, Result}, - metrics::RTS_SCAN_DURATION_HISTOGRAM, + metrics::*, }; const DEFAULT_SCAN_BATCH_SIZE: usize = 1024; @@ -86,6 +86,7 @@ impl, E: KvEngine> ScannerPool { let cdc_handle = self.cdc_handle.clone(); let fut = async move { if let Some(backoff) = task.backoff { + RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM.observe(backoff.as_secs_f64()); if let Err(e) = GLOBAL_TIMER_HANDLE .delay(std::time::Instant::now() + backoff) .compat() @@ -113,6 +114,7 @@ impl, E: KvEngine> ScannerPool { return; } }; + fail::fail_point!("resolved_ts_after_scanner_get_snapshot"); let start = Instant::now(); let apply_index = snap.get_apply_index().unwrap(); let mut entries = vec![]; diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index 808f5ed62ff4..0c594ab1d1dd 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -2,6 +2,11 @@ #[path = "../mod.rs"] mod testsuite; +use std::{ + sync::{mpsc::channel, Mutex}, + time::Duration, +}; + use futures::executor::block_on; use kvproto::kvrpcpb::*; use pd_client::PdClient; @@ -128,3 +133,43 @@ fn test_report_min_resolved_ts_disable() { fail::remove("mock_min_resolved_ts_interval_disable"); suite.stop(); } + +#[test] +fn test_pending_locks_memory_quota_exceeded() { + // Pause scan lock so that locks will be put in pending locks. + fail::cfg("resolved_ts_after_scanner_get_snapshot", "pause").unwrap(); + // Check if memory quota exceeded is triggered. + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback( + "resolved_ts_on_pending_locks_memory_quota_exceeded", + move || { + let sender = tx.lock().unwrap(); + sender.send(()).unwrap(); + }, + ) + .unwrap(); + + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + // Must not trigger memory quota exceeded. + rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + + // Set a small memory quota to trigger memory quota exceeded. 
+ suite.must_change_memory_quota(1, 1); + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Must trigger memory quota exceeded. + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + fail::remove("resolved_ts_after_scanner_get_snapshot"); + fail::remove("resolved_ts_on_pending_locks_memory_quota_exceeded"); + suite.stop(); +} diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index d327041cd8ae..c78540c601a5 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39471,6 +39471,79 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The backoff duration before starting initial scan", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573950, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Initial scan backoff duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, From 6b91e4a2284296887c1a0eb32865e5d8ab90ebb7 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Sep 2023 16:45:42 +0800 Subject: [PATCH 031/203] cdc: deregister delegate if memory quota exceeded (#15486) close tikv/tikv#15412 Similar to resolved-ts endpoint, cdc endpoint maintains resolvers for subscribed regions. These resolvers also need memory quota, otherwise they may cause OOM. This commit lets cdc endpoint deregister regions if they exceed memory quota. 
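To make the intent concrete, here is a minimal, self-contained sketch of the accounting pattern this change applies: charge the shared memory quota before buffering a pending lock, release the charge when the locks are drained, and surface a quota-exceeded error so the caller can deregister the delegate instead of buffering without bound. `MemoryQuota` and `PendingLocks` below are simplified stand-ins written only for this sketch, not the actual `tikv_util::memory::MemoryQuota` or the `Pending` struct in `cdc::delegate`.

    use std::sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    };

    // Simplified stand-in for a shared memory quota; the real type lives in
    // tikv_util::memory and has a richer API.
    struct MemoryQuota {
        in_use: AtomicUsize,
        capacity: usize,
    }

    impl MemoryQuota {
        fn new(capacity: usize) -> Self {
            MemoryQuota { in_use: AtomicUsize::new(0), capacity }
        }

        // Returns false when charging `bytes` would exceed the capacity.
        fn alloc(&self, bytes: usize) -> bool {
            let mut cur = self.in_use.load(Ordering::Relaxed);
            loop {
                if cur + bytes > self.capacity {
                    return false;
                }
                match self.in_use.compare_exchange_weak(
                    cur, cur + bytes, Ordering::Relaxed, Ordering::Relaxed,
                ) {
                    Ok(_) => return true,
                    Err(actual) => cur = actual,
                }
            }
        }

        fn free(&self, bytes: usize) {
            self.in_use.fetch_sub(bytes, Ordering::Relaxed);
        }
    }

    // Simplified stand-in for the delegate's pending-lock buffer.
    struct PendingLocks {
        locks: Vec<Vec<u8>>, // raw keys only, for illustration
        quota: Arc<MemoryQuota>,
    }

    impl PendingLocks {
        // Charge the quota before queueing; on failure the caller is expected
        // to deregister the region instead of buffering without bound.
        fn push(&mut self, key: Vec<u8>) -> Result<(), &'static str> {
            if !self.quota.alloc(key.len()) {
                return Err("memory quota exceeded");
            }
            self.locks.push(key);
            Ok(())
        }

        // Drain the buffer (e.g. once the region is ready) and give the
        // charged bytes back to the quota.
        fn drain(&mut self) -> Vec<Vec<u8>> {
            let locks = std::mem::take(&mut self.locks);
            for key in &locks {
                self.quota.free(key.len());
            }
            locks
        }
    }

    fn main() {
        let quota = Arc::new(MemoryQuota::new(16));
        let mut pending = PendingLocks { locks: Vec::new(), quota };

        // A small key fits into the quota and is buffered.
        assert!(pending.push(b"k1".to_vec()).is_ok());
        // A key larger than the remaining quota is rejected up front.
        assert!(pending.push(vec![0u8; 64]).is_err());
        // Draining releases the charged bytes, so new keys fit again.
        assert_eq!(pending.drain().len(), 1);
        assert!(pending.push(vec![0u8; 16]).is_ok());
    }

Charging up front is what lets the endpoint fail fast and drop the delegate; the real code additionally frees the charged bytes in `Drop` so a deregistered-but-still-pending delegate does not leak quota.
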
Signed-off-by: Neil Shen --- components/cdc/src/channel.rs | 3 + components/cdc/src/delegate.rs | 183 ++++++++--- components/cdc/src/endpoint.rs | 40 ++- components/cdc/src/errors.rs | 2 + components/cdc/src/initializer.rs | 56 +++- components/cdc/tests/failpoints/mod.rs | 1 + .../cdc/tests/failpoints/test_memory_quota.rs | 289 ++++++++++++++++++ components/cdc/tests/mod.rs | 11 +- components/resolved_ts/src/resolver.rs | 25 +- 9 files changed, 517 insertions(+), 93 deletions(-) create mode 100644 components/cdc/tests/failpoints/test_memory_quota.rs diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index 6a8c3d5c3aa3..a3ddeeb90305 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -52,6 +52,9 @@ pub enum CdcEvent { impl CdcEvent { pub fn size(&self) -> u32 { + fail::fail_point!("cdc_event_size", |size| size + .map(|s| s.parse::().unwrap()) + .unwrap_or(0)); match self { CdcEvent::ResolvedTs(ref r) => { // For region id, it is unlikely to exceed 100,000,000 which is diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index da5c26aad30f..e109b3368b40 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -28,9 +28,13 @@ use raftstore::{ store::util::compare_region_epoch, Error as RaftStoreError, }; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, ON_DROP_WARN_HEAP_SIZE}; use tikv::storage::{txn::TxnEntry, Statistics}; -use tikv_util::{debug, info, warn}; +use tikv_util::{ + debug, info, + memory::{HeapSize, MemoryQuota}, + warn, +}; use txn_types::{Key, Lock, LockType, TimeStamp, WriteBatchFlags, WriteRef, WriteType}; use crate::{ @@ -226,16 +230,77 @@ impl Downstream { } } -#[derive(Default)] struct Pending { - pub downstreams: Vec, - pub locks: Vec, - pub pending_bytes: usize, + downstreams: Vec, + locks: Vec, + pending_bytes: usize, + memory_quota: Arc, +} + +impl Pending { + fn new(memory_quota: Arc) -> Pending { + Pending { + downstreams: vec![], + locks: vec![], + pending_bytes: 0, + memory_quota, + } + } + + fn push_pending_lock(&mut self, lock: PendingLock) -> Result<()> { + let bytes = lock.heap_size(); + if !self.memory_quota.alloc(bytes) { + return Err(Error::MemoryQuotaExceeded); + } + self.locks.push(lock); + self.pending_bytes += bytes; + CDC_PENDING_BYTES_GAUGE.add(bytes as i64); + Ok(()) + } + + fn on_region_ready(&mut self, resolver: &mut Resolver) -> Result<()> { + fail::fail_point!("cdc_pending_on_region_ready", |_| Err( + Error::MemoryQuotaExceeded + )); + // Must take locks, otherwise it may double free memory quota on drop. + for lock in mem::take(&mut self.locks) { + self.memory_quota.free(lock.heap_size()); + match lock { + PendingLock::Track { key, start_ts } => { + if !resolver.track_lock(start_ts, key, None) { + return Err(Error::MemoryQuotaExceeded); + } + } + PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), + } + } + Ok(()) + } } impl Drop for Pending { fn drop(&mut self) { CDC_PENDING_BYTES_GAUGE.sub(self.pending_bytes as i64); + let locks = mem::take(&mut self.locks); + if locks.is_empty() { + return; + } + + // Free memory quota used by pending locks and unlocks. 
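+ // Without this, bytes charged in `push_pending_lock` would never be returned if a still-pending delegate is dropped.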
+ let mut bytes = 0; + let num_locks = locks.len(); + for lock in locks { + bytes += lock.heap_size(); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("cdc drop huge Pending"; + "bytes" => bytes, + "num_locks" => num_locks, + "memory_quota_in_use" => self.memory_quota.in_use(), + "memory_quota_capacity" => self.memory_quota.capacity(), + ); + } + self.memory_quota.free(bytes); } } @@ -244,6 +309,14 @@ enum PendingLock { Untrack { key: Vec }, } +impl HeapSize for PendingLock { + fn heap_size(&self) -> usize { + match self { + PendingLock::Track { key, .. } | PendingLock::Untrack { key } => key.heap_size(), + } + } +} + /// A CDC delegate of a raftstore region peer. /// /// It converts raft commands into CDC events and broadcast to downstreams. @@ -265,14 +338,18 @@ pub struct Delegate { impl Delegate { /// Create a Delegate the given region. - pub fn new(region_id: u64, txn_extra_op: Arc>) -> Delegate { + pub fn new( + region_id: u64, + txn_extra_op: Arc>, + memory_quota: Arc, + ) -> Delegate { Delegate { region_id, handle: ObserveHandle::new(), resolver: None, region: None, resolved_downstreams: Vec::new(), - pending: Some(Pending::default()), + pending: Some(Pending::new(memory_quota)), txn_extra_op, failed: false, } @@ -395,7 +472,7 @@ impl Delegate { &mut self, mut resolver: Resolver, region: Region, - ) -> Vec<(&Downstream, Error)> { + ) -> Result> { assert!( self.resolver.is_none(), "region {} resolver should not be ready", @@ -408,29 +485,24 @@ impl Delegate { } // Mark the delegate as initialized. - let mut pending = self.pending.take().unwrap(); - self.region = Some(region); info!("cdc region is ready"; "region_id" => self.region_id); + // Downstreams in pending must be moved to resolved_downstreams + // immediately and must not return in the middle, otherwise the delegate + // loses downstreams. + let mut pending = self.pending.take().unwrap(); + self.resolved_downstreams = mem::take(&mut pending.downstreams); - for lock in mem::take(&mut pending.locks) { - match lock { - PendingLock::Track { key, start_ts } => { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(resolver.track_lock(start_ts, key, None)); - } - PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), - } - } + pending.on_region_ready(&mut resolver)?; self.resolver = Some(resolver); + self.region = Some(region); - self.resolved_downstreams = mem::take(&mut pending.downstreams); let mut failed_downstreams = Vec::new(); for downstream in self.downstreams() { if let Err(e) = self.check_epoch_on_ready(downstream) { failed_downstreams.push((downstream, e)); } } - failed_downstreams + Ok(failed_downstreams) } /// Try advance and broadcast resolved ts. @@ -611,16 +683,14 @@ impl Delegate { let mut txn_rows: HashMap, (EventRow, bool)> = HashMap::default(); let mut raw_rows: Vec = Vec::new(); for mut req in requests { - match req.get_cmd_type() { - CmdType::Put => { - self.sink_put( - req.take_put(), - is_one_pc, - &mut txn_rows, - &mut raw_rows, - &mut read_old_value, - )?; - } + let res = match req.get_cmd_type() { + CmdType::Put => self.sink_put( + req.take_put(), + is_one_pc, + &mut txn_rows, + &mut raw_rows, + &mut read_old_value, + ), CmdType::Delete => self.sink_delete(req.take_delete()), _ => { debug!( @@ -628,7 +698,12 @@ impl Delegate { "region_id" => self.region_id, "command" => ?req, ); + Ok(()) } + }; + if res.is_err() { + self.mark_failed(); + return res; } } @@ -825,18 +900,17 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. 
match self.resolver { Some(ref mut resolver) => { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(resolver.track_lock(row.start_ts.into(), row.key.clone(), None)); + if !resolver.track_lock(row.start_ts.into(), row.key.clone(), None) { + return Err(Error::MemoryQuotaExceeded); + } } None => { assert!(self.pending.is_some(), "region resolver not ready"); let pending = self.pending.as_mut().unwrap(); - pending.locks.push(PendingLock::Track { + pending.push_pending_lock(PendingLock::Track { key: row.key.clone(), start_ts: row.start_ts.into(), - }); - pending.pending_bytes += row.key.len(); - CDC_PENDING_BYTES_GAUGE.add(row.key.len() as i64); + })?; } } @@ -858,7 +932,7 @@ impl Delegate { Ok(()) } - fn sink_delete(&mut self, mut delete: DeleteRequest) { + fn sink_delete(&mut self, mut delete: DeleteRequest) -> Result<()> { match delete.cf.as_str() { "lock" => { let raw_key = Key::from_encoded(delete.take_key()).into_raw().unwrap(); @@ -866,11 +940,8 @@ impl Delegate { Some(ref mut resolver) => resolver.untrack_lock(&raw_key, None), None => { assert!(self.pending.is_some(), "region resolver not ready"); - let key_len = raw_key.len(); let pending = self.pending.as_mut().unwrap(); - pending.locks.push(PendingLock::Untrack { key: raw_key }); - pending.pending_bytes += key_len; - CDC_PENDING_BYTES_GAUGE.add(key_len as i64); + pending.push_pending_lock(PendingLock::Untrack { key: raw_key })?; } } } @@ -879,6 +950,7 @@ impl Delegate { panic!("invalid cf {}", other); } } + Ok(()) } fn sink_admin(&mut self, request: AdminRequest, mut response: AdminResponse) -> Result<()> { @@ -949,7 +1021,7 @@ impl Delegate { } fn stop_observing(&self) { - info!("stop observing"; "region_id" => self.region_id, "failed" => self.failed); + info!("cdc stop observing"; "region_id" => self.region_id, "failed" => self.failed); // Stop observe further events. self.handle.stop_observing(); // To inform transaction layer no more old values are required for the region. @@ -1184,12 +1256,18 @@ mod tests { ObservedRange::default(), ); downstream.set_sink(sink); - let mut delegate = Delegate::new(region_id, Default::default()); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + let mut delegate = Delegate::new(region_id, Default::default(), memory_quota); delegate.subscribe(downstream).unwrap(); assert!(delegate.handle.is_observing()); let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let resolver = Resolver::new(region_id, memory_quota); - assert!(delegate.on_region_ready(resolver, region).is_empty()); + assert!( + delegate + .on_region_ready(resolver, region) + .unwrap() + .is_empty() + ); assert!(delegate.downstreams()[0].observed_range.all_key_covered); let rx_wrap = Cell::new(Some(rx)); @@ -1313,8 +1391,9 @@ mod tests { }; // Create a new delegate. 
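+ // A quota of usize::MAX is effectively unlimited, so this test exercises the new parameter without hitting quota checks.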
+ let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op.clone()); + let mut delegate = Delegate::new(1, txn_extra_op.clone(), memory_quota); assert_eq!(txn_extra_op.load(), TxnExtraOp::Noop); assert!(delegate.handle.is_observing()); @@ -1340,7 +1419,9 @@ mod tests { region.mut_region_epoch().set_version(1); { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); - let failures = delegate.on_region_ready(Resolver::new(1, memory_quota), region); + let failures = delegate + .on_region_ready(Resolver::new(1, memory_quota), region) + .unwrap(); assert_eq!(failures.len(), 1); let id = failures[0].0.id; delegate.unsubscribe(id, None); @@ -1431,8 +1512,9 @@ mod tests { Key::from_raw(b"d").into_encoded(), ) .unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op); + let mut delegate = Delegate::new(1, txn_extra_op, memory_quota); assert!(delegate.handle.is_observing()); let mut map = HashMap::default(); @@ -1500,8 +1582,9 @@ mod tests { Key::from_raw(b"f").into_encoded(), ) .unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op); + let mut delegate = Delegate::new(1, txn_extra_op, memory_quota); assert!(delegate.handle.is_observing()); let mut map = HashMap::default(); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 969d0cba0d9a..2b314f224430 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -736,7 +736,11 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint e.into_mut(), HashMapEntry::Vacant(e) => { is_new_delegate = true; - e.insert(Delegate::new(region_id, txn_extra_op)) + e.insert(Delegate::new( + region_id, + txn_extra_op, + self.sink_memory_quota.clone(), + )) } }; @@ -802,10 +806,11 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { @@ -858,18 +863,26 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { + for (downstream, e) in fails { + deregisters.push(Deregister::Downstream { + conn_id: downstream.get_conn_id(), + request_id: downstream.get_req_id(), + region_id, + downstream_id: downstream.get_id(), + err: Some(e), + }); + } + } + Err(e) => deregisters.push(Deregister::Delegate { region_id, - downstream_id: downstream.get_id(), - err: Some(e), - }); + observe_id, + err: e, + }), } } else { debug!("cdc stale region ready"; @@ -883,7 +896,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint Initializer { change_observer: ChangeObserver, cdc_handle: T, concurrency_semaphore: Arc, + memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); let _permit = concurrency_semaphore.acquire().await; @@ -173,7 +174,7 @@ impl Initializer { } match fut.await { - Ok(resp) => self.on_change_cmd_response(resp).await, + Ok(resp) => self.on_change_cmd_response(resp, memory_quota).await, Err(e) => Err(Error::Other(box_err!(e))), } } @@ -181,11 +182,13 @@ impl Initializer { pub(crate) async fn on_change_cmd_response( &mut self, mut resp: ReadResponse, + memory_quota: Arc, ) -> Result<()> { if let Some(region_snapshot) = resp.snapshot { assert_eq!(self.region_id, region_snapshot.get_region().get_id()); let region = region_snapshot.get_region().clone(); - self.async_incremental_scan(region_snapshot, region).await + 
self.async_incremental_scan(region_snapshot, region, memory_quota) + .await } else { assert!( resp.response.get_header().has_error(), @@ -201,6 +204,7 @@ impl Initializer { &mut self, snap: S, region: Region, + memory_quota: Arc, ) -> Result<()> { let downstream_id = self.downstream_id; let region_id = region.get_id(); @@ -216,8 +220,6 @@ impl Initializer { "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); let mut resolver = if self.build_resolver { - // TODO: limit the memory usage of the resolver. - let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); Some(Resolver::new(region_id, memory_quota)) } else { None @@ -422,9 +424,9 @@ impl Initializer { let lock = Lock::parse(value)?; match lock.lock_type { LockType::Put | LockType::Delete => { - // TODO: handle memory quota exceed, for now, quota is set to - // usize::MAX. - assert!(resolver.track_lock(lock.ts, key, None)); + if !resolver.track_lock(lock.ts, key, None) { + return Err(Error::MemoryQuotaExceeded); + } } _ => (), }; @@ -745,21 +747,37 @@ mod tests { } }); - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); check_result(); initializer .downstream_state .store(DownstreamState::Initializing); initializer.max_scan_batch_bytes = total_bytes; - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); check_result(); initializer .downstream_state .store(DownstreamState::Initializing); initializer.build_resolver = false; - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); loop { let task = rx.recv_timeout(Duration::from_millis(100)); @@ -772,7 +790,8 @@ mod tests { // Test cancellation. initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.async_incremental_scan(snap.clone(), region)).unwrap_err(); + block_on(initializer.async_incremental_scan(snap.clone(), region, memory_quota.clone())) + .unwrap_err(); // Cancel error should trigger a deregsiter. let mut region = Region::default(); @@ -784,14 +803,15 @@ mod tests { response: Default::default(), txn_extra_op: Default::default(), }; - block_on(initializer.on_change_cmd_response(resp.clone())).unwrap_err(); + block_on(initializer.on_change_cmd_response(resp.clone(), memory_quota.clone())) + .unwrap_err(); // Disconnect sink by dropping runtime (it also drops drain). 
drop(pool); initializer .downstream_state .store(DownstreamState::Initializing); - block_on(initializer.on_change_cmd_response(resp)).unwrap_err(); + block_on(initializer.on_change_cmd_response(resp, memory_quota)).unwrap_err(); worker.stop(); } @@ -819,8 +839,9 @@ mod tests { filter_loop, ); let th = pool.spawn(async move { + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer - .async_incremental_scan(snap, Region::default()) + .async_incremental_scan(snap, Region::default(), memory_quota) .await .unwrap(); }); @@ -904,8 +925,9 @@ mod tests { let snap = engine.snapshot(Default::default()).unwrap(); let th = pool.spawn(async move { + let memory_qutoa = Arc::new(MemoryQuota::new(usize::MAX)); initializer - .async_incremental_scan(snap, Region::default()) + .async_incremental_scan(snap, Region::default(), memory_qutoa) .await .unwrap(); }); @@ -1017,12 +1039,14 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); let concurrency_semaphore = Arc::new(Semaphore::new(1)); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); block_on(initializer.initialize( change_cmd, raft_router.clone(), concurrency_semaphore.clone(), + memory_quota.clone(), )) .unwrap_err(); @@ -1048,7 +1072,7 @@ mod tests { &concurrency_semaphore, ); let res = initializer - .initialize(change_cmd, raft_router, concurrency_semaphore) + .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) .await; tx1.send(res).unwrap(); }); diff --git a/components/cdc/tests/failpoints/mod.rs b/components/cdc/tests/failpoints/mod.rs index 082b1c15f671..619ee2009856 100644 --- a/components/cdc/tests/failpoints/mod.rs +++ b/components/cdc/tests/failpoints/mod.rs @@ -4,6 +4,7 @@ #![test_runner(test_util::run_failpoint_tests)] mod test_endpoint; +mod test_memory_quota; mod test_observe; mod test_register; mod test_resolve; diff --git a/components/cdc/tests/failpoints/test_memory_quota.rs b/components/cdc/tests/failpoints/test_memory_quota.rs new file mode 100644 index 000000000000..5b564ba61ecd --- /dev/null +++ b/components/cdc/tests/failpoints/test_memory_quota.rs @@ -0,0 +1,289 @@ +// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{sync::*, time::Duration}; + +use cdc::{Task, Validate}; +use futures::{executor::block_on, SinkExt}; +use grpcio::WriteFlags; +use kvproto::{cdcpb::*, kvrpcpb::*}; +use pd_client::PdClient; +use test_raftstore::*; + +use crate::{new_event_feed, TestSuiteBuilder}; + +#[test] +fn test_resolver_track_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let event = receive_event(false); + event.events.into_iter().for_each(|e| { + match e.event.unwrap() { + // Even if there is no write, + // it should always outputs an Initialized event. 
+ Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + } + }); + + // Client must receive messages when there is no congest error. + let key_size = memory_quota / 2; + let (k, v) = (vec![1; key_size], vec![5]); + // Prewrite + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + assert_eq!(entries.entries[0].get_type(), EventLogType::Prewrite); + } + other => panic!("unknown event {:?}", other), + } + + // Trigger congest error. + let key_size = memory_quota * 2; + let (k, v) = (vec![2; key_size], vec![5]); + // Prewrite + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + suite.stop(); +} + +#[test] +fn test_pending_on_region_ready_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Trigger memory quota exceeded error. + fail::cfg("cdc_pending_on_region_ready", "return").unwrap(); + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let event = receive_event(false); + event.events.into_iter().for_each(|e| { + match e.event.unwrap() { + // Even if there is no write, + // it should always outputs an Initialized event. + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + } + }); + // MemoryQuotaExceeded error is triggered on_region_ready. 
+ let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + fail::remove("cdc_incremental_scan_start"); + suite.stop(); +} + +#[test] +fn test_pending_push_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Pause scan so that no region can be initialized, and all locks will be + // put in pending locks. + fail::cfg("cdc_incremental_scan_start", "pause").unwrap(); + + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + + // Trigger congest error. + let key_size = memory_quota * 2; + let (k, v) = (vec![1; key_size], vec![5]); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + fail::remove("cdc_incremental_scan_start"); + suite.stop(); +} + +#[test] +fn test_scan_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Put a lock that exceeds memory quota. 
+ let key_size = memory_quota * 2; + let (k, v) = (vec![1; key_size], vec![5]); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + + // No region can be initialized. + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + suite.stop(); +} diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index ec479909793d..afd209af2d3a 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -168,6 +168,7 @@ impl TestSuiteBuilder { let count = cluster.count; let pd_cli = cluster.pd_client.clone(); let mut endpoints = HashMap::default(); + let mut quotas = HashMap::default(); let mut obs = HashMap::default(); let mut concurrency_managers = HashMap::default(); // Hack! node id are generated from 1..count+1. @@ -177,15 +178,14 @@ impl TestSuiteBuilder { let mut sim = cluster.sim.wl(); // Register cdc service to gRPC server. + let memory_quota = Arc::new(MemoryQuota::new(memory_quota)); + let memory_quota_ = memory_quota.clone(); let scheduler = worker.scheduler(); sim.pending_services .entry(id) .or_default() .push(Box::new(move || { - create_change_data(cdc::Service::new( - scheduler.clone(), - Arc::new(MemoryQuota::new(memory_quota)), - )) + create_change_data(cdc::Service::new(scheduler.clone(), memory_quota_.clone())) })); sim.txn_extra_schedulers.insert( id, @@ -200,6 +200,7 @@ impl TestSuiteBuilder { }, )); endpoints.insert(id, worker); + quotas.insert(id, memory_quota); } runner(&mut cluster); @@ -224,7 +225,7 @@ impl TestSuiteBuilder { cm.clone(), env, sim.security_mgr.clone(), - Arc::new(MemoryQuota::new(usize::MAX)), + quotas[id].clone(), sim.get_causal_ts_provider(*id), ); let mut updated_cfg = cfg.clone(); diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 405138d41cf7..ef257ad47620 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,7 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; -pub(crate) const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. 
@@ -188,13 +188,16 @@ impl Resolver { if let Some(index) = index { self.update_tracked_index(index); } + let bytes = self.lock_heap_size(&key); debug!( - "track lock {}@{}, region {}", + "track lock {}@{}", &log_wrappers::Value::key(&key), - start_ts, - self.region_id + start_ts; + "region_id" => self.region_id, + "memory_in_use" => self.memory_quota.in_use(), + "memory_capacity" => self.memory_quota.capacity(), + "key_heap_size" => bytes, ); - let bytes = self.lock_heap_size(&key); if !self.memory_quota.alloc(bytes) { return false; } @@ -213,14 +216,18 @@ impl Resolver { self.memory_quota.free(bytes); start_ts } else { - debug!("untrack a lock that was not tracked before"; "key" => &log_wrappers::Value::key(key)); + debug!("untrack a lock that was not tracked before"; + "key" => &log_wrappers::Value::key(key), + "region_id" => self.region_id, + ); return; }; debug!( - "untrack lock {}@{}, region {}", + "untrack lock {}@{}", &log_wrappers::Value::key(key), - start_ts, - self.region_id, + start_ts; + "region_id" => self.region_id, + "memory_in_use" => self.memory_quota.in_use(), ); let mut shrink_ts = None; From 9bf96f921637f1823f8507f822a215dff55d50e1 Mon Sep 17 00:00:00 2001 From: ekexium Date: Wed, 6 Sep 2023 07:20:12 +0800 Subject: [PATCH 032/203] metrics: more logs and metrics for resolved-ts (#15416) ref tikv/tikv#15082 Add more logs and metrics for resolved-ts. Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 6 +- components/cdc/src/delegate.rs | 4 +- components/cdc/src/initializer.rs | 4 +- components/concurrency_manager/src/lib.rs | 17 + .../concurrency_manager/src/lock_table.rs | 8 + components/raftstore/src/store/util.rs | 1 - components/resolved_ts/src/advance.rs | 9 +- components/resolved_ts/src/endpoint.rs | 563 +++++++++++++----- components/resolved_ts/src/metrics.rs | 68 ++- components/resolved_ts/src/resolver.rs | 112 +++- metrics/grafana/tikv_details.json | 12 +- 11 files changed, 615 insertions(+), 189 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index ef6e24d9d8f0..d6d49f0cf1c6 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -8,7 +8,7 @@ use dashmap::{ }; use kvproto::metapb::Region; use raftstore::coprocessor::*; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, TsSource}; use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; @@ -516,7 +516,7 @@ impl TwoPhaseResolver { return min_ts.min(stable_ts); } - self.resolver.resolve(min_ts, None) + self.resolver.resolve(min_ts, None, TsSource::BackupStream) } pub fn resolved_ts(&self) -> TimeStamp { @@ -548,7 +548,7 @@ impl TwoPhaseResolver { // advance the internal resolver. // the start ts of initial scanning would be a safe ts for min ts // -- because is used to be a resolved ts. 
- self.resolver.resolve(ts, None); + self.resolver.resolve(ts, None, TsSource::BackupStream); } None => { warn!("BUG: a two-phase resolver is executing phase_one_done when not in phase one"; "resolver" => ?self) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index e109b3368b40..f7125aa88823 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -28,7 +28,7 @@ use raftstore::{ store::util::compare_region_epoch, Error as RaftStoreError, }; -use resolved_ts::{Resolver, ON_DROP_WARN_HEAP_SIZE}; +use resolved_ts::{Resolver, TsSource, ON_DROP_WARN_HEAP_SIZE}; use tikv::storage::{txn::TxnEntry, Statistics}; use tikv_util::{ debug, info, @@ -514,7 +514,7 @@ impl Delegate { } debug!("cdc try to advance ts"; "region_id" => self.region_id, "min_ts" => min_ts); let resolver = self.resolver.as_mut().unwrap(); - let resolved_ts = resolver.resolve(min_ts, None); + let resolved_ts = resolver.resolve(min_ts, None, TsSource::Cdc); debug!("cdc resolved ts updated"; "region_id" => self.region_id, "resolved_ts" => resolved_ts); Some(resolved_ts) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 25b7175a08d3..ef0b15caab9d 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -23,7 +23,7 @@ use raftstore::{ msg::{Callback, ReadResponse}, }, }; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, TsSource}; use tikv::storage::{ kv::Snapshot, mvcc::{DeltaScanner, ScannerBuilder}, @@ -467,7 +467,7 @@ impl Initializer { fn finish_building_resolver(&self, mut resolver: Resolver, region: Region) { let observe_id = self.observe_id; - let rts = resolver.resolve(TimeStamp::zero(), None); + let rts = resolver.resolve(TimeStamp::zero(), None, TsSource::Cdc); info!( "cdc resolver initialized and schedule resolver ready"; "region_id" => region.get_id(), diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index ce77cb87a424..1c6bdb8dbf16 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -124,6 +124,23 @@ impl ConcurrencyManager { }); min_lock_ts } + + pub fn global_min_lock(&self) -> Option<(TimeStamp, Key)> { + let mut min_lock: Option<(TimeStamp, Key)> = None; + // TODO: The iteration looks not so efficient. It's better to be optimized. + self.lock_table.for_each_kv(|key, handle| { + if let Some(curr_ts) = handle.with_lock(|lock| lock.as_ref().map(|l| l.ts)) { + if min_lock + .as_ref() + .map(|(ts, _)| ts > &curr_ts) + .unwrap_or(true) + { + min_lock = Some((curr_ts, key.clone())); + } + } + }); + min_lock + } } #[cfg(test)] diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index db6995fa1d0f..8f4fb8952c33 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -115,6 +115,14 @@ impl LockTable { } } + pub fn for_each_kv(&self, mut f: impl FnMut(&Key, Arc)) { + for entry in self.0.iter() { + if let Some(handle) = entry.value().upgrade() { + f(entry.key(), handle); + } + } + } + /// Removes the key and its key handle from the map. 
pub fn remove(&self, key: &Key) { self.0.remove(key); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 880a394fdae1..3f34fe691ee0 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1439,7 +1439,6 @@ impl RegionReadProgress { self.safe_ts() } - // Dump the `LeaderInfo` and the peer list pub fn get_core(&self) -> MutexGuard<'_, RegionReadProgressCore> { self.core.lock().unwrap() } diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 4428ed01a358..59478f5affba 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -43,7 +43,7 @@ use tokio::{ }; use txn_types::TimeStamp; -use crate::{endpoint::Task, metrics::*}; +use crate::{endpoint::Task, metrics::*, TsSource}; pub(crate) const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; @@ -57,7 +57,7 @@ pub struct AdvanceTsWorker { scheduler: Scheduler, /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. - concurrency_manager: ConcurrencyManager, + pub(crate) concurrency_manager: ConcurrencyManager, // cache the last pd tso, used to approximate the next timestamp w/o an actual TSO RPC pub(crate) last_pd_tso: Arc>>, @@ -114,15 +114,17 @@ impl AdvanceTsWorker { if let Ok(mut last_pd_tso) = last_pd_tso.try_lock() { *last_pd_tso = Some((min_ts, Instant::now())); } + let mut ts_source = TsSource::PdTso; // Sync with concurrency manager so that it can work correctly when // optimizations like async commit is enabled. // Note: This step must be done before scheduling `Task::MinTs` task, and the // resolver must be checked in or after `Task::MinTs`' execution. cm.update_max_ts(min_ts); - if let Some(min_mem_lock_ts) = cm.global_min_lock_ts() { + if let Some((min_mem_lock_ts, lock)) = cm.global_min_lock() { if min_mem_lock_ts < min_ts { min_ts = min_mem_lock_ts; + ts_source = TsSource::MemoryLock(lock); } } @@ -131,6 +133,7 @@ impl AdvanceTsWorker { if let Err(e) = scheduler.schedule(Task::ResolvedTsAdvanced { regions, ts: min_ts, + ts_source, }) { info!("failed to schedule advance event"; "err" => ?e); } diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index fc3e24de1e4e..e2d2aec4f701 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -1,12 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + cmp::min, collections::HashMap, fmt, marker::PhantomData, sync::{ atomic::{AtomicBool, Ordering}, - Arc, Mutex, + Arc, Mutex, MutexGuard, }, time::Duration, }; @@ -14,7 +15,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use grpcio::Environment; -use kvproto::{metapb::Region, raft_cmdpb::AdminCmdType}; +use kvproto::{kvrpcpb::LeaderInfo, metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; use pd_client::PdClient; use raftstore::{ @@ -22,7 +23,9 @@ use raftstore::{ router::CdcHandle, store::{ fsm::store::StoreRegionMeta, - util::{self, RegionReadProgress, RegionReadProgressRegistry}, + util::{ + self, ReadState, RegionReadProgress, RegionReadProgressCore, RegionReadProgressRegistry, + }, }, }; use security::SecurityManager; @@ -39,12 +42,12 @@ use crate::{ advance::{AdvanceTsWorker, LeadershipResolver, DEFAULT_CHECK_LEADER_TIMEOUT_DURATION}, cmd::{ChangeLog, ChangeRow}, metrics::*, - resolver::Resolver, + resolver::{LastAttempt, Resolver}, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, - Error, Result, ON_DROP_WARN_HEAP_SIZE, + Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, }; -/// grace period for logging safe-ts and resolved-ts gap in slow log +/// grace period for identifying identifying slow resolved-ts and safe-ts. const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; const MEMORY_QUOTA_EXCEEDED_BACKOFF: Duration = Duration::from_secs(30); @@ -386,6 +389,265 @@ pub struct Endpoint { _phantom: PhantomData<(T, E)>, } +// methods that are used for metrics and logging +impl Endpoint +where + T: 'static + CdcHandle, + E: KvEngine, + S: StoreRegionMeta, +{ + fn is_leader(&self, store_id: Option, leader_store_id: Option) -> bool { + store_id.is_some() && store_id == leader_store_id + } + + fn collect_stats(&mut self) -> Stats { + let store_id = self.get_or_init_store_id(); + let mut stats = Stats::default(); + self.region_read_progress.with(|registry| { + for (region_id, read_progress) in registry { + let (leader_info, leader_store_id) = read_progress.dump_leader_info(); + let core = read_progress.get_core(); + let resolved_ts = leader_info.get_read_state().get_safe_ts(); + let safe_ts = core.read_state().ts; + + if resolved_ts == 0 { + stats.zero_ts_count += 1; + continue; + } + + if self.is_leader(store_id, leader_store_id) { + // leader resolved-ts + if resolved_ts < stats.min_leader_resolved_ts.resolved_ts { + let resolver = self.regions.get(region_id).map(|x| &x.resolver); + stats + .min_leader_resolved_ts + .set(*region_id, resolver, &core, &leader_info); + } + } else { + // follower safe-ts + if safe_ts > 0 && safe_ts < stats.min_follower_safe_ts.safe_ts { + stats.min_follower_safe_ts.set(*region_id, &core); + } + + // follower resolved-ts + if resolved_ts < stats.min_follower_resolved_ts.resolved_ts { + stats.min_follower_resolved_ts.set(*region_id, &core); + } + } + } + }); + + stats.resolver = self.collect_resolver_stats(); + stats.cm_min_lock = self.advance_worker.concurrency_manager.global_min_lock(); + stats + } + + fn collect_resolver_stats(&mut self) -> ResolverStats { + let mut stats = ResolverStats::default(); + for observed_region in self.regions.values() { + match &observed_region.resolver_status { + ResolverStatus::Pending { locks, .. } => { + for l in locks { + match l { + PendingLock::Track { key, .. } => stats.heap_size += key.len() as i64, + PendingLock::Untrack { key, .. 
} => stats.heap_size += key.len() as i64, + } + } + stats.unresolved_count += 1; + } + ResolverStatus::Ready { .. } => { + stats.heap_size += observed_region.resolver.approximate_heap_bytes() as i64; + stats.resolved_count += 1; + } + } + } + stats + } + + fn update_metrics(&self, stats: &Stats) { + let now = self.approximate_now_tso(); + // general + if stats.min_follower_resolved_ts.resolved_ts < stats.min_leader_resolved_ts.resolved_ts { + RTS_MIN_RESOLVED_TS.set(stats.min_follower_resolved_ts.resolved_ts as i64); + RTS_MIN_RESOLVED_TS_GAP.set(now.saturating_sub( + TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_RESOLVED_TS_REGION.set(stats.min_follower_resolved_ts.region_id as i64); + } else { + RTS_MIN_RESOLVED_TS.set(stats.min_leader_resolved_ts.resolved_ts as i64); + RTS_MIN_RESOLVED_TS_GAP.set(now.saturating_sub( + TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_RESOLVED_TS_REGION.set(stats.min_leader_resolved_ts.region_id as i64); + } + RTS_ZERO_RESOLVED_TS.set(stats.zero_ts_count); + + RTS_LOCK_HEAP_BYTES_GAUGE.set(stats.resolver.heap_size); + RTS_REGION_RESOLVE_STATUS_GAUGE_VEC + .with_label_values(&["resolved"]) + .set(stats.resolver.resolved_count); + RTS_REGION_RESOLVE_STATUS_GAUGE_VEC + .with_label_values(&["unresolved"]) + .set(stats.resolver.unresolved_count); + + CONCURRENCY_MANAGER_MIN_LOCK_TS.set( + stats + .cm_min_lock + .clone() + .map(|(ts, _)| ts.into_inner()) + .unwrap_or_default() as i64, + ); + + // min follower safe ts + RTS_MIN_FOLLOWER_SAFE_TS_REGION.set(stats.min_follower_safe_ts.region_id as i64); + RTS_MIN_FOLLOWER_SAFE_TS.set(stats.min_follower_safe_ts.safe_ts as i64); + RTS_MIN_FOLLOWER_SAFE_TS_GAP.set( + now.saturating_sub(TimeStamp::from(stats.min_follower_safe_ts.safe_ts).physical()) + as i64, + ); + RTS_MIN_FOLLOWER_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER.set( + stats + .min_follower_safe_ts + .duration_to_last_consume_leader + .map(|x| x as i64) + .unwrap_or(-1), + ); + + // min leader resolved ts + RTS_MIN_LEADER_RESOLVED_TS.set(stats.min_leader_resolved_ts.resolved_ts as i64); + RTS_MIN_LEADER_RESOLVED_TS_REGION.set(stats.min_leader_resolved_ts.region_id as i64); + RTS_MIN_LEADER_RESOLVED_TS_REGION_MIN_LOCK_TS.set( + stats + .min_leader_resolved_ts + .min_lock + .as_ref() + .map(|(ts, _)| (*ts).into_inner() as i64) + .unwrap_or(-1), + ); + RTS_MIN_LEADER_RESOLVED_TS_GAP + .set(now.saturating_sub( + TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_LEADER_DUATION_TO_LAST_UPDATE_SAFE_TS.set( + stats + .min_leader_resolved_ts + .duration_to_last_update_ms + .map(|x| x as i64) + .unwrap_or(-1), + ); + + // min follower resolved ts + RTS_MIN_FOLLOWER_RESOLVED_TS.set(stats.min_follower_resolved_ts.resolved_ts as i64); + RTS_MIN_FOLLOWER_RESOLVED_TS_REGION.set(stats.min_follower_resolved_ts.region_id as i64); + RTS_MIN_FOLLOWER_RESOLVED_TS_GAP.set( + now.saturating_sub( + TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical(), + ) as i64, + ); + RTS_MIN_FOLLOWER_RESOLVED_TS_DURATION_TO_LAST_CONSUME_LEADER.set( + stats + .min_follower_resolved_ts + .duration_to_last_consume_leader + .map(|x| x as i64) + .unwrap_or(-1), + ); + } + + // Approximate a TSO from PD. It is better than local timestamp when clock skew + // exists. + // Returns the physical part. 
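+ // Falls back to the local physical clock when the cached PD TSO is unavailable.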
+ fn approximate_now_tso(&self) -> u64 { + self.advance_worker + .last_pd_tso + .try_lock() + .map(|opt| { + opt.map(|(pd_ts, instant)| { + pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 + }) + .unwrap_or_else(|| TimeStamp::physical_now()) + }) + .unwrap_or_else(|_| TimeStamp::physical_now()) + } + + fn log_slow_regions(&self, stats: &Stats) { + let expected_interval = min( + self.cfg.advance_ts_interval.as_millis(), + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64, + ) + self.cfg.advance_ts_interval.as_millis(); + let leader_threshold = expected_interval + SLOW_LOG_GRACE_PERIOD_MS; + let follower_threshold = 2 * expected_interval + SLOW_LOG_GRACE_PERIOD_MS; + let now = self.approximate_now_tso(); + + // min leader resolved ts + let min_leader_resolved_ts_gap = now + .saturating_sub(TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical()); + if min_leader_resolved_ts_gap > leader_threshold { + info!( + "the max gap of leader resolved-ts is large"; + "region_id" => stats.min_leader_resolved_ts.region_id, + "gap" => format!("{}ms", min_leader_resolved_ts_gap), + "read_state" => ?stats.min_leader_resolved_ts.read_state, + "applied_index" => stats.min_leader_resolved_ts.applied_index, + "min_lock" => ?stats.min_leader_resolved_ts.min_lock, + "lock_num" => stats.min_leader_resolved_ts.lock_num, + "txn_num" => stats.min_leader_resolved_ts.txn_num, + "min_memory_lock" => ?stats.cm_min_lock, + "duration_to_last_update_safe_ts" => match stats.min_leader_resolved_ts.duration_to_last_update_ms { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "last_resolve_attempt" => &stats.min_leader_resolved_ts.last_resolve_attempt, + ); + } + + // min follower safe ts + let min_follower_safe_ts_gap = + now.saturating_sub(TimeStamp::from(stats.min_follower_safe_ts.safe_ts).physical()); + if min_follower_safe_ts_gap > follower_threshold { + info!( + "the max gap of follower safe-ts is large"; + "region_id" => stats.min_follower_safe_ts.region_id, + "gap" => format!("{}ms", min_follower_safe_ts_gap), + "safe_ts" => stats.min_follower_safe_ts.safe_ts, + "resolved_ts" => stats.min_follower_safe_ts.resolved_ts, + "duration_to_last_consume_leader" => match stats.min_follower_safe_ts.duration_to_last_consume_leader { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "applied_index" => stats.min_follower_safe_ts.applied_index, + "latest_candidate" => ?stats.min_follower_safe_ts.latest_candidate, + "oldest_candidate" => ?stats.min_follower_safe_ts.oldest_candidate, + ); + } + + // min follower resolved ts + let min_follower_resolved_ts_gap = now + .saturating_sub(TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical()); + if min_follower_resolved_ts_gap > follower_threshold { + if stats.min_follower_resolved_ts.region_id == stats.min_follower_safe_ts.region_id { + info!( + "the max gap of follower resolved-ts is large; it's the same region that has the min safe-ts" + ); + } else { + info!( + "the max gap of follower resolved-ts is large"; + "region_id" => stats.min_follower_resolved_ts.region_id, + "gap" => format!("{}ms", min_follower_resolved_ts_gap), + "safe_ts" => stats.min_follower_resolved_ts.safe_ts, + "resolved_ts" => stats.min_follower_resolved_ts.resolved_ts, + "duration_to_last_consume_leader" => match stats.min_follower_resolved_ts.duration_to_last_consume_leader { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "applied_index" => stats.min_follower_resolved_ts.applied_index, + 
"latest_candidate" => ?stats.min_follower_resolved_ts.latest_candidate, + "oldest_candidate" => ?stats.min_follower_resolved_ts.oldest_candidate, + ); + } + } + } +} + impl Endpoint where T: 'static + CdcHandle, @@ -623,7 +885,12 @@ where // Update advanced resolved ts. // Must ensure all regions are leaders at the point of ts. - fn handle_resolved_ts_advanced(&mut self, regions: Vec, ts: TimeStamp) { + fn handle_resolved_ts_advanced( + &mut self, + regions: Vec, + ts: TimeStamp, + ts_source: TsSource, + ) { if regions.is_empty() { return; } @@ -631,7 +898,9 @@ where for region_id in regions.iter() { if let Some(observe_region) = self.regions.get_mut(region_id) { if let ResolverStatus::Ready = observe_region.resolver_status { - let _ = observe_region.resolver.resolve(ts, Some(now)); + let _ = observe_region + .resolver + .resolve(ts, Some(now), ts_source.clone()); } } } @@ -776,6 +1045,7 @@ pub enum Task { ResolvedTsAdvanced { regions: Vec, ts: TimeStamp, + ts_source: TsSource, }, ChangeLog { cmd_batch: Vec, @@ -830,10 +1100,12 @@ impl fmt::Debug for Task { Task::ResolvedTsAdvanced { ref regions, ref ts, + ref ts_source, } => de .field("name", &"advance_resolved_ts") .field("regions", ®ions) .field("ts", &ts) + .field("ts_source", &ts_source.label()) .finish(), Task::ChangeLog { .. } => de.field("name", &"change_log").finish(), Task::ScanLocks { @@ -890,9 +1162,11 @@ where Task::AdvanceResolvedTs { leader_resolver } => { self.handle_advance_resolved_ts(leader_resolver) } - Task::ResolvedTsAdvanced { regions, ts } => { - self.handle_resolved_ts_advanced(regions, ts) - } + Task::ResolvedTsAdvanced { + regions, + ts, + ts_source, + } => self.handle_resolved_ts_advanced(regions, ts, ts_source), Task::ChangeLog { cmd_batch } => self.handle_change_log(cmd_batch), Task::ScanLocks { region_id, @@ -928,6 +1202,138 @@ impl ConfigManager for ResolvedTsConfigManager { } } +#[derive(Default)] +struct Stats { + // stats for metrics + zero_ts_count: i64, + min_leader_resolved_ts: LeaderStats, + min_follower_safe_ts: FollowerStats, + min_follower_resolved_ts: FollowerStats, + resolver: ResolverStats, + // we don't care about min_safe_ts_leader, because safe_ts should be equal to resolved_ts in + // leaders + // The min memory lock in concurrency manager. 
+ cm_min_lock: Option<(TimeStamp, Key)>, +} + +struct LeaderStats { + region_id: u64, + resolved_ts: u64, + read_state: ReadState, + duration_to_last_update_ms: Option, + last_resolve_attempt: Option, + applied_index: u64, + // min lock in LOCK CF + min_lock: Option<(TimeStamp, Key)>, + lock_num: Option, + txn_num: Option, +} + +impl Default for LeaderStats { + fn default() -> Self { + Self { + region_id: 0, + resolved_ts: u64::MAX, + read_state: ReadState::default(), + duration_to_last_update_ms: None, + applied_index: 0, + last_resolve_attempt: None, + min_lock: None, + lock_num: None, + txn_num: None, + } + } +} + +impl LeaderStats { + fn set( + &mut self, + region_id: u64, + resolver: Option<&Resolver>, + region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, + leader_info: &LeaderInfo, + ) { + *self = LeaderStats { + region_id, + resolved_ts: leader_info.get_read_state().get_safe_ts(), + read_state: region_read_progress.read_state().clone(), + duration_to_last_update_ms: region_read_progress + .last_instant_of_update_ts() + .map(|i| i.saturating_elapsed().as_millis() as u64), + last_resolve_attempt: resolver.and_then(|r| r.last_attempt.clone()), + min_lock: resolver.and_then(|r| { + r.oldest_transaction().map(|(ts, keys)| { + ( + *ts, + keys.iter() + .next() + .map(|k| Key::from_encoded_slice(k.as_ref())) + .unwrap_or_else(|| Key::from_encoded_slice("no_keys_found".as_ref())), + ) + }) + }), + applied_index: region_read_progress.applied_index(), + lock_num: resolver.map(|r| r.num_locks()), + txn_num: resolver.map(|r| r.num_transactions()), + }; + } +} + +struct FollowerStats { + region_id: u64, + resolved_ts: u64, + safe_ts: u64, + latest_candidate: Option, + oldest_candidate: Option, + applied_index: u64, + duration_to_last_consume_leader: Option, +} + +impl Default for FollowerStats { + fn default() -> Self { + Self { + region_id: 0, + safe_ts: u64::MAX, + resolved_ts: u64::MAX, + latest_candidate: None, + oldest_candidate: None, + applied_index: 0, + duration_to_last_consume_leader: None, + } + } +} + +impl FollowerStats { + fn set( + &mut self, + region_id: u64, + region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, + ) { + let read_state = region_read_progress.read_state(); + *self = FollowerStats { + region_id, + resolved_ts: region_read_progress + .get_leader_info() + .get_read_state() + .get_safe_ts(), + safe_ts: read_state.ts, + applied_index: region_read_progress.applied_index(), + latest_candidate: region_read_progress.pending_items().back().cloned(), + oldest_candidate: region_read_progress.pending_items().front().cloned(), + duration_to_last_consume_leader: region_read_progress + .last_instant_of_consume_leader() + .map(|i| i.saturating_elapsed().as_millis() as u64), + }; + } +} + +#[derive(Default)] +struct ResolverStats { + resolved_count: i64, + unresolved_count: i64, + heap_size: i64, +} + const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s impl RunnableWithTimer for Endpoint @@ -937,138 +1343,9 @@ where S: StoreRegionMeta, { fn on_timeout(&mut self) { - let store_id = self.get_or_init_store_id(); - let (mut oldest_ts, mut oldest_region, mut zero_ts_count) = (u64::MAX, 0, 0); - let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); - let (mut oldest_safe_ts, mut oldest_safe_ts_region) = (u64::MAX, 0); - let mut oldest_duration_to_last_update_ms = 0; - let mut oldest_duration_to_last_consume_leader_ms = 0; - self.region_read_progress.with(|registry| { - for (region_id, read_progress) in registry { - let safe_ts = read_progress.safe_ts(); 
- if safe_ts > 0 && safe_ts < oldest_safe_ts { - oldest_safe_ts = safe_ts; - oldest_safe_ts_region = *region_id; - } - - let (leader_info, leader_store_id) = read_progress.dump_leader_info(); - // this is maximum resolved-ts pushed to region_read_progress, namely candidates - // of safe_ts. It may not be the safe_ts yet - let ts = leader_info.get_read_state().get_safe_ts(); - if ts == 0 { - zero_ts_count += 1; - continue; - } - if ts < oldest_ts { - oldest_ts = ts; - oldest_region = *region_id; - // use -1 to denote none. - oldest_duration_to_last_update_ms = read_progress - .get_core() - .last_instant_of_consume_leader() - .map(|t| t.saturating_elapsed().as_millis() as i64) - .unwrap_or(-1); - oldest_duration_to_last_consume_leader_ms = read_progress - .get_core() - .last_instant_of_consume_leader() - .map(|t| t.saturating_elapsed().as_millis() as i64) - .unwrap_or(-1); - } - - if let (Some(store_id), Some(leader_store_id)) = (store_id, leader_store_id) { - if leader_store_id == store_id && ts < oldest_leader_ts { - oldest_leader_ts = ts; - oldest_leader_region = *region_id; - } - } - } - }); - let mut lock_heap_size = 0; - let (mut resolved_count, mut unresolved_count) = (0, 0); - for observe_region in self.regions.values() { - match &observe_region.resolver_status { - ResolverStatus::Pending { locks, .. } => { - for l in locks { - match l { - PendingLock::Track { key, .. } => lock_heap_size += key.len(), - PendingLock::Untrack { key, .. } => lock_heap_size += key.len(), - } - } - unresolved_count += 1; - } - ResolverStatus::Ready { .. } => { - lock_heap_size += observe_region.resolver.approximate_heap_bytes(); - resolved_count += 1; - } - } - } - // approximate a TSO from PD. It is better than local timestamp when clock skew - // exists. - let now: u64 = self - .advance_worker - .last_pd_tso - .try_lock() - .map(|opt| { - opt.map(|(pd_ts, instant)| { - pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 - }) - .unwrap_or_else(|| TimeStamp::physical_now()) - }) - .unwrap_or_else(|_| TimeStamp::physical_now()); - - RTS_MIN_SAFE_TS.set(oldest_safe_ts as i64); - RTS_MIN_SAFE_TS_REGION.set(oldest_safe_ts_region as i64); - let safe_ts_gap = now.saturating_sub(TimeStamp::from(oldest_safe_ts).physical()); - if safe_ts_gap - > self.cfg.advance_ts_interval.as_millis() - + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64 - + SLOW_LOG_GRACE_PERIOD_MS - { - let mut lock_num = None; - let mut min_start_ts = None; - if let Some(ob) = self.regions.get(&oldest_safe_ts_region) { - min_start_ts = ob - .resolver - .locks() - .keys() - .next() - .cloned() - .map(TimeStamp::into_inner); - lock_num = Some(ob.resolver.num_locks()); - } - info!( - "the max gap of safe-ts is large"; - "gap" => safe_ts_gap, - "oldest_safe_ts" => ?oldest_safe_ts, - "region_id" => oldest_safe_ts_region, - "advance_ts_interval" => ?self.cfg.advance_ts_interval, - "lock_num" => lock_num, - "min_start_ts" => min_start_ts, - ); - } - RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); - RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS.set(oldest_duration_to_last_update_ms); - RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER - .set(oldest_duration_to_last_consume_leader_ms); - - RTS_MIN_RESOLVED_TS_REGION.set(oldest_region as i64); - RTS_MIN_RESOLVED_TS.set(oldest_ts as i64); - RTS_ZERO_RESOLVED_TS.set(zero_ts_count as i64); - RTS_MIN_RESOLVED_TS_GAP - .set(now.saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64); - - RTS_MIN_LEADER_RESOLVED_TS_REGION.set(oldest_leader_region as i64); - 
RTS_MIN_LEADER_RESOLVED_TS.set(oldest_leader_ts as i64); - RTS_MIN_LEADER_RESOLVED_TS_GAP - .set(now.saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) as i64); - - RTS_LOCK_HEAP_BYTES_GAUGE.set(lock_heap_size as i64); - RTS_REGION_RESOLVE_STATUS_GAUGE_VEC - .with_label_values(&["resolved"]) - .set(resolved_count as _); - RTS_REGION_RESOLVE_STATUS_GAUGE_VEC - .with_label_values(&["unresolved"]) - .set(unresolved_count as _); + let stats = self.collect_stats(); + self.update_metrics(&stats); + self.log_slow_regions(&stats); } fn get_interval(&self) -> Duration { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 74da743952cd..02bb92f78878 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -38,7 +38,7 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observed regions" + "The gap between now() and the minimal (non-zero) resolved ts" ) .unwrap(); pub static ref RTS_RESOLVED_FAIL_ADVANCE_VEC: IntCounterVec = register_int_counter_vec!( @@ -69,29 +69,29 @@ lazy_static! { "The minimal (non-zero) resolved ts for observed regions" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_REGION: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_region", - "The region which has minimal safe ts" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_region", + "The region id of the follower that has minimal safe ts" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts", - "The minimal (non-zero) safe ts for observed regions" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts", + "The minimal (non-zero) safe ts for followers" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_GAP: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_gap_millis", - "The minimal (non-zero) safe ts gap for observed regions" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_GAP: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_gap_millis", + "The gap between now() and the minimal (non-zero) safe ts for followers" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_duration_to_update_safe_ts", - "The duration since last update_safe_ts() called by resolved-ts routine. -1 denotes None." + pub static ref RTS_MIN_LEADER_DUATION_TO_LAST_UPDATE_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_leader_min_resolved_ts_duration_to_last_update_safe_ts", + "The duration since last update_safe_ts() called by resolved-ts routine in the leader with min resolved ts. -1 denotes None." ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_duration_to_last_consume_leader", - "The duration since last check_leader(). -1 denotes None." + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_duration_to_last_consume_leader", + "The duration since last check_leader() in the follower region with min safe ts. -1 denotes None." 
) .unwrap(); pub static ref RTS_ZERO_RESOLVED_TS: IntGauge = register_int_gauge!( @@ -125,7 +125,17 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS_REGION: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_leader_resolved_ts_region", - "The region which its leader peer has minimal resolved ts" + "The region whose leader peer has minimal resolved ts" + ) + .unwrap(); + pub static ref RTS_MIN_LEADER_RESOLVED_TS_REGION_MIN_LOCK_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_leader_resolved_ts_region_min_lock_ts", + "The minimal lock ts for the region whose leader peer has minimal resolved ts. 0 means no lock. -1 means no region found." + ) + .unwrap(); + pub static ref CONCURRENCY_MANAGER_MIN_LOCK_TS: IntGauge = register_int_gauge!( + "tikv_concurrency_manager_min_lock_ts", + "The minimal lock ts in concurrency manager. 0 means no lock." ) .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS: IntGauge = register_int_gauge!( @@ -135,7 +145,29 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_leader_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observe leader peers" + "The gap between now() and the minimal (non-zero) resolved ts for leader peers" + ) + .unwrap(); + + // for min_follower_resolved_ts + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_region", + "The region id of the follower has minimal resolved ts" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts", + "The minimal (non-zero) resolved ts for follower regions" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_gap_millis", + "The max gap of now() and the minimal (non-zero) resolved ts for follower regions" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_duration_to_last_consume_leader", + "The duration since last check_leader() in the follower region with min resolved ts. -1 denotes None." ) .unwrap(); pub static ref RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM: Histogram = register_histogram!( diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index ef257ad47620..e0814176a92f 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -8,13 +8,46 @@ use tikv_util::{ memory::{HeapSize, MemoryQuota}, time::Instant, }; -use txn_types::TimeStamp; +use txn_types::{Key, TimeStamp}; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +#[derive(Clone)] +pub enum TsSource { + // A lock in LOCK CF + Lock(Arc<[u8]>), + // A memory lock in concurrency manager + MemoryLock(Key), + PdTso, + // The following sources can also come from PD or memory lock, but we care more about sources + // in resolved-ts. 
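+    // so the advancing component is recorded instead (label() reports "backup_stream" / "cdc").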
+ BackupStream, + Cdc, +} + +impl TsSource { + pub fn label(&self) -> &str { + match self { + TsSource::Lock(_) => "lock", + TsSource::MemoryLock(_) => "rts_cm_min_lock", + TsSource::PdTso => "pd_tso", + TsSource::BackupStream => "backup_stream", + TsSource::Cdc => "cdc", + } + } + + pub fn key(&self) -> Option { + match self { + TsSource::Lock(k) => Some(Key::from_encoded_slice(k)), + TsSource::MemoryLock(k) => Some(k.clone()), + _ => None, + } + } +} + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -22,7 +55,7 @@ pub struct Resolver { // key -> start_ts locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. - lock_ts_heap: BTreeMap>>, + pub(crate) lock_ts_heap: BTreeMap>>, // The last shrink time. last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. @@ -35,14 +68,42 @@ pub struct Resolver { min_ts: TimeStamp, // Whether the `Resolver` is stopped stopped: bool, - // The memory quota for the `Resolver` and its lock keys and timestamps. memory_quota: Arc, + // The last attempt of resolve(), used for diagnosis. + pub(crate) last_attempt: Option, +} + +#[derive(Clone)] +pub(crate) struct LastAttempt { + success: bool, + ts: TimeStamp, + reason: TsSource, +} + +impl slog::Value for LastAttempt { + fn serialize( + &self, + _record: &slog::Record<'_>, + key: slog::Key, + serializer: &mut dyn slog::Serializer, + ) -> slog::Result { + serializer.emit_arguments( + key, + &format_args!( + "{{ success={}, ts={}, reason={}, key={:?} }}", + self.success, + self.ts, + self.reason.label(), + self.reason.key(), + ), + ) + } } impl std::fmt::Debug for Resolver { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let far_lock = self.lock_ts_heap.iter().next(); + let far_lock = self.oldest_transaction(); let mut dt = f.debug_tuple("Resolver"); dt.field(&format_args!("region={}", self.region_id)); @@ -103,6 +164,7 @@ impl Resolver { min_ts: TimeStamp::zero(), stopped: false, memory_quota, + last_attempt: None, } } @@ -252,7 +314,12 @@ impl Resolver { /// /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. - pub fn resolve(&mut self, min_ts: TimeStamp, now: Option) -> TimeStamp { + pub fn resolve( + &mut self, + min_ts: TimeStamp, + now: Option, + source: TsSource, + ) -> TimeStamp { // Use a small ratio to shrink the memory usage aggressively. const AGGRESSIVE_SHRINK_RATIO: usize = 2; const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); @@ -268,17 +335,36 @@ impl Resolver { } // Find the min start ts. - let min_lock = self.lock_ts_heap.keys().next().cloned(); + let min_lock = self + .oldest_transaction() + .and_then(|(ts, locks)| locks.iter().next().map(|lock| (*ts, lock))); let has_lock = min_lock.is_some(); - let min_start_ts = min_lock.unwrap_or(min_ts); + let min_start_ts = min_lock.map(|(ts, _)| ts).unwrap_or(min_ts); // No more commit happens before the ts. let new_resolved_ts = cmp::min(min_start_ts, min_ts); + // reason is the min source of the new resolved ts. 
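+        // For example: with an oldest tracked lock at start_ts 100 and a min_ts of 120 from
+        // the PD TSO, the resolved ts is capped at 100 and the reason is that lock; with no
+        // tracked locks it advances to 120 and the reason is the caller-provided source.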
+ let reason = match (min_lock, min_ts) { + (Some(lock), min_ts) if lock.0 < min_ts => TsSource::Lock(lock.1.clone()), + (Some(_), _) => source, + (None, _) => source, + }; + if self.resolved_ts >= new_resolved_ts { - let label = if has_lock { "has_lock" } else { "stale_ts" }; RTS_RESOLVED_FAIL_ADVANCE_VEC - .with_label_values(&[label]) + .with_label_values(&[reason.label()]) .inc(); + self.last_attempt = Some(LastAttempt { + success: false, + ts: new_resolved_ts, + reason, + }); + } else { + self.last_attempt = Some(LastAttempt { + success: true, + ts: new_resolved_ts, + reason, + }) } // Resolved ts never decrease. @@ -335,6 +421,10 @@ impl Resolver { pub(crate) fn read_progress(&self) -> Option<&Arc> { self.read_progress.as_ref() } + + pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &HashSet>)> { + self.lock_ts_heap.iter().next() + } } #[cfg(test)] @@ -419,7 +509,7 @@ mod tests { Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { assert_eq!( - resolver.resolve(min_ts.into(), None), + resolver.resolve(min_ts.into(), None, TsSource::PdTso), expect.into(), "case {}", i @@ -501,7 +591,7 @@ mod tests { // Trigger aggressive shrink. resolver.last_aggressive_shrink_time = Instant::now_coarse() - Duration::from_secs(600); - resolver.resolve(TimeStamp::new(0), None); + resolver.resolve(TimeStamp::new(0), None, TsSource::PdTso); assert!( resolver.locks_by_key.capacity() == 0, "{}, {}", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c78540c601a5..ceed5c6314ce 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39068,7 +39068,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between safe ts and current time", + "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { @@ -39119,7 +39119,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -39132,7 +39132,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max gap of safe-ts", + "title": "Max gap of follower safe-ts", "tooltip": { "msResolution": false, "shared": true, @@ -39292,7 +39292,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The region that has minimal safe ts", + "description": "The region id of the follower that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { @@ -39348,7 +39348,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", @@ -39362,7 +39362,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Min Safe TS Region", + "title": "Min Safe TS Follower Region", "tooltip": { "msResolution": false, "shared": true, From 1abc220dca85950a728c7be06f469870373fb463 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E8=B6=85?= Date: Wed, 6 Sep 2023 14:48:43 +0800 Subject: [PATCH 033/203] coprocessor: add SQL statement tracing in tikv slow log (#15514) close tikv/tikv#15513 coprocessor: add SQL statement tracing in tikv slow log Signed-off-by: Chao Wang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/tracker.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 71d84388c3b7..bb32a3a0e032 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -264,8 +264,11 @@ impl Tracker { .unwrap_or_default() }); + let source_stmt = self.req_ctx.context.get_source_stmt(); with_tls_tracker(|tracker| { info!(#"slow_log", "slow-query"; + "connection_id" => source_stmt.get_connection_id(), + "session_alias" => source_stmt.get_session_alias(), "region_id" => &self.req_ctx.context.get_region_id(), "remote_host" => &self.req_ctx.peer, "total_lifetime" => ?self.req_lifetime, From fd896513d1c1bf274cf11acae1a09b6034b3c149 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 7 Sep 2023 15:00:44 +0800 Subject: [PATCH 034/203] engine_rocks: trace all memtables including pinned (#15547) close tikv/tikv#15546 Signed-off-by: Neil Shen --- components/engine_rocks/src/rocks_metrics.rs | 15 ++++++++++++--- components/engine_rocks/src/rocks_metrics_defs.rs | 1 + metrics/grafana/tikv_details.json | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 522696cb1502..2b32af111ec5 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -920,6 +920,7 @@ struct CfStats { blob_cache_size: Option, readers_mem: Option, mem_tables: Option, + mem_tables_all: Option, num_keys: Option, pending_compaction_bytes: Option, num_immutable_mem_table: Option, @@ -978,6 +979,9 @@ impl StatisticsReporter for RocksStatisticsReporter { if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_CUR_SIZE_ALL_MEM_TABLES) { *cf_stats.mem_tables.get_or_insert_default() += v; } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_SIZE_ALL_MEM_TABLES) { + *cf_stats.mem_tables_all.get_or_insert_default() += v; + } // TODO: add cache usage and pinned usage. if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_ESTIMATE_NUM_KEYS) { *cf_stats.num_keys.get_or_insert_default() += v; @@ -1119,6 +1123,11 @@ impl StatisticsReporter for RocksStatisticsReporter { .with_label_values(&[&self.name, cf, "mem-tables"]) .set(v as i64); } + if let Some(v) = cf_stats.mem_tables_all { + STORE_ENGINE_MEMORY_GAUGE_VEC + .with_label_values(&[&self.name, cf, "mem-tables-all"]) + .set(v as i64); + } if let Some(v) = cf_stats.num_keys { STORE_ENGINE_ESTIMATE_NUM_KEYS_VEC .with_label_values(&[&self.name, cf]) @@ -1538,9 +1547,9 @@ lazy_static! 
{ "Number of times titan blob file sync is done", &["db"] ).unwrap(); - pub static ref STORE_ENGINE_BLOB_FILE_SYNCED: SimpleEngineTickerMetrics = - auto_flush_from!(STORE_ENGINE_BLOB_FILE_SYNCED_VEC, SimpleEngineTickerMetrics); - + pub static ref STORE_ENGINE_BLOB_FILE_SYNCED: SimpleEngineTickerMetrics = + auto_flush_from!(STORE_ENGINE_BLOB_FILE_SYNCED_VEC, SimpleEngineTickerMetrics); + pub static ref STORE_ENGINE_BLOB_CACHE_EFFICIENCY_VEC: IntCounterVec = register_int_counter_vec!( "tikv_engine_blob_cache_efficiency", "Efficiency of titan's blob cache", diff --git a/components/engine_rocks/src/rocks_metrics_defs.rs b/components/engine_rocks/src/rocks_metrics_defs.rs index 042949f1c095..5bbc6245c726 100644 --- a/components/engine_rocks/src/rocks_metrics_defs.rs +++ b/components/engine_rocks/src/rocks_metrics_defs.rs @@ -5,6 +5,7 @@ use rocksdb::{DBStatisticsHistogramType as HistType, DBStatisticsTickerType as T pub const ROCKSDB_TOTAL_SST_FILES_SIZE: &str = "rocksdb.total-sst-files-size"; pub const ROCKSDB_TABLE_READERS_MEM: &str = "rocksdb.estimate-table-readers-mem"; pub const ROCKSDB_CUR_SIZE_ALL_MEM_TABLES: &str = "rocksdb.cur-size-all-mem-tables"; +pub const ROCKSDB_SIZE_ALL_MEM_TABLES: &str = "rocksdb.size-all-mem-tables"; pub const ROCKSDB_ESTIMATE_NUM_KEYS: &str = "rocksdb.estimate-num-keys"; pub const ROCKSDB_PENDING_COMPACTION_BYTES: &str = "rocksdb.\ estimate-pending-compaction-bytes"; diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index ceed5c6314ce..c31ee12b27b8 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -31941,7 +31941,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"mem-tables\"}) by (cf)", + "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"mem-tables-all\"}) by (cf)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cf}}", From 23c89b3fd2d0395d868b76deb0a0c820c3e48aab Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 7 Sep 2023 15:15:44 +0800 Subject: [PATCH 035/203] *: let alloc API return result (#15529) ref tikv/tikv#15412 MemoryQuota alloc API returns result, make it more ergonomic. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 6 +-- components/cdc/src/channel.rs | 30 ++++++++----- components/cdc/src/delegate.rs | 14 ++---- components/cdc/src/endpoint.rs | 4 +- components/cdc/src/errors.rs | 3 +- components/cdc/src/initializer.rs | 4 +- components/resolved_ts/src/endpoint.rs | 36 +++++++--------- components/resolved_ts/src/errors.rs | 3 +- components/resolved_ts/src/resolver.rs | 26 +++++------ components/tikv_util/src/memory.rs | 43 +++++++++++-------- 10 files changed, 87 insertions(+), 82 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index d6d49f0cf1c6..4f44ec46853a 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -480,7 +480,7 @@ impl TwoPhaseResolver { warn!("backup stream tracking lock as if in phase one"; "start_ts" => %start_ts, "key" => %utils::redact(&key)) } // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
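        // (`track_lock` now returns `Result<(), MemoryQuotaExceeded>` instead of a bool; with
        // the quota fixed at usize::MAX the `unwrap()` below is not expected to fail.)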
- assert!(self.resolver.track_lock(start_ts, key, None)); + self.resolver.track_lock(start_ts, key, None).unwrap(); } pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec) { @@ -489,7 +489,7 @@ impl TwoPhaseResolver { return; } // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(self.resolver.track_lock(start_ts, key, None)); + self.resolver.track_lock(start_ts, key, None).unwrap(); } pub fn untrack_lock(&mut self, key: &[u8]) { @@ -505,7 +505,7 @@ impl TwoPhaseResolver { match lock { FutureLock::Lock(key, ts) => { // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(self.resolver.track_lock(ts, key, None)); + self.resolver.track_lock(ts, key, None).unwrap(); } FutureLock::Unlock(key) => self.resolver.untrack_lock(&key, None), } diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index a3ddeeb90305..b386c3561bb2 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -14,7 +14,11 @@ use grpcio::WriteFlags; use kvproto::cdcpb::{ChangeDataEvent, Event, ResolvedTs}; use protobuf::Message; use tikv_util::{ - future::block_on_timeout, impl_display_as_debug, memory::MemoryQuota, time::Instant, warn, + future::block_on_timeout, + impl_display_as_debug, + memory::{MemoryQuota, MemoryQuotaExceeded}, + time::Instant, + warn, }; use crate::metrics::*; @@ -234,6 +238,12 @@ impl_from_future_send_error! { TrySendError<(CdcEvent, usize)>, } +impl From for SendError { + fn from(_: MemoryQuotaExceeded) -> Self { + SendError::Congested + } +} + #[derive(Clone)] pub struct Sink { unbounded_sender: UnboundedSender<(CdcEvent, usize)>, @@ -245,8 +255,8 @@ impl Sink { pub fn unbounded_send(&self, event: CdcEvent, force: bool) -> Result<(), SendError> { // Try it's best to send error events. let bytes = if !force { event.size() as usize } else { 0 }; - if bytes != 0 && !self.memory_quota.alloc(bytes) { - return Err(SendError::Congested); + if bytes != 0 { + self.memory_quota.alloc(bytes)?; } match self.unbounded_sender.unbounded_send((event, bytes)) { Ok(_) => Ok(()), @@ -265,9 +275,7 @@ impl Sink { let bytes = event.size(); total_bytes += bytes; } - if !self.memory_quota.alloc(total_bytes as _) { - return Err(SendError::Congested); - } + self.memory_quota.alloc(total_bytes as _)?; for event in events { let bytes = event.size() as usize; if let Err(e) = self.bounded_sender.feed((event, bytes)).await { @@ -570,9 +578,9 @@ mod tests { } } let memory_quota = rx.memory_quota.clone(); - assert_eq!(memory_quota.alloc(event.size() as _), false,); + memory_quota.alloc(event.size() as _).unwrap_err(); drop(rx); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); } // Make sure memory quota is freed when tx is dropped before rx. { @@ -587,10 +595,10 @@ mod tests { } } let memory_quota = rx.memory_quota.clone(); - assert_eq!(memory_quota.alloc(event.size() as _), false,); + memory_quota.alloc(event.size() as _).unwrap_err(); drop(send); drop(rx); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); } // Make sure sending message to a closed channel does not leak memory quota. { @@ -602,7 +610,7 @@ mod tests { send(CdcEvent::Event(e.clone())).unwrap_err(); } assert_eq!(memory_quota.in_use(), 0); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); // Freeing bytes should not cause overflow. 
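        // (With the Result-based API the tests assert success with `unwrap()` and quota
        // exhaustion with `unwrap_err()`, replacing the old boolean assertions.)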
memory_quota.free(1024); diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index f7125aa88823..c82c4cb6f13e 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -249,9 +249,7 @@ impl Pending { fn push_pending_lock(&mut self, lock: PendingLock) -> Result<()> { let bytes = lock.heap_size(); - if !self.memory_quota.alloc(bytes) { - return Err(Error::MemoryQuotaExceeded); - } + self.memory_quota.alloc(bytes)?; self.locks.push(lock); self.pending_bytes += bytes; CDC_PENDING_BYTES_GAUGE.add(bytes as i64); @@ -260,16 +258,14 @@ impl Pending { fn on_region_ready(&mut self, resolver: &mut Resolver) -> Result<()> { fail::fail_point!("cdc_pending_on_region_ready", |_| Err( - Error::MemoryQuotaExceeded + Error::MemoryQuotaExceeded(tikv_util::memory::MemoryQuotaExceeded) )); // Must take locks, otherwise it may double free memory quota on drop. for lock in mem::take(&mut self.locks) { self.memory_quota.free(lock.heap_size()); match lock { PendingLock::Track { key, start_ts } => { - if !resolver.track_lock(start_ts, key, None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(start_ts, key, None)?; } PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), } @@ -900,9 +896,7 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. match self.resolver { Some(ref mut resolver) => { - if !resolver.track_lock(row.start_ts.into(), row.key.clone(), None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(row.start_ts.into(), row.key.clone(), None)?; } None => { assert!(self.pending.is_some(), "region resolver not ready"); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 2b314f224430..a5f00a08028f 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -2644,7 +2644,9 @@ mod tests { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let mut resolver = Resolver::new(id, memory_quota); - assert!(resolver.track_lock(TimeStamp::compose(0, id), vec![], None)); + resolver + .track_lock(TimeStamp::compose(0, id), vec![], None) + .unwrap(); let mut region = Region::default(); region.id = id; region.set_region_epoch(region_epoch); diff --git a/components/cdc/src/errors.rs b/components/cdc/src/errors.rs index e44c39e3999f..e7bd7605e7de 100644 --- a/components/cdc/src/errors.rs +++ b/components/cdc/src/errors.rs @@ -10,6 +10,7 @@ use tikv::storage::{ mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, }; +use tikv_util::memory::MemoryQuotaExceeded; use txn_types::Error as TxnTypesError; use crate::channel::SendError; @@ -36,7 +37,7 @@ pub enum Error { #[error("Sink send error {0:?}")] Sink(#[from] SendError), #[error("Memory quota exceeded")] - MemoryQuotaExceeded, + MemoryQuotaExceeded(#[from] MemoryQuotaExceeded), } macro_rules! 
impl_from { diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index ef0b15caab9d..31cda4b9e729 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -424,9 +424,7 @@ impl Initializer { let lock = Lock::parse(value)?; match lock.lock_type { LockType::Put | LockType::Delete => { - if !resolver.track_lock(lock.ts, key, None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(lock.ts, key, None)?; } _ => (), }; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index e2d2aec4f701..2a2f56eaadd0 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -103,10 +103,10 @@ impl ResolverStatus { }; // Check if adding a new lock or unlock will exceed the memory // quota. - if !memory_quota.alloc(lock.heap_size()) { + memory_quota.alloc(lock.heap_size()).map_err(|e| { fail::fail_point!("resolved_ts_on_pending_locks_memory_quota_exceeded"); - return Err(Error::MemoryQuotaExceeded); - } + Error::MemoryQuotaExceeded(e) + })?; locks.push(lock); Ok(()) } @@ -292,13 +292,11 @@ impl ObserveRegion { for row in rows { match row { ChangeRow::Prewrite { key, start_ts, .. } => { - if !self.resolver.track_lock( + self.resolver.track_lock( *start_ts, key.to_raw().unwrap(), Some(*index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } ChangeRow::Commit { key, .. } => self .resolver @@ -328,13 +326,11 @@ impl ObserveRegion { panic!("region {:?} resolver has ready", self.meta.id) } for (key, lock) in locks { - if !self.resolver.track_lock( + self.resolver.track_lock( lock.ts, key.to_raw().unwrap(), Some(apply_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } } ScanEntry::None => { @@ -347,13 +343,11 @@ impl ObserveRegion { for lock in pending_locks { match lock { PendingLock::Track { key, start_ts } => { - if !self.resolver.track_lock( + self.resolver.track_lock( start_ts, key.to_raw().unwrap(), Some(pending_tracked_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } PendingLock::Untrack { key, .. 
} => self .resolver @@ -924,7 +918,7 @@ where if let Err(e) = observe_region.track_change_log(&logs) { drop(observe_region); let backoff = match e { - Error::MemoryQuotaExceeded => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), + Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, }; self.re_register_region(region_id, observe_id, e, backoff); @@ -947,13 +941,13 @@ where entries: Vec, apply_index: u64, ) { - let mut is_memory_quota_exceeded = false; + let mut memory_quota_exceeded = None; if let Some(observe_region) = self.regions.get_mut(®ion_id) { if observe_region.handle.id == observe_id { - if let Err(Error::MemoryQuotaExceeded) = + if let Err(Error::MemoryQuotaExceeded(e)) = observe_region.track_scan_locks(entries, apply_index) { - is_memory_quota_exceeded = true; + memory_quota_exceeded = Some(Error::MemoryQuotaExceeded(e)); } } } else { @@ -961,9 +955,9 @@ where "region_id" => region_id, "observe_id" => ?observe_id); } - if is_memory_quota_exceeded { + if let Some(e) = memory_quota_exceeded { let backoff = Some(MEMORY_QUOTA_EXCEEDED_BACKOFF); - self.re_register_region(region_id, observe_id, Error::MemoryQuotaExceeded, backoff); + self.re_register_region(region_id, observe_id, e, backoff); } } diff --git a/components/resolved_ts/src/errors.rs b/components/resolved_ts/src/errors.rs index b4a59a2c7a0b..4e14c1d78d9b 100644 --- a/components/resolved_ts/src/errors.rs +++ b/components/resolved_ts/src/errors.rs @@ -1,11 +1,12 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use thiserror::Error; +use tikv_util::memory::MemoryQuotaExceeded; #[derive(Debug, Error)] pub enum Error { #[error("Memory quota exceeded")] - MemoryQuotaExceeded, + MemoryQuotaExceeded(#[from] MemoryQuotaExceeded), #[error("Other error {0}")] Other(#[from] Box), } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index e0814176a92f..9a62a0eea988 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -5,7 +5,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; use tikv_util::{ - memory::{HeapSize, MemoryQuota}, + memory::{HeapSize, MemoryQuota, MemoryQuotaExceeded}, time::Instant, }; use txn_types::{Key, TimeStamp}; @@ -245,8 +245,12 @@ impl Resolver { } } - #[must_use] - pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { + pub fn track_lock( + &mut self, + start_ts: TimeStamp, + key: Vec, + index: Option, + ) -> Result<(), MemoryQuotaExceeded> { if let Some(index) = index { self.update_tracked_index(index); } @@ -260,13 +264,11 @@ impl Resolver { "memory_capacity" => self.memory_quota.capacity(), "key_heap_size" => bytes, ); - if !self.memory_quota.alloc(bytes) { - return false; - } + self.memory_quota.alloc(bytes)?; let key: Arc<[u8]> = key.into_boxed_slice().into(); self.locks_by_key.insert(key.clone(), start_ts); self.lock_ts_heap.entry(start_ts).or_default().insert(key); - true + Ok(()) } pub fn untrack_lock(&mut self, key: &[u8], index: Option) { @@ -500,11 +502,9 @@ mod tests { for e in case.clone() { match e { Event::Lock(start_ts, key) => { - assert!(resolver.track_lock( - start_ts.into(), - key.into_raw().unwrap(), - None - )); + resolver + .track_lock(start_ts.into(), key.into_raw().unwrap(), None) + .unwrap(); } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { @@ -527,7 +527,7 
@@ mod tests { let mut key = vec![0; 77]; let lock_size = resolver.lock_heap_size(&key); let mut ts = TimeStamp::default(); - while resolver.track_lock(ts, key.clone(), None) { + while resolver.track_lock(ts, key.clone(), None).is_ok() { ts.incr(); key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 17b6b23cf788..291254c5227b 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -75,16 +75,23 @@ impl HeapSize for RaftCmdRequest { } } +#[derive(Debug)] +pub struct MemoryQuotaExceeded; + +impl std::error::Error for MemoryQuotaExceeded {} + +impl_display_as_debug!(MemoryQuotaExceeded); + pub struct MemoryQuota { - capacity: AtomicUsize, in_use: AtomicUsize, + capacity: AtomicUsize, } impl MemoryQuota { pub fn new(capacity: usize) -> MemoryQuota { MemoryQuota { - capacity: AtomicUsize::new(capacity), in_use: AtomicUsize::new(0), + capacity: AtomicUsize::new(capacity), } } @@ -93,28 +100,28 @@ impl MemoryQuota { } pub fn capacity(&self) -> usize { - self.capacity.load(Ordering::Acquire) + self.capacity.load(Ordering::Relaxed) } pub fn set_capacity(&self, capacity: usize) { - self.capacity.store(capacity, Ordering::Release) + self.capacity.store(capacity, Ordering::Relaxed); } - pub fn alloc(&self, bytes: usize) -> bool { + pub fn alloc(&self, bytes: usize) -> Result<(), MemoryQuotaExceeded> { + let capacity = self.capacity.load(Ordering::Relaxed); let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - let capacity = self.capacity.load(Ordering::Acquire); loop { if in_use_bytes + bytes > capacity { - return false; + return Err(MemoryQuotaExceeded); } let new_in_use_bytes = in_use_bytes + bytes; match self.in_use.compare_exchange_weak( in_use_bytes, new_in_use_bytes, - Ordering::Acquire, + Ordering::Relaxed, Ordering::Relaxed, ) { - Ok(_) => return true, + Ok(_) => return Ok(()), Err(current) => in_use_bytes = current, } } @@ -128,7 +135,7 @@ impl MemoryQuota { match self.in_use.compare_exchange_weak( in_use_bytes, new_in_use_bytes, - Ordering::Acquire, + Ordering::Relaxed, Ordering::Relaxed, ) { Ok(_) => return, @@ -145,13 +152,13 @@ mod tests { #[test] fn test_memory_quota() { let quota = MemoryQuota::new(100); - assert!(quota.alloc(10)); + quota.alloc(10).unwrap(); assert_eq!(quota.in_use(), 10); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 10); quota.free(5); assert_eq!(quota.in_use(), 5); - assert!(quota.alloc(95)); + quota.alloc(95).unwrap(); assert_eq!(quota.in_use(), 100); quota.free(95); assert_eq!(quota.in_use(), 5); @@ -160,19 +167,19 @@ mod tests { #[test] fn test_resize_memory_quota() { let quota = MemoryQuota::new(100); - assert!(quota.alloc(10)); + quota.alloc(10).unwrap(); assert_eq!(quota.in_use(), 10); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 10); quota.set_capacity(200); - assert!(quota.alloc(100)); + quota.alloc(100).unwrap(); assert_eq!(quota.in_use(), 110); quota.set_capacity(50); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 110); quota.free(100); assert_eq!(quota.in_use(), 10); - assert!(quota.alloc(40)); + quota.alloc(40).unwrap(); assert_eq!(quota.in_use(), 50); } } From 87d0f7cf143524222b4b0d80a4a8c5e02d11cf67 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 7 Sep 2023 15:44:15 +0800 Subject: [PATCH 036/203] raftstore-v2: supplement read track 
metrics (#15508) ref tikv/tikv#15409 supplement read track metrics Signed-off-by: SpadeA-Tang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 21 ++++++++++++------- .../raftstore-v2/src/operation/query/local.rs | 4 ++++ .../cases/test_read_execution_tracker.rs | 15 +++++++------ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index f6b9217ecbf4..d51d8eedb2a6 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -9,7 +9,7 @@ use crossbeam::channel::TryRecvError; use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{errorpb, raft_cmdpb::RaftCmdResponse}; -use raftstore::store::{Config, TabletSnapManager, Transport}; +use raftstore::store::{Config, ReadCallback, TabletSnapManager, Transport}; use slog::{debug, info, trace, Logger}; use tikv_util::{ is_zero_duration, @@ -17,6 +17,7 @@ use tikv_util::{ slog_panic, time::{duration_to_sec, Instant}, }; +use tracker::{TrackerToken, GLOBAL_TRACKERS}; use crate::{ batch::StoreContext, @@ -206,11 +207,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } #[inline] - fn on_receive_command(&self, send_time: Instant) { + fn on_receive_command(&self, send_time: Instant, read_token: Option) { + let propose_wait_time = send_time.saturating_elapsed(); self.store_ctx .raft_metrics .propose_wait_time - .observe(duration_to_sec(send_time.saturating_elapsed())); + .observe(duration_to_sec(propose_wait_time)); + if let Some(token) = read_token { + GLOBAL_TRACKERS.with_tracker(token, |tracker| { + tracker.metrics.read_index_propose_wait_nanos = propose_wait_time.as_nanos() as u64; + }); + } } fn on_tick(&mut self, tick: PeerTick) { @@ -243,17 +250,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.fsm.peer.on_raft_message(self.store_ctx, msg); } PeerMsg::RaftQuery(cmd) => { - self.on_receive_command(cmd.send_time); + self.on_receive_command(cmd.send_time, cmd.ch.read_tracker()); self.on_query(cmd.request, cmd.ch) } PeerMsg::AdminCommand(cmd) => { - self.on_receive_command(cmd.send_time); + self.on_receive_command(cmd.send_time, None); self.fsm .peer_mut() .on_admin_command(self.store_ctx, cmd.request, cmd.ch) } PeerMsg::SimpleWrite(write) => { - self.on_receive_command(write.send_time); + self.on_receive_command(write.send_time, None); self.fsm.peer_mut().on_simple_write( self.store_ctx, write.header, @@ -262,7 +269,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, ); } PeerMsg::UnsafeWrite(write) => { - self.on_receive_command(write.send_time); + self.on_receive_command(write.send_time, None); self.fsm .peer_mut() .on_unsafe_write(self.store_ctx, write.data); diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 36dbb26e4c76..2f074fdc04df 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -580,6 +580,10 @@ impl<'r> SnapRequestInspector<'r> { )); } + fail::fail_point!("perform_read_index", |_| Ok(ReadRequestPolicy::ReadIndex)); + + fail::fail_point!("perform_read_local", |_| Ok(ReadRequestPolicy::ReadLocal)); + let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); if 
flags.contains(WriteBatchFlags::STALE_READ) { return Ok(ReadRequestPolicy::StaleRead); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index c5ff93a70c11..7351044b2979 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -2,13 +2,13 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; -use test_raftstore::{ - kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, -}; +use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; +use test_raftstore_macro::test_case; -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); @@ -104,18 +104,21 @@ fn test_read_execution_tracking() { ); }; - fail::cfg("perform_read_index", "return()").unwrap(); + // return read_index twich: one for local reader and one for raftstore + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = kv_read(&client, ctx.clone(), k1.clone(), 100); read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = kv_batch_read(&client, ctx, vec![k1, k2], 100); read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = client.coprocessor(&coprocessor_request).unwrap(); From 98eb383b41695b11a03e3d1ce471181f02bfc741 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:06:14 +0800 Subject: [PATCH 037/203] raftstore-v2: fix chaos between on_memtable_sealed and on_flush_completed (#15543) close tikv/tikv#15534 fix chaos between on_memtable_sealed and on_flush_completed Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +- components/engine_rocks/src/event_listener.rs | 11 ++- components/engine_traits/src/flush.rs | 31 +++++-- tests/failpoints/cases/test_engine.rs | 88 ++++++++++++++++++- 4 files changed, 124 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4cd0882628bc..7e09c3d2979d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = 
"git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 9628c61c23f1..03a40d005c88 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -194,8 +194,15 @@ impl rocksdb::EventListener for RocksPersistenceListener { fn on_memtable_sealed(&self, info: &MemTableInfo) { // Note: first_seqno is effectively the smallest seqno of memtable. // earliest_seqno has ambiguous semantics. - self.0 - .on_memtable_sealed(info.cf_name().to_string(), info.first_seqno()); + self.0.on_memtable_sealed( + info.cf_name().to_string(), + info.first_seqno(), + info.largest_seqno(), + ); + } + + fn on_flush_begin(&self, _: &FlushJobInfo) { + fail::fail_point!("on_flush_begin"); } fn on_flush_completed(&self, job: &FlushJobInfo) { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 9344e84bb4e7..8590236e1265 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -18,14 +18,17 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, RwLock, }, + time::Duration, }; use kvproto::import_sstpb::SstMeta; -use slog_global::info; -use tikv_util::set_panic_mark; +use slog_global::{info, warn}; +use tikv_util::{set_panic_mark, time::Instant}; use crate::{data_cf_offset, RaftEngine, RaftLogBatch, DATA_CFS_LEN}; +const HEAVY_WORKER_THRESHOLD: Duration = Duration::from_millis(25); + #[derive(Debug)] pub struct ApplyProgress { cf: String, @@ -203,7 +206,11 @@ impl PersistenceListener { /// Called when memtable is frozen. /// /// `smallest_seqno` should be the smallest seqno of the memtable. - pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64) { + /// + /// Note: After https://github.com/tikv/rocksdb/pull/347, rocksdb global lock will + /// be held during this method, so we should avoid do heavy things in it. + pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64, largest_seqno: u64) { + let t = Instant::now_coarse(); (|| { fail_point!("on_memtable_sealed", |t| { assert_eq!(t.unwrap().as_str(), cf); @@ -219,8 +226,9 @@ impl PersistenceListener { let flushed = prs.last_flushed[offset]; if flushed > smallest_seqno { panic!( - "sealed seqno has been flushed {} {} {} <= {}", - cf, apply_index, smallest_seqno, flushed + "sealed seqno conflict with latest flushed index, cf {}, + sealed smallest_seqno {}, sealed largest_seqno {}, last_flushed {}, apply_index {}", + cf, smallest_seqno, largest_seqno, flushed, apply_index, ); } prs.prs.push_back(ApplyProgress { @@ -228,6 +236,11 @@ impl PersistenceListener { apply_index, smallest_seqno, }); + if t.saturating_elapsed() > HEAVY_WORKER_THRESHOLD { + warn!( + "heavy work in on_memtable_sealed, the code should be reviewed"; + ); + } } /// Called a memtable finished flushing. @@ -244,7 +257,13 @@ impl PersistenceListener { if flushed >= largest_seqno { // According to facebook/rocksdb#11183, it's possible OnFlushCompleted can be // called out of order. But it's guaranteed files are installed in order. 
- info!("flush complete reorder found"; "flushed" => flushed, "largest_seqno" => largest_seqno, "file_no" => file_no, "cf" => cf); + info!( + "flush complete reorder found"; + "flushed" => flushed, + "largest_seqno" => largest_seqno, + "file_no" => file_no, + "cf" => cf + ); return; } prs.last_flushed[offset] = largest_seqno; diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 93d1c96597b1..073f72764194 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -1,6 +1,11 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use std::{ + sync::{mpsc::channel, Mutex}, + time::Duration, +}; + +use engine_traits::{MiscExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; use tikv_util::config::ReadableSize; fn dummy_string(len: usize) -> String { @@ -51,3 +56,84 @@ fn test_write_buffer_manager() { cluster.must_put_cf(CF_WRITE, key.as_bytes(), dummy.as_bytes()); } } + +// The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: +// note: before rocksdb/pull/347, lock is called before on_memtable_sealed. +// Case: +// Assume FlushMemtable cf1 (schedule flush task) and BackgroundCallFlush cf1 +// (execute flush task) are performed concurrently. +// t FlushMemtable cf1 BackgroundCallFlush cf1 +// 1. lock +// 2. convert memtable t2(seqno. 10-20) +// to immemtable +// 3. unlock +// 4. lock +// 5. pick memtables to flush: +// t1(0-10), t2(10-20) +// flush job(0-20) +// 6. finish flush +// 7. unlock +// 8. on_flush_completed: +// update last_flushed to 20 +// 9. on_memtable_sealed +// 10 > 20 *panic* +#[test] +fn test_rocksdb_listener() { + use test_raftstore_v2::*; + let count = 1; + let mut cluster = new_node_cluster(0, count); + // make flush thread num 1 to be easy to construct the case + cluster.cfg.rocksdb.max_background_flushes = 1; + cluster.run(); + + let r = cluster.get_region(b"k10"); + cluster.must_split(&r, b"k10"); + + for i in 0..20 { + let k = format!("k{:02}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + let r1 = cluster.get_region(b"k00").get_id(); + let r2 = cluster.get_region(b"k15").get_id(); + + let engine = cluster.get_engine(1); + let tablet1 = engine.get_tablet_by_id(r1).unwrap(); + let tablet2 = engine.get_tablet_by_id(r2).unwrap(); + + fail::cfg("on_flush_begin", "1*pause").unwrap(); + tablet1.flush_cf("default", false).unwrap(); // call flush 1 + std::thread::sleep(Duration::from_secs(1)); + + tablet2.flush_cf("default", false).unwrap(); // call flush 2 + for i in 20..30 { + let k = format!("k{:02}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + fail::cfg("on_memtable_sealed", "pause").unwrap(); + + let h = std::thread::spawn(move || { + tablet2.flush_cf("default", true).unwrap(); + }); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_flush_completed", move || { + let _ = tx.lock().unwrap().send(true); // call flush 3 + }) + .unwrap(); + fail::remove("on_flush_begin"); + + let _ = rx.recv(); // flush 1 done + // Now, flush 1 has done, flush 3 is blocked at on_memtable_sealed. + // Before https://github.com/tikv/rocksdb/pull/347, unlock will be called + // before calling on_memtable_sealed, so flush 2 can pick the memtable sealed by + // flush 3 and thus make the order chaos. + // Now, unlock will not be called, so we have to remove failpoint to avoid + // deadlock. 2 seconds is long enough to make the test failed before + // rocksdb/pull/347. 
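+    // (i.e. without the fix, the delayed flush 2 could pick the memtable sealed by flush 3
+    // within this window and trigger the sealed-seqno conflict panic described above.)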
+ std::thread::sleep(Duration::from_secs(2)); + fail::remove("on_memtable_sealed"); + + h.join().unwrap(); +} From 6f0d84e911a86837263b914e8b1ddba9a1da5232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 12 Sep 2023 08:49:39 +0800 Subject: [PATCH 038/203] sst_importer: don't cache rewritten files (#15502) close tikv/tikv#15483 The rewrite step of sst_importer::apply has been delayed to while iterating the file. Signed-off-by: hillium Co-authored-by: 3pointer --- components/sst_importer/src/sst_importer.rs | 42 +++--- .../tikv_util/src/codec/stream_event.rs | 109 ++++++++++++++-- src/import/sst_service.rs | 5 +- tests/integrations/import/mod.rs | 1 + tests/integrations/import/test_apply_log.rs | 72 ++++++++++ tests/integrations/import/util.rs | 123 +++++++++++++++++- 6 files changed, 322 insertions(+), 30 deletions(-) create mode 100644 tests/integrations/import/test_apply_log.rs diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 33f3c691a260..181f9d67b2fe 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -642,7 +642,6 @@ impl SstImporter { async fn exec_download( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { @@ -690,9 +689,8 @@ impl SstImporter { .with_label_values(&["exec_download"]) .observe(start.saturating_elapsed().as_secs_f64()); - let rewrite_buff = self.rewrite_kv_file(buff, rewrite_rule)?; Ok(LoadedFile { - content: Arc::from(rewrite_buff.into_boxed_slice()), + content: Arc::from(buff.into_boxed_slice()), permit, }) } @@ -700,7 +698,6 @@ impl SstImporter { pub async fn do_read_kv_file( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { @@ -741,7 +738,7 @@ impl SstImporter { } cache - .get_or_try_init(|| self.exec_download(meta, rewrite_rule, ext_storage, speed_limiter)) + .get_or_try_init(|| self.exec_download(meta, ext_storage, speed_limiter)) .await?; Ok(CacheKvFile::Mem(cache)) } @@ -814,7 +811,6 @@ impl SstImporter { pub async fn read_from_kv_file( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, backend: &StorageBackend, speed_limiter: &Limiter, @@ -823,7 +819,7 @@ impl SstImporter { self.do_download_kv_file(meta, backend, speed_limiter) .await? } else { - self.do_read_kv_file(meta, rewrite_rule, ext_storage, speed_limiter) + self.do_read_kv_file(meta, ext_storage, speed_limiter) .await? }; match c { @@ -841,8 +837,7 @@ impl SstImporter { let mut buffer = Vec::new(); reader.read_to_end(&mut buffer)?; - let rewrite_buff = self.rewrite_kv_file(buffer, rewrite_rule)?; - Ok(Arc::from(rewrite_buff.into_boxed_slice())) + Ok(Arc::from(buffer.into_boxed_slice())) } } } @@ -940,7 +935,11 @@ impl SstImporter { // perform iteration and key rewrite. 
let mut new_buff = Vec::with_capacity(file_buff.len()); - let mut event_iter = EventIterator::new(file_buff.as_slice()); + let mut event_iter = EventIterator::with_rewriting( + file_buff.as_slice(), + rewrite_rule.get_old_key_prefix(), + rewrite_rule.get_new_key_prefix(), + ); let mut key = new_prefix.to_vec(); let new_prefix_data_key_len = key.len(); @@ -983,9 +982,14 @@ impl SstImporter { start_ts: u64, restore_ts: u64, file_buff: Arc<[u8]>, + rewrite_rule: &RewriteRule, mut build_fn: impl FnMut(Vec, Vec), ) -> Result> { - let mut event_iter = EventIterator::new(file_buff.as_ref()); + let mut event_iter = EventIterator::with_rewriting( + file_buff.as_ref(), + rewrite_rule.get_old_key_prefix(), + rewrite_rule.get_new_key_prefix(), + ); let mut smallest_key = None; let mut largest_key = None; let mut total_key = 0; @@ -1001,6 +1005,16 @@ impl SstImporter { event_iter.next()?; INPORTER_APPLY_COUNT.with_label_values(&["key_meet"]).inc(); + if !event_iter + .key() + .starts_with(rewrite_rule.get_new_key_prefix()) + { + return Err(Error::WrongKeyPrefix { + what: "do_apply_kv_file", + key: event_iter.key().to_vec(), + prefix: rewrite_rule.get_old_key_prefix().to_vec(), + }); + } let key = event_iter.key().to_vec(); let value = event_iter.value().to_vec(); let ts = Key::decode_ts_from(&key)?; @@ -1028,7 +1042,7 @@ impl SstImporter { largest_key = largest_key .map_or_else(|| Some(key.clone()), |v: Vec| Some(v.max(key.clone()))); } - if total_key != not_in_range { + if not_in_range != 0 || ts_not_expected != 0 { info!("build download request file done"; "total_keys" => %total_key, "ts_filtered_keys" => %ts_not_expected, @@ -2050,10 +2064,8 @@ mod tests { }; // test do_read_kv_file() - let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let output = block_on_external_io(importer.do_read_kv_file( &kv_meta, - rewrite_rule, ext_storage, &Limiter::new(f64::INFINITY), )) @@ -2163,7 +2175,6 @@ mod tests { }; let importer = SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1, false).unwrap(); - let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2181,7 +2192,6 @@ mod tests { assert!(importer.import_support_download()); let output = block_on_external_io(importer.read_from_kv_file( &kv_meta, - rewrite_rule, ext_storage, &backend, &Limiter::new(f64::INFINITY), diff --git a/components/tikv_util/src/codec/stream_event.rs b/components/tikv_util/src/codec/stream_event.rs index 5b00cad63726..3c1a04f77e32 100644 --- a/components/tikv_util/src/codec/stream_event.rs +++ b/components/tikv_util/src/codec/stream_event.rs @@ -6,6 +6,13 @@ use bytes::{Buf, Bytes}; use crate::{codec::Result, Either}; +// Note: maybe allow them to be different lifetime. +// But not necessary for now, so keep it simple...? 
+pub struct Rewrite<'a> { + from: &'a [u8], + to: &'a [u8], +} + pub trait Iterator { fn next(&mut self) -> Result<()>; @@ -19,10 +26,12 @@ pub trait Iterator { pub struct EventIterator<'a> { buf: &'a [u8], offset: usize, - key_offset: usize, value_offset: usize, - key_len: usize, value_len: usize, + + key_buf: Vec, + + rewrite_rule: Option>, } impl EventIterator<'_> { @@ -30,10 +39,21 @@ impl EventIterator<'_> { EventIterator { buf, offset: 0, - key_offset: 0, - key_len: 0, + key_buf: vec![], value_offset: 0, value_len: 0, + rewrite_rule: None, + } + } + + pub fn with_rewriting<'a>(buf: &'a [u8], from: &'a [u8], to: &'a [u8]) -> EventIterator<'a> { + EventIterator { + buf, + offset: 0, + key_buf: vec![], + value_offset: 0, + value_len: 0, + rewrite_rule: Some(Rewrite { from, to }), } } @@ -42,14 +62,47 @@ impl EventIterator<'_> { self.offset += 4; result } + + fn consume_key_with_len(&mut self, key_len: usize) { + self.key_buf.clear(); + self.key_buf.reserve(key_len); + self.key_buf + .extend_from_slice(&self.buf[self.offset..self.offset + key_len]); + self.offset += key_len; + } + + fn move_to_next_key_with_rewrite(&mut self) { + let key_len = self.get_size() as usize; + let rewrite = self.rewrite_rule.as_ref().expect("rewrite rule not set"); + if key_len < rewrite.from.len() + || &self.buf[self.offset..self.offset + rewrite.from.len()] != rewrite.from + { + self.consume_key_with_len(key_len); + return; + } + self.key_buf.clear(); + self.key_buf + .reserve(rewrite.to.len() + key_len - rewrite.from.len()); + self.key_buf.extend_from_slice(rewrite.to); + self.key_buf + .extend_from_slice(&self.buf[self.offset + rewrite.from.len()..self.offset + key_len]); + self.offset += key_len; + } + + fn fetch_key_buffer_and_move_to_value(&mut self) { + if self.rewrite_rule.is_some() { + self.move_to_next_key_with_rewrite() + } else { + let key_len = self.get_size() as usize; + self.consume_key_with_len(key_len); + } + } } impl Iterator for EventIterator<'_> { fn next(&mut self) -> Result<()> { if self.valid() { - self.key_len = self.get_size() as usize; - self.key_offset = self.offset; - self.offset += self.key_len; + self.fetch_key_buffer_and_move_to_value(); self.value_len = self.get_size() as usize; self.value_offset = self.offset; @@ -63,7 +116,7 @@ impl Iterator for EventIterator<'_> { } fn key(&self) -> &[u8] { - &self.buf[self.key_offset..self.key_offset + self.key_len] + &self.key_buf[..] 
} fn value(&self) -> &[u8] { @@ -155,4 +208,44 @@ mod tests { } assert_eq!(count, index); } + + #[test] + fn test_rewrite() { + let mut rng = rand::thread_rng(); + let mut event = vec![]; + let mut keys = vec![]; + let mut vals = vec![]; + let count = 20; + + for _i in 0..count { + let should_rewrite = rng.gen::(); + let mut key: Vec = std::iter::once(if should_rewrite { b'k' } else { b'l' }) + .chain((0..100).map(|_| rng.gen_range(0..255))) + .collect(); + let val: Vec = (0..100).map(|_| rng.gen_range(0..255)).collect(); + let e = EventEncoder::encode_event(&key, &val); + for s in e { + event.extend_from_slice(s.as_ref()); + } + if should_rewrite { + key[0] = b'r'; + } + keys.push(key); + vals.push(val); + } + + let mut iter = EventIterator::with_rewriting(&event, b"k", b"r"); + + let mut index = 0_usize; + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + assert_eq!(iter.key(), keys[index]); + assert_eq!(iter.value(), vals[index]); + index += 1; + } + assert_eq!(count, index); + } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 0c81873c130d..6d40ffe959c8 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -165,6 +165,9 @@ impl RequestCollector { } fn accept_kv(&mut self, cf: &str, is_delete: bool, k: Vec, v: Vec) { + debug!("Accepting KV."; "cf" => %cf, + "key" => %log_wrappers::Value::key(&k), + "value" => %log_wrappers::Value::key(&v)); // Need to skip the empty key/value that could break the transaction or cause // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. if k.is_empty() || (!is_delete && v.is_empty()) { @@ -567,7 +570,6 @@ impl ImportSstService { let buff = importer .read_from_kv_file( meta, - rule, ext_storage.clone(), req.get_storage_backend(), &limiter, @@ -579,6 +581,7 @@ impl ImportSstService { meta.get_start_ts(), meta.get_restore_ts(), buff, + rule, |k, v| collector.accept_kv(meta.get_cf(), meta.get_is_delete(), k, v), )? { if let Some(range) = range.as_mut() { diff --git a/tests/integrations/import/mod.rs b/tests/integrations/import/mod.rs index 96e2c655e18f..4de0fa264724 100644 --- a/tests/integrations/import/mod.rs +++ b/tests/integrations/import/mod.rs @@ -1,4 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
+mod test_apply_log; mod test_sst_service; mod util; diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs new file mode 100644 index 000000000000..3d8cf85b02cf --- /dev/null +++ b/tests/integrations/import/test_apply_log.rs @@ -0,0 +1,72 @@ +use engine_traits::CF_DEFAULT; +use external_storage_export::LocalStorage; +use kvproto::import_sstpb::ApplyRequest; +use tempfile::TempDir; + +use crate::import::util; + +#[test] +fn test_basic_apply() { + let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let tmp = TempDir::new().unwrap(); + let storage = LocalStorage::new(tmp.path()).unwrap(); + let default = [ + (b"k1", b"v1", 1), + (b"k2", b"v2", 2), + (b"k3", b"v3", 3), + (b"k4", b"v4", 4), + ]; + let default_rewritten = [(b"r1", b"v1", 1), (b"r2", b"v2", 2), (b"r3", b"v3", 3)]; + let mut sst_meta = util::make_plain_file(&storage, "file1.log", default.into_iter()); + util::register_range_for(&mut sst_meta, b"k1", b"k3a"); + let mut req = ApplyRequest::new(); + req.set_context(ctx.clone()); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_metas(vec![sst_meta].into()); + req.set_storage_backend(util::local_storage(&tmp)); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_rewritten.into_iter()); +} + +#[test] +fn test_apply_twice() { + let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let tmp = TempDir::new().unwrap(); + let storage = LocalStorage::new(tmp.path()).unwrap(); + let default = [( + b"k1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + let default_fst = [( + b"r1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + let default_snd = [( + b"z1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + + let mut sst_meta = util::make_plain_file(&storage, "file2.log", default.into_iter()); + util::register_range_for(&mut sst_meta, b"k1", b"k1a"); + let mut req = ApplyRequest::new(); + req.set_context(ctx.clone()); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_metas(vec![sst_meta.clone()].into()); + req.set_storage_backend(util::local_storage(&tmp)); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_fst.into_iter()); + + util::register_range_for(&mut sst_meta, b"k1", b"k1a"); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"z")].into()); + req.set_metas(vec![sst_meta].into()); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf( + &tikv, + &ctx, + CF_DEFAULT, + default_fst.into_iter().chain(default_snd.into_iter()), + ); +} diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index cc5d22d517d2..d8a11d50746b 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -1,16 +1,31 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::Arc, thread, time::Duration}; - +use std::{ + io::{Cursor, Write}, + sync::Arc, + thread, + time::Duration, +}; + +use collections::HashMap; use engine_rocks::RocksEngine; -use futures::{executor::block_on, stream, SinkExt}; +use engine_traits::CF_DEFAULT; +use external_storage_export::{ExternalStorage, UnpinReader}; +use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; -use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*}; +use kvproto::{ + brpb::{Local, StorageBackend}, + import_sstpb::{KvMeta, *}, + kvrpcpb::*, + tikvpb::*, +}; use security::SecurityConfig; +use tempfile::TempDir; use test_raftstore::*; use test_raftstore_v2::{Cluster as ClusterV2, ServerCluster as ServerClusterV2}; use tikv::config::TikvConfig; -use tikv_util::HandyRwLock; +use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io, HandyRwLock}; +use txn_types::Key; use uuid::Uuid; const CLEANUP_SST_MILLIS: u64 = 10; @@ -246,6 +261,40 @@ pub fn check_ingested_kvs_cf(tikv: &TikvClient, ctx: &Context, cf: &str, sst_ran } } +#[track_caller] +pub fn check_applied_kvs_cf, V: AsRef<[u8]> + std::fmt::Debug>( + tikv: &TikvClient, + ctx: &Context, + cf: &str, + entries: impl Iterator, +) { + let mut get = RawBatchGetRequest::default(); + get.set_cf(cf.to_owned()); + get.set_context(ctx.clone()); + let mut keymap = HashMap::default(); + for (key, value, ts) in entries { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + keymap.insert(the_key.clone(), value); + get.mut_keys().push(the_key); + } + for pair in tikv.raw_batch_get(&get).unwrap().get_pairs() { + let entry = keymap.remove(pair.get_key()).expect("unexpected key"); + assert_eq!( + entry.as_ref(), + pair.get_value(), + "key is {:?}", + pair.get_key() + ); + } + assert!( + keymap.is_empty(), + "not all keys consumed, remained {:?}", + keymap + ); +} + pub fn check_ingested_txn_kvs( tikv: &TikvClient, ctx: &Context, @@ -273,3 +322,67 @@ pub fn check_sst_deleted(client: &ImportSstClient, meta: &SstMeta, data: &[u8]) } send_upload_sst(client, meta, data).unwrap(); } + +pub fn make_plain_file(storage: &dyn ExternalStorage, name: &str, kvs: I) -> KvMeta +where + I: Iterator, + K: AsRef<[u8]>, + V: AsRef<[u8]>, +{ + let mut buf = vec![]; + let mut file = Cursor::new(&mut buf); + let mut start_ts: Option = None; + for (key, value, ts) in kvs { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + start_ts = Some(start_ts.map_or(ts, |ts0| ts0.min(ts))); + for segment in EventEncoder::encode_event(&the_key, value.as_ref()) { + file.write_all(segment.as_ref()).unwrap(); + } + } + file.flush().unwrap(); + let len = buf.len() as u64; + block_on_external_io(storage.write(name, UnpinReader(Box::new(AsyncCursor::new(buf))), len)) + .unwrap(); + let mut meta = KvMeta::new(); + meta.set_start_ts(start_ts.unwrap_or_default()); + meta.set_length(len); + meta.set_restore_ts(u64::MAX); + meta.set_compression_type(kvproto::brpb::CompressionType::Unknown); + meta.set_name(name.to_owned()); + meta.set_cf(CF_DEFAULT.to_owned()); + meta +} + +pub fn rewrite_for(meta: &mut KvMeta, old_prefix: &[u8], new_prefix: &[u8]) -> RewriteRule { + assert_eq!(old_prefix.len(), new_prefix.len()); + fn rewrite(key: &mut Vec, old_prefix: &[u8], new_prefix: &[u8]) { + assert!(key.starts_with(old_prefix)); + let len = old_prefix.len(); + key.splice(..len, new_prefix.iter().cloned()); + } + 
rewrite(meta.mut_start_key(), old_prefix, new_prefix); + rewrite(meta.mut_end_key(), old_prefix, new_prefix); + let mut rule = RewriteRule::default(); + rule.set_old_key_prefix(old_prefix.to_vec()); + rule.set_new_key_prefix(new_prefix.to_vec()); + rule +} + +pub fn register_range_for(meta: &mut KvMeta, start: &[u8], end: &[u8]) { + let start = Key::from_raw(start); + let end = Key::from_raw(end); + meta.set_start_key(start.into_encoded()); + meta.set_end_key(end.into_encoded()); +} + +pub fn local_storage(tmp: &TempDir) -> StorageBackend { + let mut backend = StorageBackend::default(); + backend.set_local({ + let mut local = Local::default(); + local.set_path(tmp.path().to_str().unwrap().to_owned()); + local + }); + backend +} From d830a58335839fe02434727f2d8b252a02ba386d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 12 Sep 2023 18:04:41 +0800 Subject: [PATCH 039/203] [Dynamic Region] Supply extra test cases for `gc`. (#15544) ref tikv/tikv#15409 Supply extra test cases, including integration tests and unit tests for raftstore-v2 on `gc`. Signed-off-by: lucasliang --- tests/failpoints/cases/test_gc_worker.rs | 105 +++++++++++------------ tests/integrations/server/gc_worker.rs | 18 ++-- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index d24ec85f0403..50b71b59f47f 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -14,9 +14,10 @@ use raftstore::coprocessor::{ RegionInfo, RegionInfoCallback, RegionInfoProvider, Result as CopResult, SeekRegionCallback, }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::{ server::gc_worker::{ - AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGcRunner, + sync_gc, AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGcRunner, }, storage::{ kv::TestEngineBuilder, @@ -61,11 +62,38 @@ fn test_error_in_compaction_filter() { fail::remove(fp); } +#[derive(Clone)] +struct MockSafePointProvider; +impl GcSafePointProvider for MockSafePointProvider { + fn get_safe_point(&self) -> GcWorkerResult { + Ok(TimeStamp::from(0)) + } +} + +#[derive(Clone)] +struct MockRegionInfoProvider; +impl RegionInfoProvider for MockRegionInfoProvider { + fn seek_region(&self, _: &[u8], _: SeekRegionCallback) -> CopResult<()> { + Ok(()) + } + fn find_region_by_id( + &self, + _: u64, + _: RegionInfoCallback>, + ) -> CopResult<()> { + Ok(()) + } + fn get_regions_in_range(&self, _start_key: &[u8], _end_key: &[u8]) -> CopResult> { + Ok(vec![]) + } +} + // Test GC worker can receive and handle orphan versions emit from write CF's // compaction filter correctly. 
-#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_orphan_versions_from_compaction_filter() { - let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { + let (cluster, leader, ctx) = new_cluster(|cluster| { cluster.cfg.gc.enable_compaction_filter = true; cluster.cfg.gc.compaction_filter_skip_version_check = true; cluster.pd_client.disable_default_operator(); @@ -76,8 +104,20 @@ fn test_orphan_versions_from_compaction_filter() { let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader_store)); let client = TikvClient::new(channel); - init_compaction_filter(&cluster, leader_store); - let engine = cluster.engines.get(&leader_store).unwrap(); + // Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine + // compaction filter and GC worker so that GC worker can help to process orphan + // versions on default CF. + { + let sim = cluster.sim.rl(); + let gc_worker = sim.get_gc_worker(leader_store); + gc_worker + .start_auto_gc( + AutoGcConfig::new(MockSafePointProvider, MockRegionInfoProvider, 1), + Arc::new(AtomicU64::new(0)), + ) + .unwrap(); + } + let engine = cluster.get_engine(leader_store); let pk = b"k1".to_vec(); let large_value = vec![b'x'; 300]; @@ -91,22 +131,23 @@ fn test_orphan_versions_from_compaction_filter() { if start_ts < 40 { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_some()); + assert!(engine.get_value(&key).unwrap().is_some()); } } let fp = "write_compaction_filter_flush_write_batch"; fail::cfg(fp, "return").unwrap(); - let mut gc_runner = TestGcRunner::new(100); - gc_runner.gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); - gc_runner.gc(&engine.kv); + let gc_safe_ponit = TimeStamp::from(100); + let gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); + let region = cluster.get_region(&pk); + sync_gc(&gc_scheduler, region, gc_safe_ponit).unwrap(); 'IterKeys: for &start_ts in &[10, 20, 30] { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); for _ in 0..100 { - if engine.kv.get_value(&key).unwrap().is_some() { + if engine.get_value(&key).unwrap().is_some() { thread::sleep(Duration::from_millis(20)); continue; } @@ -117,47 +158,3 @@ fn test_orphan_versions_from_compaction_filter() { fail::remove(fp); } - -// Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine -// compaction filter and GC worker so that GC worker can help to process orphan -// versions on default CF. 
-fn init_compaction_filter(cluster: &Cluster, store_id: u64) { - #[derive(Clone)] - struct MockSafePointProvider; - impl GcSafePointProvider for MockSafePointProvider { - fn get_safe_point(&self) -> GcWorkerResult { - Ok(TimeStamp::from(0)) - } - } - - #[derive(Clone)] - struct MockRegionInfoProvider; - impl RegionInfoProvider for MockRegionInfoProvider { - fn seek_region(&self, _: &[u8], _: SeekRegionCallback) -> CopResult<()> { - Ok(()) - } - fn find_region_by_id( - &self, - _: u64, - _: RegionInfoCallback>, - ) -> CopResult<()> { - Ok(()) - } - fn get_regions_in_range( - &self, - _start_key: &[u8], - _end_key: &[u8], - ) -> CopResult> { - Ok(vec![]) - } - } - - let sim = cluster.sim.rl(); - let gc_worker = sim.get_gc_worker(store_id); - gc_worker - .start_auto_gc( - AutoGcConfig::new(MockSafePointProvider, MockRegionInfoProvider, 1), - Arc::new(AtomicU64::new(0)), - ) - .unwrap(); -} diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index cfadde844056..238102df6b68 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -7,15 +7,17 @@ use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::server::gc_worker::sync_gc; use tikv_util::HandyRwLock; use txn_types::Key; // Since v5.0 GC bypasses Raft, which means GC scans/deletes records with // `keys::DATA_PREFIX`. This case ensures it's performed correctly. -#[test] +#[test_case(test_raftstore::must_new_cluster_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_mul)] fn test_gc_bypass_raft() { - let (cluster, leader, ctx) = must_new_cluster_mul(2); + let (cluster, leader, ctx) = new_cluster(2); cluster.pd_client.disable_default_operator(); let env = Arc::new(Environment::new(1)); @@ -25,7 +27,7 @@ fn test_gc_bypass_raft() { let pk = b"k1".to_vec(); let value = vec![b'x'; 300]; - let engine = cluster.engines.get(&leader_store).unwrap(); + let engine = cluster.get_engine(leader_store); for &start_ts in &[10, 20, 30, 40] { let commit_ts = start_ts + 5; @@ -37,11 +39,11 @@ fn test_gc_bypass_raft() { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_some()); + assert!(engine.get_value(&key).unwrap().is_some()); let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_some()); + assert!(engine.get_value_cf(CF_WRITE, &key).unwrap().is_some()); } let node_ids = cluster.get_node_ids(); @@ -53,16 +55,16 @@ fn test_gc_bypass_raft() { region.set_end_key(b"k2".to_vec()); sync_gc(&gc_sched, region, 200.into()).unwrap(); - let engine = cluster.engines.get(&store_id).unwrap(); + let engine = cluster.get_engine(store_id); for &start_ts in &[10, 20, 30] { let commit_ts = start_ts + 5; let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_none()); + assert!(engine.get_value(&key).unwrap().is_none()); let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_none()); + assert!(engine.get_value_cf(CF_WRITE, &key).unwrap().is_none()); } } } From db0304e65045fdc6701e8fe0db80416a0210e412 Mon Sep 17 00:00:00 2001 From: Spade A 
<71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 13 Sep 2023 07:43:38 +0800 Subject: [PATCH 040/203] *: update cargo.lock (#15573) close tikv/tikv#15579 update cargo.lock Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e09c3d2979d..fb5e711d34da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "libc 0.2.146", "librocksdb_sys", From d5d89ba60b07e508e4073b5460b192680c272213 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 13 Sep 2023 14:10:38 +0800 Subject: [PATCH 041/203] coprocessor: use the deadline in kvrpcpb::Context (#15564) close tikv/tikv#15565 Signed-off-by: lance6716 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/mod.rs | 46 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 140d3c0476e9..fcd16f9b9478 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -159,7 +159,11 @@ impl ReqContext { cache_match_version: Option, perf_level: PerfLevel, ) -> Self { - let deadline = Deadline::from_now(max_handle_duration); + let mut deadline_duration = max_handle_duration; + if context.max_execution_duration_ms > 0 { + deadline_duration = Duration::from_millis(context.max_execution_duration_ms); + } + let deadline = Deadline::from_now(deadline_duration); let bypass_locks = TsSet::from_u64s(context.take_resolved_locks()); let access_locks = TsSet::from_u64s(context.take_committed_locks()); let lower_bound = match ranges.first().as_ref() { @@ -235,6 +239,23 @@ lazy_static! 
{ mod tests { use super::*; + fn default_req_ctx_with_ctx_duration( + context: kvrpcpb::Context, + max_handle_duration: Duration, + ) -> ReqContext { + ReqContext::new( + ReqTag::test, + context, + Vec::new(), + max_handle_duration, + None, + None, + TimeStamp::max(), + None, + PerfLevel::EnableCount, + ) + } + #[test] fn test_build_task_id() { let mut ctx = ReqContext::default_for_test(); @@ -246,4 +267,27 @@ mod tests { ctx.context.set_task_id(0); assert_eq!(ctx.build_task_id(), start_ts); } + + #[test] + fn test_deadline_from_req_ctx() { + let ctx = kvrpcpb::Context::default(); + let max_handle_duration = Duration::from_millis(100); + let req_ctx = default_req_ctx_with_ctx_duration(ctx, max_handle_duration); + // sleep at least 100ms + std::thread::sleep(Duration::from_millis(200)); + req_ctx + .deadline + .check() + .expect_err("deadline should exceed"); + + let mut ctx = kvrpcpb::Context::default(); + ctx.max_execution_duration_ms = 100_000; + let req_ctx = default_req_ctx_with_ctx_duration(ctx, max_handle_duration); + // sleep at least 100ms + std::thread::sleep(Duration::from_millis(200)); + req_ctx + .deadline + .check() + .expect("deadline should not exceed"); + } } From b75f55901e5defd5c87a10de2ca7088749c16b7f Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 13 Sep 2023 17:19:38 +0800 Subject: [PATCH 042/203] tidb_query_datatype,collation: remove utf8mb4_0900_bin from need_restored_data (#15572) close tikv/tikv#15571 Signed-off-by: Yang Keao --- .../tidb_query_datatype/src/def/field_type.rs | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index 06f4454b36d4..8a56ac5ac686 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -140,7 +140,10 @@ impl Collation { } pub fn is_bin_collation(&self) -> bool { - matches!(self, Collation::Utf8Mb4Bin | Collation::Latin1Bin) + matches!( + self, + Collation::Utf8Mb4Bin | Collation::Latin1Bin | Collation::Utf8Mb40900Bin + ) } } @@ -333,6 +336,10 @@ pub trait FieldTypeAccessor { .map(|col| col.is_bin_collation()) .unwrap_or(false) || self.is_varchar_like()) + && self + .collation() + .map(|col| col != Collation::Utf8Mb40900Bin) + .unwrap_or(false) } } @@ -455,6 +462,7 @@ mod tests { use std::i32; use super::*; + use crate::builder::FieldTypeBuilder; fn field_types() -> Vec { vec![ @@ -583,4 +591,31 @@ mod tests { } } } + + #[test] + fn test_need_restored_data() { + let cases = vec![ + (FieldTypeTp::String, Collation::Binary, false), + (FieldTypeTp::VarString, Collation::Binary, false), + (FieldTypeTp::String, Collation::Utf8Mb4Bin, false), + (FieldTypeTp::VarString, Collation::Utf8Mb4Bin, true), + (FieldTypeTp::String, Collation::Utf8Mb4GeneralCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb4GeneralCi, true), + (FieldTypeTp::String, Collation::Utf8Mb4UnicodeCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb4UnicodeCi, true), + (FieldTypeTp::String, Collation::Utf8Mb40900AiCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb40900AiCi, true), + (FieldTypeTp::String, Collation::Utf8Mb40900Bin, false), + (FieldTypeTp::VarString, Collation::Utf8Mb40900Bin, false), + (FieldTypeTp::String, Collation::GbkBin, true), + (FieldTypeTp::VarString, Collation::GbkBin, true), + (FieldTypeTp::String, Collation::GbkChineseCi, true), + (FieldTypeTp::VarString, Collation::GbkChineseCi, true), + ]; + + for (tp, collation, result) in 
cases { + let ft = FieldTypeBuilder::new().tp(tp).collation(collation).build(); + assert_eq!(ft.need_restored_data(), result) + } + } } From 063c9cd64c8bcf0c2373358354994499d9edeb0b Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 14 Sep 2023 00:52:38 +0800 Subject: [PATCH 043/203] raftstore-v2: persist applied index after ingset sst (#15538) close tikv/tikv#15461 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- .../raftstore-v2/src/operation/command/mod.rs | 6 + .../src/operation/command/write/ingest.rs | 12 +- .../src/operation/ready/apply_trace.rs | 244 ++++++++++++++++-- .../raftstore-v2/src/operation/ready/mod.rs | 10 +- components/raftstore-v2/src/raft/apply.rs | 14 +- .../src/router/internal_message.rs | 7 + components/raftstore-v2/src/router/mod.rs | 2 +- tests/failpoints/cases/test_import_service.rs | 76 +++++- 8 files changed, 341 insertions(+), 30 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index c39f2412f325..e579d22c6da0 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -455,6 +455,11 @@ impl Peer { if is_leader { self.retry_pending_prepare_merge(ctx, apply_res.applied_index); } + if !apply_res.sst_applied_index.is_empty() { + self.storage_mut() + .apply_trace_mut() + .on_sst_ingested(&apply_res.sst_applied_index); + } self.on_data_modified(apply_res.modifications); self.handle_read_on_apply( ctx, @@ -866,6 +871,7 @@ impl Apply { apply_res.modifications = *self.modifications_mut(); apply_res.metrics = mem::take(&mut self.metrics); apply_res.bucket_stat = self.buckets.clone(); + apply_res.sst_applied_index = self.take_sst_applied_index(); let written_bytes = apply_res.metrics.written_bytes; let skip_report = || -> bool { diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 7e8ed381ad01..92f5923d1679 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -2,7 +2,7 @@ use collections::HashMap; use crossbeam::channel::TrySendError; -use engine_traits::{data_cf_offset, KvEngine, RaftEngine}; +use engine_traits::{data_cf_offset, KvEngine, RaftEngine, DATA_CFS_LEN}; use kvproto::import_sstpb::SstMeta; use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, @@ -16,7 +16,7 @@ use crate::{ batch::StoreContext, fsm::{ApplyResReporter, Store, StoreFsmDelegate}, raft::{Apply, Peer}, - router::{PeerMsg, StoreTick}, + router::{PeerMsg, SstApplyIndex, StoreTick}, worker::tablet, }; @@ -107,10 +107,12 @@ impl Peer { impl Apply { #[inline] pub fn apply_ingest(&mut self, index: u64, ssts: Vec) -> Result<()> { + fail::fail_point!("on_apply_ingest"); PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); let mut infos = Vec::with_capacity(ssts.len()); let mut size: i64 = 0; let mut keys: u64 = 0; + let mut cf_indexes = [u64::MAX; DATA_CFS_LEN]; for sst in &ssts { // This may not be enough as ingest sst may not trigger flush at all. let off = data_cf_offset(sst.get_cf_name()); @@ -138,6 +140,7 @@ impl Apply { slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); } } + cf_indexes[off] = index; } if !infos.is_empty() { // Unlike v1, we can't batch ssts accross regions. 
@@ -154,6 +157,11 @@ impl Apply { self.metrics.size_diff_hint += size; self.metrics.written_bytes += size as u64; self.metrics.written_keys += keys; + for (cf_index, index) in cf_indexes.into_iter().enumerate() { + if index != u64::MAX { + self.push_sst_applied_index(SstApplyIndex { cf_index, index }); + } + } Ok(()) } } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 1601e1f01dd1..af0257e763f1 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -29,6 +29,7 @@ use std::{ cmp, + collections::VecDeque, path::Path, sync::{atomic::Ordering, mpsc::SyncSender, Mutex}, }; @@ -56,7 +57,7 @@ use crate::{ ready::snapshot::{install_tablet, recv_snap_path}, }, raft::{Peer, Storage}, - router::PeerMsg, + router::{PeerMsg, SstApplyIndex}, worker::tablet, Result, StoreRouter, }; @@ -138,7 +139,7 @@ impl engine_traits::StateStorage for StateStorage< /// Mapping from data cf to an u64 index. pub type DataTrace = [u64; DATA_CFS_LEN]; -#[derive(Clone, Copy, Default, Debug)] +#[derive(Clone, Default, Debug)] struct Progress { flushed: u64, /// The index of last entry that has modification to the CF. The value @@ -146,6 +147,20 @@ struct Progress { /// /// If `flushed` == `last_modified`, then all data in the CF is persisted. last_modified: u64, + // applied indexes ranges that represent sst is ingested but not flushed indexes. + pending_sst_ranges: VecDeque, +} + +// A range representing [start, end], upper bound inclusive for handling +// convenience. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct IndexRange(u64, u64); + +#[derive(Debug)] +// track the global flushed index related to the write task. +struct ReadyFlushedIndex { + ready_number: u64, + flushed_index: u64, } /// `ApplyTrace` is used to track the indexes of modifications and flushes. @@ -178,6 +193,9 @@ pub struct ApplyTrace { last_flush_trigger: u64, /// `true` means the raft cf record should be persisted in next ready. try_persist: bool, + // Because we persist the global flushed in the write task, so we should track + // the task and handle sst cleanup after the write task finished. + flushed_index_queue: VecDeque, } impl ApplyTrace { @@ -230,6 +248,25 @@ impl ApplyTrace { self.admin.last_modified = index; } + pub fn on_sst_ingested(&mut self, sst_applied_index: &[SstApplyIndex]) { + use std::cmp::Ordering; + for &SstApplyIndex { cf_index, index } in sst_applied_index { + let p = &mut self.data_cfs[cf_index]; + if p.flushed < index { + let max_idx = p.pending_sst_ranges.iter().last().map(|r| r.1).unwrap_or(0) + 1; + match max_idx.cmp(&index) { + Ordering::Less => { + p.pending_sst_ranges.push_back(IndexRange(index, index)); + } + Ordering::Equal => { + p.pending_sst_ranges.iter_mut().last().unwrap().1 = index; + } + _ => {} + } + } + } + } + pub fn persisted_apply_index(&self) -> u64 { self.persisted_applied } @@ -283,17 +320,45 @@ impl ApplyTrace { } }) .min(); + // At best effort, we can only advance the index to `mem_index`. let candidate = cmp::min(mem_index, min_flushed.unwrap_or(u64::MAX)); + // try advance the index if there are any sst ingestion next to the flushed + // index, and always trigger a flush if there is any sst ingestion. 
+ let (candidate, has_ingested_sst) = self.advance_flushed_index_for_ingest(candidate); if candidate > self.admin.flushed { self.admin.flushed = candidate; - if self.admin.flushed > self.persisted_applied + 100 { + if has_ingested_sst || (self.admin.flushed > self.persisted_applied + 100) { self.try_persist = true; } } // TODO: persist admin.flushed every 10 minutes. } + fn advance_flushed_index_for_ingest(&mut self, mut max_index: u64) -> (u64, bool) { + let mut has_ingest = false; + loop { + let mut has_change = false; + for p in self.data_cfs.iter_mut() { + while let Some(r) = p.pending_sst_ranges.front_mut() { + if r.0 > max_index + 1 { + break; + } else if r.1 > max_index { + max_index = r.1; + has_change = true; + } + p.pending_sst_ranges.pop_front(); + has_ingest = true; + } + } + if !has_change { + break; + } + } + + (max_index, has_ingest) + } + /// Get the flushed indexes of all data CF that is needed when recoverying /// logs. /// @@ -348,6 +413,38 @@ impl ApplyTrace { fail_point!("should_persist_apply_trace", |_| true); self.try_persist } + + #[inline] + pub fn register_flush_task(&mut self, ready_number: u64, flushed_index: u64) { + assert!( + self.flushed_index_queue + .iter() + .last() + .map(|f| f.ready_number) + .unwrap_or(0) + < ready_number + ); + self.flushed_index_queue.push_back(ReadyFlushedIndex { + ready_number, + flushed_index, + }); + } + + #[inline] + pub fn take_flush_index(&mut self, ready_number: u64) -> Option { + use std::cmp::Ordering; + while let Some(r) = self.flushed_index_queue.pop_front() { + match r.ready_number.cmp(&ready_number) { + Ordering::Equal => return Some(r.flushed_index), + Ordering::Greater => { + self.flushed_index_queue.push_front(r); + break; + } + _ => {} + } + } + None + } } impl Storage { @@ -546,6 +643,7 @@ impl Storage { .unwrap(); trace.try_persist = false; trace.persisted_applied = trace.admin.flushed; + trace.register_flush_task(write_task.ready_number(), trace.admin.flushed); } } @@ -566,24 +664,7 @@ impl Peer { let apply_trace = self.storage_mut().apply_trace_mut(); apply_trace.on_flush(cf, index); apply_trace.maybe_advance_admin_flushed(apply_index); - let stale_ssts = self.sst_apply_state().stale_ssts(cf, index); - if stale_ssts.is_empty() { - return; - } - info!( - self.logger, - "schedule delete stale ssts after flush"; - "stale_ssts" => ?stale_ssts, - "apply_index" => apply_index, - "cf" => cf, - "flushed_index" => index, - ); - let _ = ctx - .schedulers - .tablet - .schedule(tablet::Task::CleanupImportSst( - stale_ssts.into_boxed_slice(), - )); + self.cleanup_stale_ssts(ctx, &[cf], index, apply_index); } pub fn on_data_modified(&mut self, modification: DataTrace) { @@ -598,6 +679,38 @@ impl Peer { apply_trace.maybe_advance_admin_flushed(apply_index); } + pub fn cleanup_stale_ssts( + &mut self, + ctx: &mut StoreContext, + cfs: &[&str], + index: u64, + apply_index: u64, + ) { + let mut stale_ssts = vec![]; + for cf in cfs { + let ssts = self.sst_apply_state().stale_ssts(cf, index); + if !ssts.is_empty() { + info!( + self.logger, + "schedule delete stale ssts after flush"; + "stale_ssts" => ?stale_ssts, + "apply_index" => apply_index, + "cf" => cf, + "flushed_index" => index, + ); + stale_ssts.extend(ssts); + } + } + if !stale_ssts.is_empty() { + _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::CleanupImportSst( + stale_ssts.into_boxed_slice(), + )); + } + } + pub fn flush_before_close(&mut self, ctx: &StoreContext, tx: SyncSender<()>) { info!( self.logger, @@ -689,7 +802,7 @@ impl Peer { #[cfg(test)] mod tests { - 
use engine_traits::RaftEngineReadOnly; + use engine_traits::{CfName, RaftEngineReadOnly}; use kvproto::metapb::Peer; use tempfile::TempDir; @@ -809,6 +922,93 @@ mod tests { // Because modify is recorded, so we know there should be no admin // modification and index can be advanced. assert_eq!(5, trace.admin.flushed); + + fn range_equals(trace: &ApplyTrace, cf: &str, expected: Vec) { + let pending_ranges = &trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges; + assert_eq!( + pending_ranges.len(), + expected.len(), + "actual: {:?}, expected: {:?}", + pending_ranges, + &expected + ); + pending_ranges + .iter() + .zip(expected.iter()) + .for_each(|(r, e)| { + assert_eq!(r, e); + }); + } + + trace.on_modify(CF_DEFAULT, 8); + let ingested_ssts_idx = + make_sst_apply_index(vec![(CF_DEFAULT, 6), (CF_WRITE, 6), (CF_WRITE, 7)]); + trace.on_sst_ingested(&ingested_ssts_idx); + range_equals(&trace, CF_DEFAULT, vec![IndexRange(6, 6)]); + range_equals(&trace, CF_WRITE, vec![IndexRange(6, 7)]); + trace.maybe_advance_admin_flushed(8); + assert_eq!(7, trace.admin.flushed); + for cf in [CF_DEFAULT, CF_WRITE] { + assert_eq!( + trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges.len(), + 0 + ); + } + trace.on_modify(CF_DEFAULT, 10); + let ingested_ssts_idx = make_sst_apply_index(vec![(CF_DEFAULT, 10)]); + trace.on_sst_ingested(&ingested_ssts_idx); + trace.on_flush(CF_DEFAULT, 8); + trace.maybe_advance_admin_flushed(10); + assert_eq!(8, trace.admin.flushed); + range_equals(&trace, CF_DEFAULT, vec![IndexRange(10, 10)]); + + trace.on_modify(CF_DEFAULT, 16); + let ingested_ssts_idx = make_sst_apply_index(vec![ + (CF_DEFAULT, 11), + (CF_WRITE, 12), + (CF_LOCK, 13), + (CF_DEFAULT, 14), + (CF_WRITE, 14), + (CF_WRITE, 15), + (CF_LOCK, 16), + ]); + trace.on_sst_ingested(&ingested_ssts_idx); + range_equals( + &trace, + CF_DEFAULT, + vec![IndexRange(10, 11), IndexRange(14, 14)], + ); + range_equals( + &trace, + CF_WRITE, + vec![IndexRange(12, 12), IndexRange(14, 15)], + ); + range_equals( + &trace, + CF_LOCK, + vec![IndexRange(13, 13), IndexRange(16, 16)], + ); + trace.maybe_advance_admin_flushed(16); + assert_eq!(8, trace.admin.flushed); + + trace.on_flush(CF_DEFAULT, 9); + trace.maybe_advance_admin_flushed(16); + assert_eq!(16, trace.admin.flushed); + for cf in DATA_CFS { + assert_eq!( + trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges.len(), + 0 + ); + } + } + + fn make_sst_apply_index(data: Vec<(CfName, u64)>) -> Vec { + data.into_iter() + .map(|d| SstApplyIndex { + cf_index: data_cf_offset(d.0), + index: d.1, + }) + .collect() } #[test] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index b985fd69c275..ba7170ac8c87 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -31,7 +31,7 @@ use std::{ time::Instant, }; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, DATA_CFS}; use error_code::ErrorCodeExt; use kvproto::{ raft_cmdpb::AdminCmdType, @@ -896,6 +896,14 @@ impl Peer { self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); + if let Some(idx) = self + .storage_mut() + .apply_trace_mut() + .take_flush_index(ready_number) + { + let apply_index = self.flush_state().applied_index(); + self.cleanup_stale_ssts(ctx, DATA_CFS, idx, apply_index); + } if self.is_in_force_leader() { // forward commit index, the committed entries will be applied in diff --git a/components/raftstore-v2/src/raft/apply.rs 
b/components/raftstore-v2/src/raft/apply.rs index 3e660c4549ce..f3aa5a541c1a 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -20,7 +20,7 @@ use tikv_util::{log::SlogFormat, worker::Scheduler, yatp_pool::FuturePool}; use crate::{ operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, - router::CmdResChannel, + router::{CmdResChannel, SstApplyIndex}, TabletTask, }; @@ -64,6 +64,7 @@ pub struct Apply { admin_cmd_result: Vec, flush_state: Arc, sst_apply_state: SstApplyState, + sst_applied_index: Vec, /// The flushed indexes of each column family before being restarted. /// /// If an apply index is less than the flushed index, the log can be @@ -138,6 +139,7 @@ impl Apply { res_reporter, flush_state, sst_apply_state, + sst_applied_index: vec![], log_recovery, metrics: ApplyMetrics::default(), buckets, @@ -308,6 +310,16 @@ impl Apply { &self.sst_apply_state } + #[inline] + pub fn push_sst_applied_index(&mut self, sst_index: SstApplyIndex) { + self.sst_applied_index.push(sst_index); + } + + #[inline] + pub fn take_sst_applied_index(&mut self) -> Vec { + mem::take(&mut self.sst_applied_index) + } + #[inline] pub fn log_recovery(&self) -> &Option> { &self.log_recovery diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 6c8d1136b3a1..7ac86c3f8c7e 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -25,4 +25,11 @@ pub struct ApplyRes { pub modifications: DataTrace, pub metrics: ApplyMetrics, pub bucket_stat: Option, + pub sst_applied_index: Vec, +} + +#[derive(Copy, Clone, Debug)] +pub struct SstApplyIndex { + pub cf_index: usize, + pub index: u64, } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 7630e35c2a5b..83a2497b3314 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -12,7 +12,7 @@ pub use self::response_channel::FlushChannel; pub use self::response_channel::FlushSubscriber; pub use self::{ imp::{RaftRouter, UnsafeRecoveryRouter}, - internal_message::ApplyRes, + internal_message::{ApplyRes, SstApplyIndex}, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ build_any_channel, AnyResChannel, AnyResSubscriber, BaseSubscriber, CmdResChannel, diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index a24874561085..e51c9862e474 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -7,10 +7,10 @@ use std::{ use file_system::calc_crc32; use futures::{executor::block_on, stream, SinkExt}; -use grpcio::{Result, WriteFlags}; -use kvproto::import_sstpb::*; +use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; +use kvproto::{import_sstpb::*, tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir}; -use test_raftstore::Simulator; +use test_raftstore::{must_raw_put, Simulator}; use test_sst_importer::*; use tikv::config::TikvConfig; use tikv_util::{config::ReadableSize, HandyRwLock}; @@ -455,3 +455,73 @@ fn sst_file_count(paths: &Vec) -> u64 { } count } + +#[test] +fn test_flushed_applied_index_after_ingset() { + // disable data flushed + fail::cfg("on_flush_completed", "return()").unwrap(); + // disable data flushed + let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(None); + let temp_dir = 
Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + + // Create clients. + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(1)); + let client = TikvClient::new(channel); + + for i in 0..5 { + let sst_range = (i * 20, (i + 1) * 20); + let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); + // No region id and epoch. + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + } + + // only 1 sst left because there is no more event to trigger a raft ready flush. + let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + for i in 5..8 { + let sst_range = (i * 20, (i + 1) * 20); + let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); + // No region id and epoch. + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + } + + // ingest more sst files, unflushed index still be 1. + let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + // file a write to trigger ready flush, even if the write is not flushed. 
+ must_raw_put(&client, ctx, b"key1".to_vec(), b"value1".to_vec()); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + + // restart node, should not tirgger any ingest + fail::cfg("on_apply_ingest", "panic").unwrap(); + cluster.stop_node(1); + cluster.start().unwrap(); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + + fail::remove("on_apply_ingest"); + fail::remove("on_flush_completed"); +} From b172835345cb015572faabb2bc164d532ba8d62f Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 13 Sep 2023 19:57:08 -0700 Subject: [PATCH 044/203] add option to update config without persist (#15587) close tikv/tikv#15588 add option to update TiKV config without persist in status API "POST /config?persist=false|true" Signed-off-by: tonyxuqqi --- src/server/status_server/mod.rs | 88 ++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index b49fdce12af1..98077d9e93f6 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -313,6 +313,18 @@ where req: Request, ) -> hyper::Result> { let mut body = Vec::new(); + let mut persist = true; + if let Some(query) = req.uri().query() { + let query_pairs: HashMap<_, _> = + url::form_urlencoded::parse(query.as_bytes()).collect(); + persist = match query_pairs.get("persist") { + Some(val) => match val.parse() { + Ok(val) => val, + Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), + }, + None => true, + }; + } req.into_body() .try_for_each(|bytes| { body.extend(bytes); @@ -320,7 +332,11 @@ where }) .await?; Ok(match decode_json(&body) { - Ok(change) => match cfg_controller.update(change) { + Ok(change) => match if persist { + cfg_controller.update(change) + } else { + cfg_controller.update_without_persist(change) + } { Err(e) => { if let Some(e) = e.downcast_ref::() { make_response( @@ -1227,6 +1243,76 @@ mod tests { status_server.stop(); } + #[test] + fn test_update_config_endpoint() { + let test_config = |persist: bool| { + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut config = TikvConfig::default(); + config.cfg_path = temp_dir + .path() + .join("tikv.toml") + .to_str() + .unwrap() + .to_string(); + let mut status_server = StatusServer::new( + 1, + ConfigController::new(config), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + GrpcServiceManager::dummy(), + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + let client = Client::new(); + let uri = if persist { + Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/config") + .build() + .unwrap() + } else { + Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/config?persist=false") + .build() + .unwrap() + }; + let mut req = Request::new(Body::from("{\"coprocessor.region-split-size\": \"1GB\"}")); + *req.method_mut() = Method::POST; + *req.uri_mut() = uri.clone(); + let handle = status_server.thread_pool.spawn(async move { + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + }); + block_on(handle).unwrap(); + + let client = Client::new(); + let handle2 = status_server.thread_pool.spawn(async move { + let resp = client.get(uri).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let mut v = Vec::new(); + resp.into_body() 
+ .try_for_each(|bytes| { + v.extend(bytes); + ok(()) + }) + .await + .unwrap(); + let resp_json = String::from_utf8_lossy(&v).to_string(); + assert!(resp_json.contains("\"region-split-size\":\"1GiB\"")); + }); + block_on(handle2).unwrap(); + status_server.stop(); + }; + test_config(true); + test_config(false); + } + #[cfg(feature = "failpoints")] #[test] fn test_status_service_fail_endpoints() { From 905ecd79ee9a30bcd8b9b1949c430062c4c3fd07 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 14 Sep 2023 12:07:39 +0800 Subject: [PATCH 045/203] tracker: add a warn log for deadline exceeded query (#15577) ref tikv/tikv#15566 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- src/coprocessor/tracker.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index bb32a3a0e032..f6502c2459eb 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -434,6 +434,36 @@ impl Drop for Tracker { if let TrackerState::ItemFinished(_) = self.current_stage { self.on_finish_all_items(); } + + if self.current_stage != TrackerState::AllItemFinished + && self.req_ctx.deadline.check().is_err() + { + // record deadline exceeded error log. + let total_lifetime = self.request_begin_at.saturating_elapsed(); + let source_stmt = self.req_ctx.context.get_source_stmt(); + let first_range = self.req_ctx.ranges.first(); + let some_table_id = first_range.as_ref().map(|range| { + tidb_query_datatype::codec::table::decode_table_id(range.get_start()) + .unwrap_or_default() + }); + warn!("query deadline exceeded"; + "current_stage" => ?self.current_stage, + "connection_id" => source_stmt.get_connection_id(), + "session_alias" => source_stmt.get_session_alias(), + "region_id" => &self.req_ctx.context.get_region_id(), + "remote_host" => &self.req_ctx.peer, + "total_lifetime" => ?total_lifetime, + "wait_time" => ?self.wait_time, + "wait_time.schedule" => ?self.schedule_wait_time, + "wait_time.snapshot" => ?self.snapshot_wait_time, + "handler_build_time" => ?self.handler_build_time, + "total_process_time" => ?self.total_process_time, + "total_suspend_time" => ?self.total_suspend_time, + "txn_start_ts" => self.req_ctx.txn_start_ts, + "table_id" => some_table_id, + "tag" => self.req_ctx.tag.get_str(), + ); + } } } From 62c17991fd73269929bdfbd8e408710078e53351 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Wed, 13 Sep 2023 21:42:09 -0700 Subject: [PATCH 046/203] unsafe recovery: Enable force leader to rollback merge (#15578) close tikv/tikv#15580 Enable force leader to rollback merges when they are not able to proceed, previously, only regions with quorum can do this. 
Signed-off-by: Yang Zhang Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 17 ++- components/raftstore/src/store/peer.rs | 4 +- .../failpoints/cases/test_unsafe_recovery.rs | 110 ++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 9f7934e806e4..d61e67842952 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4434,6 +4434,9 @@ where fn schedule_merge(&mut self) -> Result<()> { fail_point!("on_schedule_merge", |_| Ok(())); + fail_point!("on_schedule_merge_ret_err", |_| Err(Error::RegionNotFound( + 1 + ))); let (request, target_id) = { let state = self.fsm.peer.pending_merge_state.as_ref().unwrap(); let expect_region = state.get_target(); @@ -4557,6 +4560,17 @@ where "error_code" => %e.error_code(), ); self.rollback_merge(); + } else if let Some(ForceLeaderState::ForceLeader { .. }) = + &self.fsm.peer.force_leader + { + info!( + "failed to schedule merge, rollback in force leader state"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "err" => %e, + "error_code" => %e.error_code(), + ); + self.rollback_merge(); } } else if !is_learner(&self.fsm.peer.peer) { info!( @@ -5228,7 +5242,8 @@ where // error-prone if !(msg.has_admin_request() && (msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeer - || msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeerV2)) + || msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeerV2 + || msg.get_admin_request().get_cmd_type() == AdminCmdType::RollbackMerge)) { return Err(Error::RecoveryInProgress(self.region_id())); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8c1a7ef61e9b..8ef857bfa129 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4237,7 +4237,9 @@ where // Should not propose normal in force leader state. // In `pre_propose_raft_command`, it rejects all the requests expect conf-change // if in force leader state. - if self.force_leader.is_some() { + if self.force_leader.is_some() + && req.get_admin_request().get_cmd_type() != AdminCmdType::RollbackMerge + { poll_ctx.raft_metrics.invalid_proposal.force_leader.inc(); panic!( "{} propose normal in force leader state {:?}", diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index cc33a01ff03b..978489b5cd68 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -440,3 +440,113 @@ fn test_unsafe_recovery_demotion_reentrancy() { assert_eq!(demoted, true); fail::remove("on_handle_apply_store_1"); } + +#[test_case(test_raftstore::new_node_cluster)] +fn test_unsafe_recovery_rollback_merge() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + for i in 0..10 { + cluster.must_put(format!("k{}", i).as_bytes(), b"v"); + } + + // Block merge commit, let go of the merge prepare. 
+ fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Makes the leadership definite. + let left_peer_2 = find_peer(&left, nodes[2]).unwrap().to_owned(); + let right_peer_2 = find_peer(&right, nodes[2]).unwrap().to_owned(); + cluster.must_transfer_leader(left.get_id(), left_peer_2); + cluster.must_transfer_leader(right.get_id(), right_peer_2); + cluster.must_try_merge(left.get_id(), right.get_id()); + + // Makes the group lose its quorum. + cluster.stop_node(nodes[1]); + cluster.stop_node(nodes[2]); + { + let put = new_put_cmd(b"k2", b"v2"); + let req = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![put], + true, + ); + // marjority is lost, can't propose command successfully. + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); + } + + cluster.must_enter_force_leader(left.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + cluster.must_enter_force_leader(right.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + + // Construct recovery plan. + let mut plan = pdpb::RecoveryPlan::default(); + + let left_demote_peers: Vec = left + .get_peers() + .iter() + .filter(|&peer| peer.get_store_id() != nodes[0]) + .cloned() + .collect(); + let mut left_demote = pdpb::DemoteFailedVoters::default(); + left_demote.set_region_id(left.get_id()); + left_demote.set_failed_voters(left_demote_peers.into()); + let right_demote_peers: Vec = right + .get_peers() + .iter() + .filter(|&peer| peer.get_store_id() != nodes[0]) + .cloned() + .collect(); + let mut right_demote = pdpb::DemoteFailedVoters::default(); + right_demote.set_region_id(right.get_id()); + right_demote.set_failed_voters(right_demote_peers.into()); + plan.mut_demotes().push(left_demote); + plan.mut_demotes().push(right_demote); + + // Triggers the unsafe recovery plan execution. + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); + cluster.must_send_store_heartbeat(nodes[0]); + + let mut demoted = false; + for _ in 0..10 { + let new_left = block_on(pd_client.get_region_by_id(left.get_id())) + .unwrap() + .unwrap(); + let new_right = block_on(pd_client.get_region_by_id(right.get_id())) + .unwrap() + .unwrap(); + assert_eq!(new_left.get_peers().len(), 3); + assert_eq!(new_right.get_peers().len(), 3); + demoted = new_left + .get_peers() + .iter() + .filter(|peer| peer.get_store_id() != nodes[0]) + .all(|peer| peer.get_role() == metapb::PeerRole::Learner) + && new_right + .get_peers() + .iter() + .filter(|peer| peer.get_store_id() != nodes[0]) + .all(|peer| peer.get_role() == metapb::PeerRole::Learner); + if demoted { + break; + } + sleep_ms(100); + } + assert_eq!(demoted, true); + + fail::remove("on_schedule_merge_ret_err"); +} From e43a157c4a35034dfd705bdd94fac6d958e8a1ff Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 14 Sep 2023 16:10:39 +0800 Subject: [PATCH 047/203] resolved_ts: limit scanner memory usage (#15523) ref tikv/tikv#14864 * Break resolved ts scan entry into multiple tasks. * Limit concurrent resolved ts scan tasks. * Remove resolved ts dead code. 
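The concurrency limit uses the usual semaphore-gating pattern: the endpoint sizes a semaphore from resolved-ts.incremental-scan-concurrency (default 6 in this patch) and each scan task acquires a permit before taking its region snapshot. A minimal standalone sketch of that pattern, not the endpoint code itself, assuming the tokio crate with the rt-multi-thread, macros and sync features:

    use std::sync::Arc;

    use tokio::sync::Semaphore;

    #[tokio::main]
    async fn main() {
        // At most 6 scan tasks run at once; the rest wait for a permit.
        let concurrency = Arc::new(Semaphore::new(6));
        let mut handles = Vec::new();
        for region_id in 0..32u64 {
            let sem = concurrency.clone();
            handles.push(tokio::spawn(async move {
                let _permit = sem.acquire().await.unwrap();
                // ... take a snapshot and scan locks for `region_id` here ...
                println!("scanned region {}", region_id);
            }));
        }
        for h in handles {
            h.await.unwrap();
        }
    }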
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/endpoint.rs | 190 ++++++++---------- components/resolved_ts/src/metrics.rs | 5 + components/resolved_ts/src/resolver.rs | 27 ++- components/resolved_ts/src/scanner.rs | 222 +++++++++------------ src/config/mod.rs | 2 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 7 files changed, 198 insertions(+), 250 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 2a2f56eaadd0..34f00672fa72 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -5,15 +5,13 @@ use std::{ collections::HashMap, fmt, marker::PhantomData, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, MutexGuard, - }, + sync::{Arc, Mutex, MutexGuard}, time::Duration, }; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; +use futures::channel::oneshot::{channel, Receiver, Sender}; use grpcio::Environment; use kvproto::{kvrpcpb::LeaderInfo, metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; @@ -35,7 +33,7 @@ use tikv_util::{ warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; -use tokio::sync::Notify; +use tokio::sync::{Notify, Semaphore}; use txn_types::{Key, TimeStamp}; use crate::{ @@ -43,7 +41,7 @@ use crate::{ cmd::{ChangeLog, ChangeRow}, metrics::*, resolver::{LastAttempt, Resolver}, - scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, + scanner::{ScanEntries, ScanTask, ScannerPool}, Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, }; @@ -55,7 +53,7 @@ enum ResolverStatus { Pending { tracked_index: u64, locks: Vec, - cancelled: Arc, + cancelled: Option>, memory_quota: Arc, }, Ready, @@ -188,7 +186,12 @@ struct ObserveRegion { } impl ObserveRegion { - fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { + fn new( + meta: Region, + rrp: Arc, + memory_quota: Arc, + cancelled: Sender<()>, + ) -> Self { ObserveRegion { resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota.clone()), meta, @@ -196,7 +199,7 @@ impl ObserveRegion { resolver_status: ResolverStatus::Pending { tracked_index: 0, locks: vec![], - cancelled: Arc::new(AtomicBool::new(false)), + cancelled: Some(cancelled), memory_quota, }, } @@ -318,51 +321,45 @@ impl ObserveRegion { } /// Track locks in incoming scan entries. 
- fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> Result<()> { - for es in entries { - match es { - ScanEntry::Lock(locks) => { - if let ResolverStatus::Ready = self.resolver_status { - panic!("region {:?} resolver has ready", self.meta.id) - } - for (key, lock) in locks { - self.resolver.track_lock( - lock.ts, - key.to_raw().unwrap(), - Some(apply_index), - )?; - } + fn track_scan_locks(&mut self, entries: ScanEntries, apply_index: u64) -> Result<()> { + match entries { + ScanEntries::Lock(locks) => { + if let ResolverStatus::Ready = self.resolver_status { + panic!("region {:?} resolver has ready", self.meta.id) } - ScanEntry::None => { - // Update the `tracked_index` to the snapshot's `apply_index` - self.resolver.update_tracked_index(apply_index); - let mut resolver_status = - std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); - let (pending_tracked_index, pending_locks) = - resolver_status.drain_pending_locks(self.meta.id); - for lock in pending_locks { - match lock { - PendingLock::Track { key, start_ts } => { - self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(pending_tracked_index), - )?; - } - PendingLock::Untrack { key, .. } => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), + for (key, lock) in locks { + self.resolver + .track_lock(lock.ts, key.to_raw().unwrap(), Some(apply_index))?; + } + } + ScanEntries::None => { + // Update the `tracked_index` to the snapshot's `apply_index` + self.resolver.update_tracked_index(apply_index); + let mut resolver_status = + std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); + let (pending_tracked_index, pending_locks) = + resolver_status.drain_pending_locks(self.meta.id); + for lock in pending_locks { + match lock { + PendingLock::Track { key, start_ts } => { + self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(pending_tracked_index), + )?; } + PendingLock::Untrack { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), } - info!( - "Resolver initialized"; - "region" => self.meta.id, - "observe_id" => ?self.handle.id, - "snapshot_index" => apply_index, - "pending_data_index" => pending_tracked_index, - ); } - ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), + info!( + "Resolver initialized"; + "region" => self.meta.id, + "observe_id" => ?self.handle.id, + "snapshot_index" => apply_index, + "pending_data_index" => pending_tracked_index, + ); } } Ok(()) @@ -378,6 +375,7 @@ pub struct Endpoint { region_read_progress: RegionReadProgressRegistry, regions: HashMap, scanner_pool: ScannerPool, + scan_concurrency_semaphore: Arc, scheduler: Scheduler, advance_worker: AdvanceTsWorker, _phantom: PhantomData<(T, E)>, @@ -442,10 +440,7 @@ where match &observed_region.resolver_status { ResolverStatus::Pending { locks, .. } => { for l in locks { - match l { - PendingLock::Track { key, .. } => stats.heap_size += key.len() as i64, - PendingLock::Untrack { key, .. 
} => stats.heap_size += key.len() as i64, - } + stats.heap_size += l.heap_size() as i64; } stats.unresolved_count += 1; } @@ -477,6 +472,7 @@ where RTS_ZERO_RESOLVED_TS.set(stats.zero_ts_count); RTS_LOCK_HEAP_BYTES_GAUGE.set(stats.resolver.heap_size); + RTS_LOCK_QUOTA_IN_USE_BYTES_GAUGE.set(self.memory_quota.in_use() as i64); RTS_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["resolved"]) .set(stats.resolver.resolved_count); @@ -678,6 +674,7 @@ where region_read_progress.clone(), store_resolver_gc_interval, ); + let scan_concurrency_semaphore = Arc::new(Semaphore::new(cfg.incremental_scan_concurrency)); let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), @@ -688,6 +685,7 @@ where region_read_progress, advance_worker, scanner_pool, + scan_concurrency_semaphore, regions: HashMap::default(), _phantom: PhantomData::default(), }; @@ -698,33 +696,28 @@ where fn register_region(&mut self, region: Region, backoff: Option) { let region_id = region.get_id(); assert!(self.regions.get(®ion_id).is_none()); - let observe_region = { - if let Some(read_progress) = self.region_read_progress.get(®ion_id) { - info!( - "register observe region"; - "region" => ?region - ); - ObserveRegion::new(region.clone(), read_progress, self.memory_quota.clone()) - } else { - warn!( - "try register unexit region"; - "region" => ?region, - ); - return; - } + let Some(read_progress) = self.region_read_progress.get(®ion_id) else { + warn!("try register nonexistent region"; "region" => ?region); + return; }; + info!("register observe region"; "region" => ?region); + let (cancelled_tx, cancelled_rx) = channel(); + let observe_region = ObserveRegion::new( + region.clone(), + read_progress, + self.memory_quota.clone(), + cancelled_tx, + ); let observe_handle = observe_region.handle.clone(); - let cancelled = match observe_region.resolver_status { - ResolverStatus::Pending { ref cancelled, .. 
} => cancelled.clone(), - ResolverStatus::Ready => panic!("resolved ts illeagal created observe region"), - }; observe_region .read_progress() .update_advance_resolved_ts_notify(self.advance_notify.clone()); self.regions.insert(region_id, observe_region); - let scan_task = self.build_scan_task(region, observe_handle, cancelled, backoff); - self.scanner_pool.spawn_task(scan_task); + let scan_task = self.build_scan_task(region, observe_handle, cancelled_rx, backoff); + let concurrency_semaphore = self.scan_concurrency_semaphore.clone(); + self.scanner_pool + .spawn_task(scan_task, concurrency_semaphore); RTS_SCAN_TASKS.with_label_values(&["total"]).inc(); } @@ -732,45 +725,17 @@ where &self, region: Region, observe_handle: ObserveHandle, - cancelled: Arc, + cancelled: Receiver<()>, backoff: Option, ) -> ScanTask { let scheduler = self.scheduler.clone(); - let scheduler_error = self.scheduler.clone(); - let region_id = region.id; - let observe_id = observe_handle.id; ScanTask { handle: observe_handle, - tag: String::new(), - mode: ScanMode::LockOnly, region, checkpoint_ts: TimeStamp::zero(), backoff, - is_cancelled: Box::new(move || cancelled.load(Ordering::Acquire)), - send_entries: Box::new(move |entries, apply_index| { - scheduler - .schedule(Task::ScanLocks { - region_id, - observe_id, - entries, - apply_index, - }) - .unwrap_or_else(|e| warn!("schedule resolved ts task failed"; "err" => ?e)); - RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); - }), - on_error: Some(Box::new(move |observe_id, _region, e| { - if let Err(e) = scheduler_error.schedule(Task::ReRegisterRegion { - region_id, - observe_id, - cause: e, - }) { - warn!("schedule re-register task failed"; - "region_id" => region_id, - "observe_id" => ?observe_id, - "error" => ?e); - } - RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); - })), + cancelled, + scheduler, } } @@ -778,7 +743,7 @@ where if let Some(observe_region) = self.regions.remove(®ion_id) { let ObserveRegion { handle, - resolver_status, + mut resolver_status, .. } = observe_region; @@ -791,8 +756,11 @@ where // Stop observing data handle.stop_observing(); // Stop scanning data - if let ResolverStatus::Pending { ref cancelled, .. } = resolver_status { - cancelled.store(true, Ordering::Release); + if let ResolverStatus::Pending { + ref mut cancelled, .. + } = resolver_status + { + let _ = cancelled.take(); } } else { debug!("deregister unregister region"; "region_id" => region_id); @@ -938,7 +906,7 @@ where &mut self, region_id: u64, observe_id: ObserveId, - entries: Vec, + entries: ScanEntries, apply_index: u64, ) { let mut memory_quota_exceeded = None; @@ -979,6 +947,8 @@ where self.advance_notify.notify_waiters(); self.memory_quota .set_capacity(self.cfg.memory_quota.0 as usize); + self.scan_concurrency_semaphore = + Arc::new(Semaphore::new(self.cfg.incremental_scan_concurrency)); info!( "resolved-ts config changed"; "prev" => prev, @@ -1047,7 +1017,7 @@ pub enum Task { ScanLocks { region_id: u64, observe_id: ObserveId, - entries: Vec, + entries: ScanEntries, apply_index: u64, }, ChangeConfig { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 02bb92f78878..fb751491d10a 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -104,6 +104,11 @@ lazy_static! 
{ "Total bytes in memory of resolved-ts observed regions's lock heap" ) .unwrap(); + pub static ref RTS_LOCK_QUOTA_IN_USE_BYTES_GAUGE: IntGauge = register_int_gauge!( + "tikv_resolved_ts_memory_quota_in_use_bytes", + "Total bytes in memory of resolved-ts observed regions's lock heap" + ) + .unwrap(); pub static ref RTS_REGION_RESOLVE_STATUS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resolved_ts_region_resolve_status", "The status of resolved-ts observed regions", diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 9a62a0eea988..85e7acff4a4a 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -10,7 +10,7 @@ use tikv_util::{ }; use txn_types::{Key, TimeStamp}; -use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; +use crate::metrics::*; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB @@ -203,16 +203,23 @@ impl Resolver { // Return an approximate heap memory usage in bytes. pub fn approximate_heap_bytes(&self) -> usize { - // memory used by locks_by_key. - let memory_quota_in_use = self.memory_quota.in_use(); - - // memory used by lock_ts_heap. - let memory_lock_ts_heap = self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) - // memory used by HashSet> - + self.locks_by_key.len() * std::mem::size_of::>(); + if self.locks_by_key.is_empty() { + return 0; + } - memory_quota_in_use + memory_lock_ts_heap + const SAMPLE_COUNT: usize = 8; + let mut key_count = 0; + let mut key_bytes = 0; + for key in self.locks_by_key.keys() { + key_count += 1; + key_bytes += key.len(); + if key_count >= SAMPLE_COUNT { + break; + } + } + self.locks_by_key.len() * (key_bytes / key_count + std::mem::size_of::()) + + self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) } fn lock_heap_size(&self, key: &[u8]) -> usize { diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 615819db7993..6c8c90dc38f0 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -3,57 +3,79 @@ use std::{marker::PhantomData, sync::Arc, time::Duration}; use engine_traits::KvEngine; -use futures::compat::Future01CompatExt; -use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb::Region}; +use futures::{channel::oneshot::Receiver, compat::Future01CompatExt, FutureExt}; +use kvproto::metapb::Region; use raftstore::{ - coprocessor::{ObserveHandle, ObserveId}, + coprocessor::ObserveHandle, router::CdcHandle, store::{fsm::ChangeObserver, msg::Callback, RegionSnapshot}, }; use tikv::storage::{ kv::{ScanMode as MvccScanMode, Snapshot}, - mvcc::{DeltaScanner, MvccReader, ScannerBuilder}, - txn::{TxnEntry, TxnEntryScanner}, + mvcc::MvccReader, +}; +use tikv_util::{ + sys::thread::ThreadBuildWrapper, time::Instant, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, +}; +use tokio::{ + runtime::{Builder, Runtime}, + sync::Semaphore, }; -use tikv_util::{sys::thread::ThreadBuildWrapper, time::Instant, timer::GLOBAL_TIMER_HANDLE}; -use tokio::runtime::{Builder, Runtime}; use txn_types::{Key, Lock, LockType, TimeStamp}; use crate::{ errors::{Error, Result}, metrics::*, + Task, }; -const DEFAULT_SCAN_BATCH_SIZE: usize = 1024; +const DEFAULT_SCAN_BATCH_SIZE: usize = 128; const GET_SNAPSHOT_RETRY_TIME: u32 = 3; const GET_SNAPSHOT_RETRY_BACKOFF_STEP: Duration = Duration::from_millis(100); -pub type BeforeStartCallback = Box; -pub type OnErrorCallback = Box; -pub type 
OnEntriesCallback = Box, u64) + Send>; -pub type IsCancelledCallback = Box bool + Send>; - -pub enum ScanMode { - LockOnly, - All, - AllWithOldValue, -} - pub struct ScanTask { pub handle: ObserveHandle, - pub tag: String, - pub mode: ScanMode, pub region: Region, pub checkpoint_ts: TimeStamp, pub backoff: Option, - pub is_cancelled: IsCancelledCallback, - pub send_entries: OnEntriesCallback, - pub on_error: Option, + pub cancelled: Receiver<()>, + pub scheduler: Scheduler, +} + +impl ScanTask { + async fn send_entries(&self, entries: ScanEntries, apply_index: u64) { + let task = Task::ScanLocks { + region_id: self.region.get_id(), + observe_id: self.handle.id, + entries, + apply_index, + }; + if let Err(e) = self.scheduler.schedule(task) { + warn!("resolved_ts scheduler send entries failed"; "err" => ?e); + } + } + + fn is_cancelled(&mut self) -> bool { + matches!(self.cancelled.try_recv(), Err(_) | Ok(Some(_))) + } + + fn on_error(&self, err: Error) { + if let Err(e) = self.scheduler.schedule(Task::ReRegisterRegion { + region_id: self.region.get_id(), + observe_id: self.handle.id, + cause: err, + }) { + warn!("schedule re-register task failed"; + "region_id" => self.region.get_id(), + "observe_id" => ?self.handle.id, + "error" => ?e); + } + RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); + } } #[derive(Debug)] -pub enum ScanEntry { - TxnEntry(Vec), +pub enum ScanEntries { Lock(Vec<(Key, Lock)>), None, } @@ -82,109 +104,66 @@ impl, E: KvEngine> ScannerPool { } } - pub fn spawn_task(&self, mut task: ScanTask) { + pub fn spawn_task(&self, mut task: ScanTask, concurrency_semaphore: Arc) { let cdc_handle = self.cdc_handle.clone(); let fut = async move { + tikv_util::defer!({ + RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); + }); if let Some(backoff) = task.backoff { RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM.observe(backoff.as_secs_f64()); - if let Err(e) = GLOBAL_TIMER_HANDLE + let mut backoff = GLOBAL_TIMER_HANDLE .delay(std::time::Instant::now() + backoff) .compat() - .await - { - error!("failed to backoff"; "err" => ?e); + .fuse(); + futures::select! { + res = backoff => if let Err(e) = res { + error!("failed to backoff"; "err" => ?e); + }, + _ = &mut task.cancelled => {} } - if (task.is_cancelled)() { + if task.is_cancelled() { return; } } + let _permit = concurrency_semaphore.acquire().await; + if task.is_cancelled() { + return; + } + fail::fail_point!("resolved_ts_before_scanner_get_snapshot"); let snap = match Self::get_snapshot(&mut task, cdc_handle).await { Ok(snap) => snap, Err(e) => { warn!("resolved_ts scan get snapshot failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. 
- } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } + task.on_error(e); return; } }; fail::fail_point!("resolved_ts_after_scanner_get_snapshot"); let start = Instant::now(); let apply_index = snap.get_apply_index().unwrap(); - let mut entries = vec![]; - match task.mode { - ScanMode::All | ScanMode::AllWithOldValue => { - let txn_extra_op = if let ScanMode::AllWithOldValue = task.mode { - TxnExtraOp::ReadOldValue - } else { - TxnExtraOp::Noop - }; - let mut scanner = ScannerBuilder::new(snap, TimeStamp::max()) - .range(None, None) - .build_delta_scanner(task.checkpoint_ts, txn_extra_op) - .unwrap(); - let mut done = false; - while !done && !(task.is_cancelled)() { - let (es, has_remaining) = match Self::scan_delta(&mut scanner) { - Ok(rs) => rs, - Err(e) => { - warn!("resolved_ts scan delta failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. - } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } - return; - } - }; - done = !has_remaining; - entries.push(ScanEntry::TxnEntry(es)); - } - } - ScanMode::LockOnly => { - let mut reader = MvccReader::new(snap, Some(MvccScanMode::Forward), false); - let mut done = false; - let mut start = None; - while !done && !(task.is_cancelled)() { - let (locks, has_remaining) = - match Self::scan_locks(&mut reader, start.as_ref(), task.checkpoint_ts) - { - Ok(rs) => rs, - Err(e) => { - warn!("resolved_ts scan lock failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. - } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } - return; - } - }; - done = !has_remaining; - if has_remaining { - start = Some(locks.last().unwrap().0.clone()) + let mut reader = MvccReader::new(snap, Some(MvccScanMode::Forward), false); + let mut done = false; + let mut start_key = None; + while !done && !task.is_cancelled() { + let (locks, has_remaining) = + match Self::scan_locks(&mut reader, start_key.as_ref(), task.checkpoint_ts) { + Ok(rs) => rs, + Err(e) => { + warn!("resolved_ts scan lock failed"; "err" => ?e); + task.on_error(e); + return; } - entries.push(ScanEntry::Lock(locks)); - } + }; + done = !has_remaining; + if has_remaining { + start_key = Some(locks.last().unwrap().0.clone()) } + task.send_entries(ScanEntries::Lock(locks), apply_index) + .await; } - entries.push(ScanEntry::None); RTS_SCAN_DURATION_HISTOGRAM.observe(start.saturating_elapsed().as_secs_f64()); - (task.send_entries)(entries, apply_index); + task.send_entries(ScanEntries::None, apply_index).await; }; self.workers.spawn(fut); } @@ -196,18 +175,21 @@ impl, E: KvEngine> ScannerPool { let mut last_err = None; for retry_times in 0..=GET_SNAPSHOT_RETRY_TIME { if retry_times != 0 { - if let Err(e) = GLOBAL_TIMER_HANDLE + let mut backoff = GLOBAL_TIMER_HANDLE .delay( std::time::Instant::now() + GET_SNAPSHOT_RETRY_BACKOFF_STEP .mul_f64(10_f64.powi(retry_times as i32 - 1)), ) .compat() - .await - { - error!("failed to backoff"; "err" => ?e); + .fuse(); + futures::select! 
{ + res = backoff => if let Err(e) = res { + error!("failed to backoff"; "err" => ?e); + }, + _ = &mut task.cancelled => {} } - if (task.is_cancelled)() { + if task.is_cancelled() { return Err(box_err!("scan task cancelled")); } } @@ -256,24 +238,4 @@ impl, E: KvEngine> ScannerPool { .map_err(|e| Error::Other(box_err!("{:?}", e)))?; Ok((locks, has_remaining)) } - - fn scan_delta(scanner: &mut DeltaScanner) -> Result<(Vec, bool)> { - let mut entries = Vec::with_capacity(DEFAULT_SCAN_BATCH_SIZE); - let mut has_remaining = true; - while entries.len() < entries.capacity() { - match scanner - .next_entry() - .map_err(|e| Error::Other(box_err!("{:?}", e)))? - { - Some(entry) => { - entries.push(entry); - } - None => { - has_remaining = false; - break; - } - } - } - Ok((entries, has_remaining)) - } } diff --git a/src/config/mod.rs b/src/config/mod.rs index be2a52d9b078..4f9a9a01b4a0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3000,6 +3000,7 @@ pub struct ResolvedTsConfig { #[online_config(skip)] pub scan_lock_pool_size: usize, pub memory_quota: ReadableSize, + pub incremental_scan_concurrency: usize, } impl ResolvedTsConfig { @@ -3021,6 +3022,7 @@ impl Default for ResolvedTsConfig { advance_ts_interval: ReadableDuration::secs(20), scan_lock_pool_size: 2, memory_quota: ReadableSize::mb(256), + incremental_scan_concurrency: 6, } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index c6e98e95c05b..c6f787df9a79 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -857,6 +857,7 @@ fn test_serde_custom_tikv_config() { advance_ts_interval: ReadableDuration::secs(5), scan_lock_pool_size: 1, memory_quota: ReadableSize::mb(1), + incremental_scan_concurrency: 7, }; value.causal_ts = CausalTsConfig { renew_interval: ReadableDuration::millis(100), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 653c3d2daef7..ece8cabae497 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -707,6 +707,7 @@ enable = true advance-ts-interval = "5s" scan-lock-pool-size = 1 memory-quota = "1MB" +incremental-scan-concurrency = 7 [split] detect-times = 10 From 32f58924b825230d159714db63bed344e913a58a Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 15 Sep 2023 13:16:39 +0800 Subject: [PATCH 048/203] *: update rust-toolchain (#15584) close tikv/tikv#15581 Signed-off-by: glorv --- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 1 + cmd/tikv-ctl/src/main.rs | 2 +- components/backup-stream/src/errors.rs | 4 +- .../backup-stream/src/metadata/client.rs | 5 +- components/backup-stream/src/router.rs | 4 +- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 4 +- components/backup/src/endpoint.rs | 6 +-- components/batch-system/src/fsm.rs | 8 +++- components/case_macros/src/lib.rs | 10 ++-- components/cdc/src/delegate.rs | 2 +- components/cdc/src/endpoint.rs | 6 +-- .../concurrency_manager/src/lock_table.rs | 4 +- components/coprocessor_plugin_api/src/util.rs | 4 ++ components/encryption/src/config.rs | 9 +--- components/engine_rocks/src/logger.rs | 2 - components/engine_rocks/src/properties.rs | 15 +++--- .../engine_tirocks/src/properties/mvcc.rs | 2 +- .../engine_tirocks/src/properties/range.rs | 10 ++-- components/engine_traits/src/flush.rs | 2 +- components/engine_traits/src/lib.rs | 4 +- components/engine_traits/src/tablet.rs | 2 +- .../online_config_derive/src/lib.rs | 14 ++---- 
components/raftstore-v2/src/batch/store.rs | 6 ++- components/raftstore-v2/src/lib.rs | 1 + .../operation/command/admin/merge/prepare.rs | 4 +- .../src/operation/command/admin/split.rs | 4 +- .../command/admin/transfer_leader.rs | 20 ++++---- components/raftstore-v2/src/operation/life.rs | 8 +++- .../raftstore-v2/src/operation/query/local.rs | 4 +- .../src/operation/ready/apply_trace.rs | 2 +- .../src/operation/ready/snapshot.rs | 14 +++--- .../raftstore-v2/src/operation/txn_ext.rs | 4 +- .../src/operation/unsafe_recovery/demote.rs | 5 +- .../src/worker/cleanup/compact.rs | 16 +++++-- .../raftstore-v2/src/worker/pd/region.rs | 15 ++---- .../raftstore-v2/src/worker/pd/split.rs | 6 ++- components/raftstore-v2/src/worker/tablet.rs | 13 ++++- .../tests/integrations/cluster.rs | 4 +- .../raftstore/src/coprocessor/dispatcher.rs | 5 +- components/raftstore/src/errors.rs | 2 +- components/raftstore/src/lib.rs | 4 +- .../raftstore/src/store/async_io/write.rs | 6 ++- .../raftstore/src/store/entry_storage.rs | 8 ++-- components/raftstore/src/store/fsm/apply.rs | 18 +++---- components/raftstore/src/store/fsm/peer.rs | 8 ++-- components/raftstore/src/store/msg.rs | 24 +++++++--- components/raftstore/src/store/peer.rs | 48 +++++++++---------- .../raftstore/src/store/peer_storage.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 6 +-- .../raftstore/src/store/simple_write.rs | 24 +++++++--- components/raftstore/src/store/snap.rs | 4 +- components/raftstore/src/store/snap/io.rs | 4 +- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 3 +- components/raftstore/src/store/worker/pd.rs | 20 +++----- components/raftstore/src/store/worker/read.rs | 3 +- .../raftstore/src/store/worker/region.rs | 4 +- .../raftstore/src/store/worker/split_check.rs | 8 ++-- .../src/store/worker/split_controller.rs | 11 ++--- components/resolved_ts/src/cmd.rs | 6 +-- components/resolved_ts/src/endpoint.rs | 20 ++++---- components/resolved_ts/src/scanner.rs | 3 +- .../resource_control/src/resource_group.rs | 4 +- components/resource_metering/src/lib.rs | 2 +- components/resource_metering/src/model.rs | 2 +- .../src/recorder/sub_recorder/cpu.rs | 4 +- .../resource_metering/tests/recorder_test.rs | 12 ++--- components/server/src/common.rs | 4 +- components/snap_recovery/src/leader_keeper.rs | 4 +- components/sst_importer/src/import_mode2.rs | 2 +- components/sst_importer/src/sst_importer.rs | 17 +++---- components/sst_importer/src/util.rs | 3 +- components/test_coprocessor/src/store.rs | 2 +- .../example_plugin/src/lib.rs | 2 +- components/test_pd/src/server.rs | 8 +--- components/test_pd_client/src/pd.rs | 2 +- components/test_raftstore-v2/src/cluster.rs | 3 +- components/test_raftstore-v2/src/lib.rs | 2 + components/test_raftstore-v2/src/node.rs | 2 +- components/test_raftstore-v2/src/server.rs | 14 +++++- components/test_raftstore/src/lib.rs | 2 + components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 8 +++- .../tidb_query_codegen/src/rpn_function.rs | 35 +++++++------- .../src/codec/collation/mod.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 12 ++--- .../src/codec/data_type/mod.rs | 2 +- .../src/codec/data_type/scalar.rs | 17 ++++--- .../tidb_query_datatype/src/codec/datum.rs | 8 ++-- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 4 +- .../src/codec/mysql/json/comparison.rs | 4 +- .../src/codec/mysql/json/jcodec.rs | 8 ++-- .../src/codec/mysql/json/json_modify.rs | 2 +- .../src/codec/mysql/time/mod.rs | 10 ++-- 
.../src/codec/row/v2/row_slice.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../src/index_scan_executor.rs | 4 +- components/tidb_query_executors/src/runner.rs | 18 +++---- .../src/selection_executor.rs | 4 +- .../src/util/aggr_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- components/tidb_query_expr/src/impl_cast.rs | 2 +- .../tidb_query_expr/src/impl_miscellaneous.rs | 5 +- components/tidb_query_expr/src/impl_string.rs | 6 +-- components/tidb_query_expr/src/lib.rs | 2 + .../tidb_query_expr/src/types/expr_eval.rs | 11 ++--- components/tikv_kv/src/cursor.rs | 2 +- components/tikv_kv/src/lib.rs | 1 + components/tikv_util/src/logger/formatter.rs | 6 +-- components/tikv_util/src/lru.rs | 2 +- components/tikv_util/src/memory.rs | 2 +- .../src/metrics/allocator_metrics.rs | 2 +- components/tikv_util/src/mpsc/future.rs | 2 + components/tikv_util/src/sys/cpu_time.rs | 2 +- components/tikv_util/src/timer.rs | 4 +- components/txn_types/src/timestamp.rs | 10 +--- components/txn_types/src/types.rs | 18 ++----- rust-toolchain | 2 +- src/config/mod.rs | 20 ++++---- src/coprocessor/metrics.rs | 2 +- src/coprocessor/mod.rs | 2 + src/import/sst_service.rs | 6 +-- src/lib.rs | 3 +- src/server/debug2.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 1 + src/server/gc_worker/gc_manager.rs | 8 ++-- src/server/gc_worker/gc_worker.rs | 14 ++---- src/server/lock_manager/deadlock.rs | 9 +--- src/server/raftkv/mod.rs | 5 +- src/server/raftkv2/mod.rs | 4 +- src/server/raftkv2/node.rs | 4 +- src/server/service/debug.rs | 1 - src/server/service/diagnostics/log.rs | 18 ++++--- src/server/service/diagnostics/sys.rs | 2 +- src/server/service/kv.rs | 1 - src/storage/lock_manager/lock_wait_context.rs | 12 ++--- .../lock_manager/lock_waiting_queue.rs | 7 +-- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 32 ++++++------- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 21 ++++---- src/storage/mvcc/reader/scanner/forward.rs | 4 +- src/storage/raw/raw_mvcc.rs | 2 +- src/storage/txn/actions/prewrite.rs | 2 - src/storage/txn/commands/atomic_store.rs | 4 +- src/storage/txn/commands/prewrite.rs | 26 +++++----- src/storage/txn/latch.rs | 20 ++++---- src/storage/txn/sched_pool.rs | 2 +- .../benches/coprocessor_executors/util/mod.rs | 2 +- tests/benches/hierarchy/mvcc/mod.rs | 2 +- .../misc/coprocessor/codec/chunk/chunk.rs | 2 +- tests/benches/misc/raftkv/mod.rs | 2 + tests/benches/raftstore/mod.rs | 2 +- tests/failpoints/cases/mod.rs | 3 ++ tests/failpoints/cases/test_disk_full.rs | 8 ++-- tests/failpoints/cases/test_engine.rs | 1 + tests/failpoints/cases/test_hibernate.rs | 1 + tests/failpoints/cases/test_pd_client.rs | 1 + .../failpoints/cases/test_pd_client_legacy.rs | 1 + tests/failpoints/cases/test_rawkv.rs | 2 +- .../cases/test_read_execution_tracker.rs | 11 +++-- tests/failpoints/cases/test_split_region.rs | 3 +- tests/failpoints/cases/test_storage.rs | 4 +- tests/failpoints/cases/test_transaction.rs | 2 +- .../failpoints/cases/test_transfer_leader.rs | 4 +- tests/integrations/backup/mod.rs | 1 + tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/mod.rs | 2 + .../integrations/raftstore/test_bootstrap.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 4 +- tests/integrations/raftstore/test_stats.rs | 1 + 173 files changed, 584 insertions(+), 534 deletions(-) diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index ef3ae7f80232..d1a917f5624b 100644 --- 
a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -265,6 +265,7 @@ where .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) } +#[allow(clippy::permissions_set_readonly_false)] fn add_write_permission>(path: P) -> Result<(), String> { let path = path.as_ref(); let mut pmt = std::fs::metadata(path) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6baa1fe6c39d..c1ab11cc507d 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -#![feature(once_cell)] #![feature(let_chains)] +#![feature(lazy_cell)] #[macro_use] extern crate log; diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index c3cc91da9ff5..cc720d5aecc1 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -158,7 +158,7 @@ where /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. -#[macro_export(crate)] +#[macro_export] macro_rules! annotate { ($inner: expr, $message: expr) => { { @@ -242,6 +242,7 @@ mod test { #[bench] // 2,685 ns/iter (+/- 194) + #[allow(clippy::unnecessary_literal_unwrap)] fn contextual_add_format_strings_directly(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( @@ -305,6 +306,7 @@ mod test { #[bench] // 773 ns/iter (+/- 8) + #[allow(clippy::unnecessary_literal_unwrap)] fn baseline(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 1fdc1b3b1e8d..df8f0f025b19 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -663,11 +663,10 @@ impl MetadataClient { let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; - let cp = match global_cp { + match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, - }; - cp + } } _ => Checkpoint::from_kv(&r[0])?, }; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index b0d3453c958d..6ce8486109f7 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -950,7 +950,9 @@ impl StreamTaskInfo { .last_flush_time .swap(Box::into_raw(Box::new(Instant::now())), Ordering::SeqCst); // manual gc last instant - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } } pub fn should_flush(&self, flush_interval: &Duration) -> bool { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 4f44ec46853a..0803ba1b99a6 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -309,7 +309,7 @@ impl SubscriptionTracer { } }; - let mut subscription = sub.value_mut(); + let subscription = sub.value_mut(); let old_epoch = subscription.meta.get_region_epoch(); let new_epoch = new_region.get_region_epoch(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 1b150eaa1f0b..52b6f0e93914 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -342,7 +342,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// 
once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. -#[macro_export(crate)] +#[macro_export] macro_rules! try_send { ($s:expr, $task:expr) => { match $s.schedule($task) { @@ -366,7 +366,7 @@ macro_rules! try_send { /// `backup_stream_debug`. because once we enable debug log for all crates, it /// would soon get too verbose to read. using this macro now we can enable debug /// log level for the crate only (even compile time...). -#[macro_export(crate)] +#[macro_export] macro_rules! debug { ($($t: tt)+) => { if cfg!(feature = "backup-stream-debug") { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index a4efc162092c..d6330f499667 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -2493,8 +2493,8 @@ pub mod tests { fn test_backup_file_name() { let region = metapb::Region::default(); let store_id = 1; - let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; - let test_target = vec![ + let test_cases = ["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = [ "1/0_0_000", "1/0_0_000", "1_0_0_000", @@ -2513,7 +2513,7 @@ pub mod tests { assert_eq!(target.to_string(), prefix_arr.join(delimiter)); } - let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + let test_target = ["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { let key = None; let filename = backup_file_name(store_id, ®ion, key, storage_name); diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 3fa5ad15a640..16113dde8e2a 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -149,7 +149,9 @@ impl FsmState { Ok(_) => return, Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } return; } Err(s) => s, @@ -179,7 +181,9 @@ impl Drop for FsmState { fn drop(&mut self) { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } } self.state_cnt.fetch_sub(1, Ordering::Relaxed); } diff --git a/components/case_macros/src/lib.rs b/components/case_macros/src/lib.rs index 057b68065d2f..b779373a59d2 100644 --- a/components/case_macros/src/lib.rs +++ b/components/case_macros/src/lib.rs @@ -5,12 +5,12 @@ use proc_macro::{Group, Literal, TokenStream, TokenTree}; macro_rules! transform_idents_in_stream_to_string { - ($stream:ident, $transform:expr) => { + ($stream:ident, $transform:ident) => { $stream .into_iter() .map(|token_tree| match token_tree { TokenTree::Ident(ref ident) => { - Literal::string(&$transform(ident.to_string())).into() + Literal::string(&$transform(&ident.to_string())).into() } // find all idents in `TokenGroup` apply and reconstruct the group TokenTree::Group(ref group) => TokenTree::Group(Group::new( @@ -20,7 +20,7 @@ macro_rules! transform_idents_in_stream_to_string { .into_iter() .map(|group_token_tree| { if let TokenTree::Ident(ref ident) = group_token_tree { - Literal::string(&$transform(ident.to_string())).into() + Literal::string(&$transform(&ident.to_string())).into() } else { group_token_tree } @@ -53,7 +53,7 @@ fn to_snake(s: &str) -> String { /// e.g. 
`HelloWorld` -> `hello-world` #[proc_macro] pub fn kebab_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_kebab(&s)) + transform_idents_in_stream_to_string!(stream, to_kebab) } /// Expands idents in the input stream as snake-case string literal @@ -61,5 +61,5 @@ pub fn kebab_case(stream: TokenStream) -> TokenStream { /// e.g. `HelloWorld` -> `hello_world` #[proc_macro] pub fn snake_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_snake(&s)) + transform_idents_in_stream_to_string!(stream, to_snake) } diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index c82c4cb6f13e..18528fd08e94 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1437,7 +1437,7 @@ mod tests { #[test] fn test_observed_range() { - for case in vec![ + for case in [ (b"".as_slice(), b"".as_slice(), false), (b"a", b"", false), (b"", b"b", false), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index a5f00a08028f..9d5601eba84d 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1015,10 +1015,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint( - &'m self, + pub fn find_first( + &self, start_key: Option<&Key>, end_key: Option<&Key>, mut pred: impl FnMut(Arc) -> Option, diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 31d75610d75b..06e8847402f1 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -19,10 +19,14 @@ pub type PluginConstructorSignature = /// Type signature of the exported function with symbol /// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. +// emit this warn because to fix it need to change the data type which is a breaking change. +#[allow(improper_ctypes_definitions)] pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; /// Type signature of the exported function with symbol /// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. +// emit this warn because to fix it need to change the data type which is a breaking change. +#[allow(improper_ctypes_definitions)] pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// Automatically collected build information about the plugin that is exposed diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 23e049e0df42..4455e4ce7cc9 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -134,11 +134,12 @@ impl KmsConfig { } } -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. Not allowed to use if // encryption is enabled. (i.e. when encryption_config.method != Plaintext). + #[default] Plaintext, // Pass master key from a file, with key encoded as a readable hex string. 
The file should end @@ -156,12 +157,6 @@ pub enum MasterKeyConfig { }, } -impl Default for MasterKeyConfig { - fn default() -> Self { - MasterKeyConfig::Plaintext - } -} - mod encryption_method_serde { use std::fmt; diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index 85f4de713acc..185411dcacfb 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -3,7 +3,6 @@ use rocksdb::{DBInfoLogLevel as InfoLogLevel, Logger}; use tikv_util::{crit, debug, error, info, warn}; // TODO(yiwu): abstract the Logger interface. -#[derive(Default)] pub struct RocksdbLogger; impl Logger for RocksdbLogger { @@ -44,7 +43,6 @@ impl Logger for TabletLogger { } } -#[derive(Default)] pub struct RaftDbLogger; impl Logger for RaftDbLogger { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 87ccab9e5ab4..700d7621dc64 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -144,10 +144,7 @@ pub struct RangeProperties { impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { - let idx = self - .offsets - .binary_search_by_key(&key, |&(ref k, _)| k) - .unwrap(); + let idx = self.offsets.binary_search_by_key(&key, |(k, _)| k).unwrap(); &self.offsets[idx].1 } @@ -205,11 +202,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -227,7 +224,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |&(ref k, _)| k) + .binary_search_by_key(&start_key, |(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -239,7 +236,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; @@ -869,7 +866,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(KeyMode::Txn); b.iter(|| { - for &(ref k, ref v) in &entries { + for (k, v) in &entries { collector.add(k, v, DBEntryType::Put, 0, 0); } }); diff --git a/components/engine_tirocks/src/properties/mvcc.rs b/components/engine_tirocks/src/properties/mvcc.rs index 1ca170f33d58..66c96284ea3e 100644 --- a/components/engine_tirocks/src/properties/mvcc.rs +++ b/components/engine_tirocks/src/properties/mvcc.rs @@ -356,7 +356,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); b.iter(|| { - for &(ref k, ref v) in &entries { + for (k, v) in &entries { collector.add(k, v, EntryType::kEntryPut, 0, 0).unwrap(); } }); diff --git a/components/engine_tirocks/src/properties/range.rs b/components/engine_tirocks/src/properties/range.rs index 59b9e68a6bbb..e8a3411b02f9 100644 --- a/components/engine_tirocks/src/properties/range.rs +++ b/components/engine_tirocks/src/properties/range.rs @@ -53,7 +53,7 @@ impl RangeProperties { pub fn get(&self, key: 
&[u8]) -> &RangeOffsets { let idx = self .offsets - .binary_search_by_key(&key, |&(ref k, _)| k) + .binary_search_by_key(&key, |(k, _)| k) .unwrap(); &self.offsets[idx].1 } @@ -112,11 +112,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -134,7 +134,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |&(ref k, _)| k) + .binary_search_by_key(&start_key, |(k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -146,7 +146,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |(k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 8590236e1265..6449399cef8b 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -119,7 +119,7 @@ impl SstApplyState { for sst in ssts { let cf_index = data_cf_offset(sst.get_cf_name()); if let Some(metas) = sst_list.get_mut(cf_index) { - metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); + let _ = metas.extract_if(|entry| entry.sst.get_uuid() == sst.get_uuid()); } } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index e09b1b52733d..0f89776e7fdb 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,8 +253,8 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] -#![feature(str_split_as_str)] -#![feature(drain_filter)] +#![feature(str_split_remainder)] +#![feature(extract_if)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index c88f1548513f..64e6dcbd4b45 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -241,7 +241,7 @@ impl TabletRegistry { let mut parts = name.rsplit('_'); let suffix = parts.next()?.parse().ok()?; let id = parts.next()?.parse().ok()?; - let prefix = parts.as_str(); + let prefix = parts.remainder().unwrap_or(""); Some((prefix, id, suffix)) } diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index bb37aad5924b..e48a540c6b80 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -330,15 +330,11 @@ fn is_option_type(ty: &Type) -> bool { // TODO store (with lazy static) the vec of string // TODO maybe optimization, reverse the order of segments fn extract_option_segment(path: &Path) -> Option<&PathSegment> { - let idents_of_path = path - .segments - .iter() - .into_iter() - .fold(String::new(), |mut acc, v| { - acc.push_str(&v.ident.to_string()); - acc.push('|'); - acc - }); + let idents_of_path = path.segments.iter().fold(String::new(), |mut acc, v| { + 
acc.push_str(&v.ident.to_string()); + acc.push('|'); + acc + }); vec!["Option|", "std|option|Option|", "core|option|Option|"] .into_iter() .find(|s| idents_of_path == *s) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 4c142a43abfa..5f036c610206 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -489,7 +489,11 @@ impl StorePollerBuilder { self.remove_dir(&path)?; continue; } - let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; + let Some((prefix, region_id, tablet_index)) = + self.tablet_registry.parse_tablet_name(&path) + else { + continue; + }; if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 5b5e132b9ceb..697d0525169a 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -27,6 +27,7 @@ #![feature(box_into_inner)] #![feature(assert_matches)] #![feature(option_get_or_insert_default)] +#![allow(clippy::needless_pass_by_ref_mut)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index d3d1896287c4..76b71a8906c2 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -343,7 +343,9 @@ impl Peer { entry.get_data(), entry.get_index(), entry.get_term(), - ) else { continue }; + ) else { + continue; + }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0f9cae7218df..2fe2b4b57356 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -1098,7 +1098,9 @@ mod test { } } - let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { panic!() }; + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { + panic!() + }; // update cache let mut cache = apply.tablet_registry().get(parent_id).unwrap(); cache.set(*tablet.downcast().unwrap()); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 4cdeba3bc411..f60b9828bbb9 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -50,21 +50,21 @@ impl Peer { /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. execute_transfer_leader on follower - /// If follower passes all necessary checks, it will reply an - /// ACK with type MsgTransferLeader and its promised applied index. - /// 3. ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. 
execute_transfer_leader on follower If follower passes all necessary + /// checks, it will reply an ACK with type MsgTransferLeader and its + /// promised applied index. + /// 3. ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. The follower applies the TransferLeader command and replies an ACK + /// with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// /// See also: tikv/rfcs#37. pub fn propose_transfer_leader( diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8fe1d2a07b3d..395774e17f13 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -637,8 +637,12 @@ impl Peer { let check_peer_id = check.get_check_peer().get_id(); let records = self.storage().region_state().get_merged_records(); let Some(record) = records.iter().find(|r| { - r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) - }) else { return }; + r.get_source_peers() + .iter() + .any(|p| p.get_id() == check_peer_id) + }) else { + return; + }; let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { let source_checkpoint = super::merge_source_path( diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 2f074fdc04df..5f6d589eca6d 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -345,7 +345,9 @@ where match fut.await? { Some(query_res) => { if query_res.read().is_none() { - let QueryResult::Response(res) = query_res else { unreachable!() }; + let QueryResult::Response(res) = query_res else { + unreachable!() + }; // Get an error explicitly in header, // or leader reports KeyIsLocked error via read index. assert!( diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index af0257e763f1..2b6c9c666e6f 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -774,7 +774,7 @@ impl Peer { flushed = true; let flush_state = self.flush_state().clone(); - let mut apply_trace = self.storage_mut().apply_trace_mut(); + let apply_trace = self.storage_mut().apply_trace_mut(); let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 9e0ed449cef5..15caf5f0c847 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -570,10 +570,9 @@ impl Storage { pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { let mut states = self.snap_states.borrow_mut(); states.retain(|id, state| { - let SnapState::Generating { - ref index, - .. - } = *state else { return true; }; + let SnapState::Generating { ref index, .. 
} = *state else { + return true; + }; let snap_index = index.load(Ordering::SeqCst); if snap_index == 0 || compact_to <= snap_index + 1 { return true; @@ -600,10 +599,9 @@ impl Storage { } let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { - let SnapState::Generating { - ref index, - .. - } = *state else { return false }; + let SnapState::Generating { ref index, .. } = *state else { + return false; + }; if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( self.logger(), diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 272b2526b392..6c3a9269a7f8 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -266,7 +266,9 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else { + unreachable!() + }; self.on_simple_write(ctx, write.header, write.data, write.ch); true } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 37962a454527..e7b3c8e62b8c 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -100,7 +100,10 @@ impl Peer { failed_voters, target_index, demote_after_exit, - }) = self.unsafe_recovery_state() else { return }; + }) = self.unsafe_recovery_state() + else { + return; + }; if self.raft_group().raft.raft_log.applied < *target_index { return; diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index 7acdb943b917..feb519a04add 100644 --- a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -97,8 +97,12 @@ where ) { Ok(mut region_ids) => { for region_id in region_ids.drain(..) 
{ - let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; for cf in &cf_names { if let Err(e) = tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) @@ -143,8 +147,12 @@ fn collect_regions_to_compact( ); let mut regions_to_compact = vec![]; for id in region_ids { - let Some(mut tablet_cache) = reg.get(id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = reg.get(id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; if tablet.auto_compactions_is_disabled().expect("cf") { info!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 763e12fff072..999eccb49629 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -113,10 +113,7 @@ where let approximate_keys = task.approximate_keys.unwrap_or_default(); let region_id = task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; @@ -373,10 +370,7 @@ where pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -398,10 +392,7 @@ where pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 7fec5a31bb60..7bafb6c442ad 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -142,8 +142,10 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await + else { + continue; + }; // Try to split the region with the given split key. 
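// Illustrative sketch, not part of the patch above: the repeated
// `let ... else { continue }` rewrites in this series (e.g. the compact worker
// loop just above) are formatting-only changes to stable let-else syntax.
// The `else` branch must diverge (continue/return/break/panic), which is what
// lets the happy path keep the bound value without extra nesting. The names
// below (`TabletCache`, `lookup`) are made up for the example.
use std::collections::HashMap;

struct TabletCache {
    latest: Option<String>,
}

fn lookup(reg: &HashMap<u64, TabletCache>, ids: &[u64]) -> Vec<String> {
    let mut out = Vec::new();
    for id in ids {
        // Skip regions with no cached tablet, mirroring the worker loop above.
        let Some(cache) = reg.get(id) else {
            continue;
        };
        let Some(tablet) = cache.latest.as_ref() else {
            continue;
        };
        out.push(tablet.clone());
    }
    out
}

fn main() {
    let mut reg = HashMap::new();
    reg.insert(1, TabletCache { latest: Some("t_1_10".to_owned()) });
    reg.insert(2, TabletCache { latest: None });
    assert_eq!(lookup(&reg, &[1, 2, 3]), vec!["t_1_10".to_owned()]);
}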
if let Some(split_key) = split_info.split_key { Self::ask_batch_split_imp( diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 7c330353836e..0b0429eb8d17 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -467,7 +467,8 @@ impl Runner { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) else { + .map(|mut cache| cache.latest().cloned()) + else { warn!( self.logger, "flush memtable failed to acquire tablet"; @@ -555,7 +556,15 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { + let Task::DeleteRange { + region_id, + tablet, + cf, + start_key, + end_key, + cb, + } = delete_range + else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 5b3cc5feb930..a949725090d2 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -137,7 +137,9 @@ impl TestRouter { match res { Ok(_) => return block_on(sub.result()).is_some(), Err(TrySendError::Disconnected(m)) => { - let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; + let PeerMsg::WaitFlush(ch) = m else { + unreachable!() + }; match self .store_router() .send_control(StoreMsg::WaitFlush { region_id, ch }) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d082013cd2c5..756b7dc399e9 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -471,10 +471,7 @@ impl CoprocessorHost { BoxSplitCheckObserver::new(KeysCheckObserver::new(ch)), ); registry.register_split_check_observer(100, BoxSplitCheckObserver::new(HalfCheckObserver)); - registry.register_split_check_observer( - 400, - BoxSplitCheckObserver::new(TableCheckObserver::default()), - ); + registry.register_split_check_observer(400, BoxSplitCheckObserver::new(TableCheckObserver)); registry.register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index d1597a77121e..6cf83a6cf846 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -223,7 +223,7 @@ impl From for errorpb::Error { .mut_proposal_in_merging_mode() .set_region_id(region_id); } - Error::Transport(reason) if reason == DiscardReason::Full => { + Error::Transport(DiscardReason::Full) => { let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(RAFTSTORE_IS_BUSY.to_owned()); errorpb.set_server_is_busy(server_is_busy_err); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 1db5f79d2268..197eaefeac78 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -5,11 +5,13 @@ #![feature(div_duration)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] #![feature(let_chains)] #![feature(assert_matches)] #![feature(type_alias_impl_trait)] +#![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] +#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(test)] extern 
crate test; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index eedd5052bbbb..12617bc28a24 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -419,7 +419,11 @@ where } self.state_size = 0; if let ExtraBatchWrite::V2(_) = self.extra_batch_write { - let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) else { unreachable!() }; + let ExtraBatchWrite::V2(lb) = + mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) + else { + unreachable!() + }; wb.merge(lb).unwrap(); } } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index c91c68538dd6..95f099f77a77 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1338,14 +1338,14 @@ pub mod tests { // Test the initial data structure size. let (tx, rx) = mpsc::sync_channel(8); let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 896); + assert_eq!(rx.try_recv().unwrap(), 0); cache.append( 0, 0, &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], ); - assert_eq!(rx.try_recv().unwrap(), 3); + assert_eq!(rx.try_recv().unwrap(), 419); cache.prepend(vec![new_padded_entry(100, 1, 1)]); assert_eq!(rx.try_recv().unwrap(), 1); @@ -1371,7 +1371,7 @@ pub mod tests { // Test trace a dangle entry. let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 1); + assert_eq!(rx.try_recv().unwrap(), 97); // Test trace an entry which is still in cache. let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); @@ -1398,7 +1398,7 @@ pub mod tests { assert_eq!(rx.try_recv().unwrap(), -7); drop(cache); - assert_eq!(rx.try_recv().unwrap(), -896); + assert_eq!(rx.try_recv().unwrap(), -512); } #[test] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c170e5a35f98..406c8d79d18c 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1262,9 +1262,9 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); // 1. When a peer become leader, it will send an empty entry. - // 2. When a leader tries to read index during transferring leader, - // it will also propose an empty entry. But that entry will not contain - // any associated callback. So no need to clear callback. + // 2. When a leader tries to read index during transferring leader, it will also + // propose an empty entry. But that entry will not contain any associated + // callback. So no need to clear callback. while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { if let Some(cb) = cmd.cb.take() { apply_ctx @@ -4787,12 +4787,12 @@ where // command may not read the writes of previous commands and break ACID. If // it's still leader, there are two possibility that mailbox is closed: // 1. The process is shutting down. - // 2. The leader is destroyed. A leader won't propose to destroy itself, so - // it should either destroyed by older leaders or newer leaders. Leader - // won't respond to read until it has applied to current term, so no - // command will be proposed until command from older leaders have applied, - // which will then stop it from accepting proposals. 
If the command is - // proposed by new leader, then it won't be able to propose new proposals. + // 2. The leader is destroyed. A leader won't propose to destroy itself, so it + // should either destroyed by older leaders or newer leaders. Leader won't + // respond to read until it has applied to current term, so no command will + // be proposed until command from older leaders have applied, which will then + // stop it from accepting proposals. If the command is proposed by new + // leader, then it won't be able to propose new proposals. // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index d61e67842952..36c4c7e8e5fb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1015,10 +1015,10 @@ where // in snapshot recovery after we stopped all conf changes from PD. // if the follower slow than leader and has the pending conf change. // that's means - // 1. if the follower didn't finished the conf change - // => it cannot be chosen to be leader during recovery. - // 2. if the follower has been chosen to be leader - // => it already apply the pending conf change already. + // 1. if the follower didn't finished the conf change => it cannot be chosen to + // be leader during recovery. + // 2. if the follower has been chosen to be leader => it already apply the + // pending conf change already. return; } debug!( diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 64c5be6d7e15..a858b5afddde 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -169,19 +169,25 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { return false; }; + let Callback::Write { proposed_cb, .. } = self else { + return false; + }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { return; }; + let Callback::Write { proposed_cb, .. } = self else { + return; + }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { return; }; + let Callback::Write { committed_cb, .. } = self else { + return; + }; if let Some(cb) = committed_cb.take() { cb(); } @@ -195,12 +201,16 @@ where } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { return None; }; + let Callback::Write { proposed_cb, .. } = self else { + return None; + }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { return None; }; + let Callback::Write { committed_cb, .. } = self else { + return None; + }; committed_cb.take() } } @@ -258,7 +268,9 @@ impl ReadCallback for Callback { } fn read_tracker(&self) -> Option { - let Callback::Read { tracker, .. } = self else { return None; }; + let Callback::Read { tracker, .. } = self else { + return None; + }; Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8ef857bfa129..aafd2f9695b9 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2314,14 +2314,14 @@ where CheckApplyingSnapStatus::Applying => { // If this peer is applying snapshot, we should not get a new ready. 
// There are two reasons in my opinion: - // 1. If we handle a new ready and persist the data(e.g. entries), - // we can not tell raft-rs that this ready has been persisted because - // the ready need to be persisted one by one from raft-rs's view. - // 2. When this peer is applying snapshot, the response msg should not - // be sent to leader, thus the leader will not send new entries to - // this peer. Although it's possible a new leader may send a AppendEntries - // msg to this peer, this possibility is very low. In most cases, there - // is no msg need to be handled. + // 1. If we handle a new ready and persist the data(e.g. entries), we can not + // tell raft-rs that this ready has been persisted because the ready need + // to be persisted one by one from raft-rs's view. + // 2. When this peer is applying snapshot, the response msg should not be sent + // to leader, thus the leader will not send new entries to this peer. + // Although it's possible a new leader may send a AppendEntries msg to this + // peer, this possibility is very low. In most cases, there is no msg need + // to be handled. // So we choose to not get a new ready which makes the logic more clear. debug!( "still applying snapshot, skip further handling"; @@ -4467,27 +4467,25 @@ where /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. pre_ack_transfer_leader_msg on follower: - /// If follower passes all necessary checks, it will try to warmup - /// the entry cache. - /// 3. ack_transfer_leader_msg on follower: - /// When the entry cache has been warmed up or the operator is timeout, - /// the follower reply an ACK with type MsgTransferLeader and - /// its promised persistent index. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. pre_ack_transfer_leader_msg on follower: If follower passes all + /// necessary checks, it will try to warmup the entry cache. + /// 3. ack_transfer_leader_msg on follower: When the entry cache has been + /// warmed up or the operator is timeout, the follower reply an ACK with + /// type MsgTransferLeader and its promised persistent index. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. ack_transfer_leader_msg on follower again: - /// The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. ack_transfer_leader_msg on follower again: The follower applies + /// the TransferLeader command and replies an ACK with special context + /// TRANSFER_LEADER_COMMAND_REPLY_CTX. /// - /// 4. ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 4. ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// See also: tikv/rfcs#37. 
fn propose_transfer_leader( @@ -5820,7 +5818,7 @@ mod tests { admin_req.clear_transfer_leader(); req.clear_admin_request(); - for (op, policy) in vec![ + for (op, policy) in [ (CmdType::Get, RequestPolicy::ReadLocal), (CmdType::Snap, RequestPolicy::ReadLocal), (CmdType::Put, RequestPolicy::ProposeNormal), @@ -5973,7 +5971,7 @@ mod tests { // (1, 4) and (1, 5) is not committed let entries = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)]; - let committed = vec![(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; + let committed = [(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; for (index, term) in entries.clone() { if term != 1 { continue; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a888929ca985..1556338e9c0c 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -96,7 +96,7 @@ impl PartialEq for SnapState { (&SnapState::Relax, &SnapState::Relax) | (&SnapState::ApplyAborted, &SnapState::ApplyAborted) | (&SnapState::Generating { .. }, &SnapState::Generating { .. }) => true, - (&SnapState::Applying(ref b1), &SnapState::Applying(ref b2)) => { + (SnapState::Applying(b1), SnapState::Applying(b2)) => { b1.load(Ordering::Relaxed) == b2.load(Ordering::Relaxed) } _ => false, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index bc22dfbf5866..40168707f6ab 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -438,7 +438,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engines.kv.put(&data_key(k), v).unwrap(); } let store = new_peer_storage(engines, &r); @@ -482,11 +482,11 @@ mod tests { let mut data = vec![]; { let db = &engines.kv; - for &(ref k, level) in &levels { + for (k, level) in &levels { db.put(&data_key(k), k).unwrap(); db.flush_cfs(&[], true).unwrap(); data.push((k.to_vec(), k.to_vec())); - db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) + db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(*level)) .unwrap(); } } diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index a303a5869356..1d8341c1c0b2 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -579,13 +579,17 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); - let SimpleWrite::Put(put) = write else { panic!("should be put") }; + let SimpleWrite::Put(put) = write else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); let write = decoder.next().unwrap(); - let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; + let SimpleWrite::Delete(delete) = write else { + panic!("should be delete") + }; assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); assert_matches!(decoder.next(), None); @@ -593,14 +597,18 @@ mod tests { let (bytes, _) = req_encoder2.encode(); decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) = write else { + 
panic!("should be delete range") + }; assert_eq!(dr.cf, CF_LOCK); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); assert!(dr.notify_only); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) = write else { + panic!("should be delete range") + }; assert_eq!(dr.cf, "cf"); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); @@ -626,7 +634,9 @@ mod tests { let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; + let SimpleWrite::Ingest(ssts) = write else { + panic!("should be ingest") + }; assert_eq!(exp, ssts); assert_matches!(decoder.next(), None); } @@ -715,7 +725,9 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); - let SimpleWrite::Put(put) = req else { panic!("should be put") }; + let SimpleWrite::Put(put) = req else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6fe21fe97502..dcb98dd9cb2e 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1323,7 +1323,7 @@ impl Write for Snapshot { } assert!(cf_file.size[self.cf_file_index] != 0); - let mut file_for_recving = cf_file + let file_for_recving = cf_file .file_for_recving .get_mut(self.cf_file_index) .unwrap(); @@ -2162,7 +2162,7 @@ impl TabletSnapManager { .stats .lock() .unwrap() - .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) + .extract_if(|_, (_, stat)| stat.get_region_id() > 0) .map(|(_, (_, stat))| stat) .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 3cdee1e40f1c..8fcaf826c6ab 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -327,7 +327,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_plain_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db: KvTestEngine = db_creater(dir.path(), db_opt.clone(), None).unwrap(); // Collect keys via the key_callback into a collection. @@ -408,7 +408,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_sst_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db = db_creater(dir.path(), db_opt.clone(), None).unwrap(); let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 0091fd4e7bb8..9c73be2b9eba 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -244,7 +244,7 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. 
// Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.drain_filter(|key, _| { + let removed_locks = self.map.extract_if(|key, _| { let key = &**key.as_encoded(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); key < start_key || (!end_key.is_empty() && key >= end_key) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 3f34fe691ee0..ed2c70822c99 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -124,8 +124,7 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { /// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet -// 2. Target peer is added newly due to member change or region split, but it's not -// created yet +// 2. Target peer is added newly due to member change or region split, but it's not created yet // For both cases the region start key and end key are attached in RequestVote and // Heartbeat message for the store of that peer to check whether to create a new peer // when receiving these messages, or just to wait for a pending region split to perform diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 32fbdbc31452..cb067ca840b8 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1710,10 +1710,7 @@ where fn handle_read_stats(&mut self, mut read_stats: ReadStats) { for (region_id, region_info) in read_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -1735,10 +1732,7 @@ where fn handle_write_stats(&mut self, mut write_stats: WriteStats) { for (region_id, region_info) in write_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num @@ -2096,7 +2090,10 @@ where let f = async move { for split_info in split_infos { let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + pd_client.get_region_by_id(split_info.region_id).await + else { + continue; + }; // Try to split the region with the given split key. 
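// Illustrative sketch, not part of the patch above: the drain_filter ->
// extract_if renames in this series track the nightly std rename; the matching
// feature gates (`#![feature(extract_if)]`, `#![feature(hash_extract_if)]`)
// are added to the crate roots earlier in this patch. The iterator lazily
// removes and yields the matching entries, so call sites collect or loop over
// it, as the TiKV code above does. The `locks` map below is a toy stand-in.
#![feature(hash_extract_if)]
use std::collections::HashMap;

fn main() {
    let mut locks: HashMap<String, u64> = HashMap::new();
    locks.insert("k1".to_owned(), 10);
    locks.insert("k2".to_owned(), 200);
    // Move out the entries matching the predicate, keeping the rest in place.
    let removed: Vec<(String, u64)> = locks.extract_if(|_, v| *v >= 100).collect();
    assert_eq!(removed, vec![("k2".to_owned(), 200)]);
    assert_eq!(locks.len(), 1);
}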
if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( @@ -2161,10 +2158,7 @@ where cpu_usage, ) = { let region_id = hb_task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5d6ede9c1936..5a6e641f5dcd 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -2155,11 +2155,12 @@ mod tests { let (notify_tx, notify_rx) = channel(); let (wait_spawn_tx, wait_spawn_rx) = channel(); let runtime = tokio::runtime::Runtime::new().unwrap(); - let _ = runtime.spawn(async move { + let handler = runtime.spawn(async move { wait_spawn_tx.send(()).unwrap(); notify.notified().await; notify_tx.send(()).unwrap(); }); + drop(handler); wait_spawn_rx.recv().unwrap(); thread::sleep(std::time::Duration::from_millis(500)); // Prevent lost notify. must_not_redirect(&mut reader, &rx, task); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 068904b2a677..7a675646f5cd 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -179,7 +179,7 @@ impl PendingDeleteRanges { ) -> Vec<(u64, Vec, Vec, u64)> { let ranges = self.find_overlap_ranges(start_key, end_key); - for &(_, ref s_key, ..) in &ranges { + for (_, s_key, ..) in &ranges { self.ranges.remove(s_key).unwrap(); } ranges @@ -1293,7 +1293,7 @@ pub(crate) mod tests { } }; - #[allow(dead_code)] + #[cfg(feature = "failpoints")] let must_not_finish = |ids: &[u64]| { for id in ids { let region_key = keys::region_state_key(*id); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 4ff853f70a05..468c06febd46 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -64,14 +64,14 @@ impl KeyEntry { impl PartialOrd for KeyEntry { fn partial_cmp(&self, rhs: &KeyEntry) -> Option { - // BinaryHeap is max heap, so we have to reverse order to get a min heap. - Some(self.key.cmp(&rhs.key).reverse()) + Some(self.cmp(rhs)) } } impl Ord for KeyEntry { fn cmp(&self, rhs: &KeyEntry) -> Ordering { - self.partial_cmp(rhs).unwrap() + // BinaryHeap is max heap, so we have to reverse order to get a min heap. + self.key.cmp(&rhs.key).reverse() } } @@ -287,7 +287,7 @@ impl Runner { region: &Region, bucket_ranges: &Vec, ) { - for (mut bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { + for (bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { let mut bucket_region = region.clone(); bucket_region.set_start_key(bucket_range.0.clone()); bucket_region.set_end_key(bucket_range.1.clone()); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 4bbcc7737638..9cf534c62b0e 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -178,7 +178,7 @@ impl Samples { // evaluate the samples according to the given key range, it will update the // sample's left, right and contained counter. 
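// Illustrative sketch, not part of the patch above: the `KeyEntry` change
// earlier in this patch moves the real ordering into `Ord::cmp` and lets
// `PartialOrd` delegate to it, which matches the delegation recommended in
// std's `Ord` docs and keeps the two orderings from drifting apart. The type
// below is made up; the `.reverse()` mirrors `KeyEntry`'s trick of turning the
// max-heap `BinaryHeap` into a min-heap.
use std::{cmp::Ordering, collections::BinaryHeap};

#[derive(PartialEq, Eq)]
struct Entry {
    key: Vec<u8>,
}

impl Ord for Entry {
    fn cmp(&self, rhs: &Self) -> Ordering {
        // Reverse the key order so the smallest key is popped first.
        self.key.cmp(&rhs.key).reverse()
    }
}

impl PartialOrd for Entry {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(self.cmp(rhs))
    }
}

fn main() {
    let mut heap = BinaryHeap::new();
    heap.push(Entry { key: b"b".to_vec() });
    heap.push(Entry { key: b"a".to_vec() });
    assert_eq!(heap.pop().unwrap().key, b"a".to_vec());
}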
fn evaluate(&mut self, key_range: &KeyRange) { - for mut sample in self.0.iter_mut() { + for sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { Ordering::Greater } else { @@ -496,10 +496,7 @@ pub struct WriteStats { impl WriteStats { pub fn add_query_num(&mut self, region_id: u64, kind: QueryKind) { - let query_stats = self - .region_infos - .entry(region_id) - .or_insert_with(QueryStats::default); + let query_stats = self.region_infos.entry(region_id).or_default(); query_stats.add_query_num(kind, 1); } @@ -988,8 +985,8 @@ mod tests { #[test] fn test_prefix_sum() { - let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; - let expect = vec![1, 3, 6, 10, 15, 21, 28, 36, 45]; + let v = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expect = [1, 3, 6, 10, 15, 21, 28, 36, 45]; let pre = prefix_sum(v.iter(), |x| *x); for i in 0..v.len() { assert_eq!(expect[i], pre[i]); diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 47d14304112f..328f725edaaa 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -213,13 +213,13 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) CF_WRITE => { if let Ok(ts) = key.decode_ts() { let key = key.truncate_ts().unwrap(); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.write.is_none()); row.write = Some(KeyOp::Put(Some(ts), value)); } } CF_LOCK => { - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.lock.is_none()); row.lock = Some(KeyOp::Put(None, value)); } @@ -239,7 +239,7 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) match delete.cf.as_str() { CF_LOCK => { let key = Key::from_encoded(delete.take_key()); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); row.lock = Some(KeyOp::Delete); } "" | CF_WRITE | CF_DEFAULT => {} diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 34f00672fa72..600da207ec43 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -65,7 +65,8 @@ impl Drop for ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { return; }; if locks.is_empty() { @@ -96,7 +97,8 @@ impl ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Check if adding a new lock or unlock will exceed the memory @@ -110,10 +112,7 @@ impl ResolverStatus { } fn update_tracked_index(&mut self, index: u64, region_id: u64) { - let ResolverStatus::Pending { - tracked_index, - .. - } = self else { + let ResolverStatus::Pending { tracked_index, .. } = self else { panic!("region {:?} resolver has ready", region_id) }; assert!( @@ -135,7 +134,8 @@ impl ResolverStatus { memory_quota, tracked_index, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Must take locks, otherwise it may double free memory quota on drop. @@ -687,7 +687,7 @@ where scanner_pool, scan_concurrency_semaphore, regions: HashMap::default(), - _phantom: PhantomData::default(), + _phantom: PhantomData, }; ep.handle_advance_resolved_ts(leader_resolver); ep @@ -870,7 +870,6 @@ where // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. 
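// Illustrative sketch, not part of the patch above: the repeated
// `.entry(k).or_insert_with(T::default)` -> `.entry(k).or_default()` rewrites
// in the PD and split-controller workers above are a pure shorthand;
// `or_default` is available on the Entry API whenever the value type
// implements `Default`. `PeerStat` below is a stand-in, not the real TiKV type.
use std::collections::HashMap;

#[derive(Default)]
struct PeerStat {
    read_bytes: u64,
    read_keys: u64,
}

fn main() {
    let mut region_peers: HashMap<u64, PeerStat> = HashMap::new();
    // Before: region_peers.entry(1).or_insert_with(PeerStat::default)
    let stat = region_peers.entry(1).or_default();
    stat.read_bytes += 4096;
    stat.read_keys += 1;
    assert_eq!(region_peers[&1].read_bytes, 4096);
    assert_eq!(region_peers[&1].read_keys, 1);
}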
- #[allow(clippy::drop_ref)] fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); @@ -884,7 +883,6 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { - drop(observe_region); let backoff = match e { Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, @@ -930,7 +928,7 @@ where } fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { - let regions = self.regions.keys().into_iter().copied().collect(); + let regions = self.regions.keys().copied().collect(); self.advance_worker.advance_ts_for_regions( regions, leader_resolver, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 6c8c90dc38f0..ad052338fa2a 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -100,7 +100,7 @@ impl, E: KvEngine> ScannerPool { Self { workers, cdc_handle, - _phantom: PhantomData::default(), + _phantom: PhantomData, } } @@ -168,6 +168,7 @@ impl, E: KvEngine> ScannerPool { self.workers.spawn(fut); } + #[allow(clippy::needless_pass_by_ref_mut)] async fn get_snapshot( task: &mut ScanTask, cdc_handle: T, diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index a356d30a7ac2..0e40255b3544 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -307,8 +307,8 @@ pub struct ResourceController { // 1. the priority factor is calculate based on read/write RU settings. // 2. for read request, we increase a constant virtual time delta at each `get_priority` call // because the cost can't be calculated at start, so we only increase a constant delta and - // increase the real cost after task is executed; but don't increase it at write because - // the cost is known so we just pre-consume it. + // increase the real cost after task is executed; but don't increase it at write because the + // cost is known so we just pre-consume it. is_read: bool, // Track the maximum ru quota used to calculate the factor of each resource group. // factor = max_ru_quota / group_ru_quota * 10.0 diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index ba8e2174e198..7b437ea43037 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -2,7 +2,7 @@ // TODO(mornyx): crate doc. 
-#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] #![feature(core_intrinsics)] use std::{ diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 6f7118ef9e1c..03cd500eb2e9 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -87,7 +87,7 @@ impl RawRecords { pdqselect::select_by(&mut buf, k, |a, b| b.cmp(a)); let kth = buf[k]; // Evict records with cpu time less or equal than `kth` - let evicted_records = self.records.drain_filter(|_, r| r.cpu_time <= kth); + let evicted_records = self.records.extract_if(|_, r| r.cpu_time <= kth); // Record evicted into others for (_, record) in evicted_records { others.merge(&record); diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index 8c4053a80ab2..08675bb6153f 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -9,7 +9,7 @@ use crate::{ localstorage::{LocalStorage, SharedTagInfos}, SubRecorder, }, - RawRecord, RawRecords, + RawRecords, }; /// An implementation of [SubRecorder] for collecting cpu statistics. @@ -37,7 +37,7 @@ impl SubRecorder for CpuRecorder { if *last_stat != cur_stat { let delta_ms = (cur_stat.total_cpu_time() - last_stat.total_cpu_time()) * 1_000.; - let record = records.entry(cur_tag).or_insert_with(RawRecord::default); + let record = records.entry(cur_tag).or_default(); record.cpu_time += delta_ms as u32; } thread_stat.stat = cur_stat; diff --git a/components/resource_metering/tests/recorder_test.rs b/components/resource_metering/tests/recorder_test.rs index daa371e74771..6e164b8e5e81 100644 --- a/components/resource_metering/tests/recorder_test.rs +++ b/components/resource_metering/tests/recorder_test.rs @@ -55,7 +55,7 @@ mod tests { if let Some(tag) = self.current_ctx { self.records .entry(tag.as_bytes().to_vec()) - .or_insert_with(RawRecord::default) + .or_default() .cpu_time += ms; } self.ops.push(op); @@ -140,7 +140,7 @@ mod tests { if let Ok(mut r) = self.records.lock() { for (tag, record) in records.records.iter() { r.entry(tag.extra_attachment.to_vec()) - .or_insert_with(RawRecord::default) + .or_default() .merge(record); } } @@ -156,10 +156,10 @@ mod tests { let mut records = self.records.lock().unwrap(); for k in expected.keys() { - records.entry(k.clone()).or_insert_with(RawRecord::default); + records.entry(k.clone()).or_default(); } for k in records.keys() { - expected.entry(k.clone()).or_insert_with(RawRecord::default); + expected.entry(k.clone()).or_default(); } for (k, expected_value) in expected { let value = records.get(&k).unwrap(); @@ -324,10 +324,10 @@ mod tests { fn merge( maps: impl IntoIterator, RawRecord>>, ) -> HashMap, RawRecord> { - let mut map = HashMap::default(); + let mut map: HashMap, RawRecord> = HashMap::default(); for m in maps { for (k, v) in m { - map.entry(k).or_insert_with(RawRecord::default).merge(&v); + map.entry(k).or_default().merge(&v); } } map diff --git a/components/server/src/common.rs b/components/server/src/common.rs index c8cf879d9052..43b0314cbbe2 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -558,7 +558,9 @@ impl EnginesResourceInfo { }); for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { continue }; + let Some(tablet) = cache.latest() else { + continue; + }; for cf in DATA_CFS { fetch_engine_cf(tablet, 
cf); } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 417d5becca31..48344fe50128 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -206,7 +206,7 @@ mod test { #[test] fn test_basic() { - let leaders = vec![1, 2, 3]; + let leaders = [1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, leaders); @@ -217,7 +217,7 @@ mod test { #[test] fn test_failure() { - let leaders = vec![1, 2, 3]; + let leaders = [1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 70b7d7fac5e1..4db29c47a6f7 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -139,7 +139,7 @@ impl ImportModeSwitcherV2 { pub fn ranges_in_import(&self) -> HashSet { let inner = self.inner.lock().unwrap(); - HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) + HashSet::from_iter(inner.import_mode_ranges.keys().cloned()) } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 181f9d67b2fe..502a81ff6a68 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -367,8 +367,8 @@ impl SstImporter { // This method is blocking. It performs the following transformations before // writing to disk: // - // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. - // (set the range to `["", ""]` to import everything). + // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. (set + // the range to `["", ""]` to import everything). // 2. keys are rewritten according to the given rewrite rule. // // Both the range and rewrite keys are specified using origin keys. 
However, @@ -1541,7 +1541,7 @@ mod tests { let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); - let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; + let cases = [(0, 10), (5, 15), (10, 20), (0, 100)]; let mut ingested = Vec::new(); @@ -2055,13 +2055,10 @@ mod tests { false, ) .unwrap(); - let ext_storage = { - let inner = importer.wrap_kms( - importer.external_storage_or_cache(&backend, "").unwrap(), - false, - ); - inner - }; + let ext_storage = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); // test do_read_kv_file() let output = block_on_external_io(importer.do_read_kv_file( diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index ff7526172d51..654971b0d411 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -97,7 +97,8 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { - pmts.set_readonly(false); + use std::os::unix::fs::PermissionsExt; + pmts.set_mode(0o644); file_system::set_permissions(clone, pmts)?; } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 96f405d8f39e..6763ea7bb1a1 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -203,7 +203,7 @@ impl Store { } pub fn put(&mut self, ctx: Context, mut kv: Vec<(Vec, Vec)>) { - self.handles.extend(kv.iter().map(|&(ref k, _)| k.clone())); + self.handles.extend(kv.iter().map(|(k, _)| k.clone())); let pk = kv[0].0.clone(); let kv = kv .drain(..) diff --git a/components/test_coprocessor_plugin/example_plugin/src/lib.rs b/components/test_coprocessor_plugin/example_plugin/src/lib.rs index afcaa4962b94..d383797c0692 100644 --- a/components/test_coprocessor_plugin/example_plugin/src/lib.rs +++ b/components/test_coprocessor_plugin/example_plugin/src/lib.rs @@ -18,4 +18,4 @@ impl CoprocessorPlugin for ExamplePlugin { } } -declare_plugin!(ExamplePlugin::default()); +declare_plugin!(ExamplePlugin); diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 90a420fbba08..02833e030eb8 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -128,12 +128,8 @@ impl Server { } #[allow(unused_mut)] -fn hijack_unary( - mock: &mut PdMock, - ctx: RpcContext<'_>, - sink: UnarySink, - f: F, -) where +fn hijack_unary(mock: &PdMock, ctx: RpcContext<'_>, sink: UnarySink, f: F) +where R: Send + 'static, F: Fn(&dyn PdMocker) -> Option>, { diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index c81230f6a163..58df59987583 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1438,7 +1438,7 @@ impl TestPdClient { pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); - let mut dr = status.mut_dr_auto_sync(); + let dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state); dr.available_stores = available_stores; diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 8ede32901671..346813e7d1fc 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -220,7 +220,7 @@ pub trait 
Simulator { None => { error!("call_query_on_node receives none response"; "request" => ?request); // Do not unwrap here, sometimes raftstore v2 may return none. - return Err(box_err!("receives none response {:?}", request)); + Err(box_err!("receives none response {:?}", request)) } } } @@ -1612,6 +1612,7 @@ impl, EK: KvEngine> Cluster { ) } + #[allow(clippy::let_underscore_future)] pub fn merge_region(&mut self, source: u64, target: u64, _cb: Callback) { // FIXME: callback is ignored. let mut req = self.new_prepare_merge(source, target); diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 685affe45d0f..45642df1e7fa 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -3,6 +3,8 @@ #![feature(type_alias_impl_trait)] #![feature(return_position_impl_trait_in_trait)] #![feature(let_chains)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::arc_with_non_send_sync)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index d63ca0aa2f28..70b6ccb14077 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -258,7 +258,7 @@ impl Simulator for NodeCluster { ) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; self.snap_mgrs.insert(node_id, snap_mgr.clone()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7b5d501a59f4..a7d64591fe1d 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1006,7 +1006,18 @@ pub fn must_new_cluster_and_kv_client_mul( TikvClient, Context, ) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} + +pub fn must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = @@ -1015,6 +1026,7 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } + pub fn must_new_cluster_mul( count: usize, ) -> ( diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 04dfbd24de17..6f48c17190af 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,6 +1,8 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
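// Illustrative sketch, not part of the patch above: several crates in this
// series either downgrade `&mut` parameters that are never written through
// (such as test_pd's `hijack_unary` a little further on) or add
// `#![allow(clippy::needless_pass_by_ref_mut)]` at the crate root, as in the
// lib.rs hunks above, where the mutable borrow is kept on purpose. The
// function below is a made-up minimal case of what that lint flags.
struct Counter {
    value: u64,
}

// Taking `&mut Counter` here would trip `clippy::needless_pass_by_ref_mut`,
// since the parameter is only read; `&Counter` is enough.
fn read_value(c: &Counter) -> u64 {
    c.value
}

fn main() {
    let c = Counter { value: 42 };
    assert_eq!(read_value(&c), 42);
}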
#![feature(let_chains)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::arc_with_non_send_sync)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index f429f27ff8b6..8a9969c19137 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -281,7 +281,7 @@ impl Simulator for NodeCluster { (snap_mgr, Some(tmp)) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8d26bae968d8..0df44b4e7843 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -918,8 +918,14 @@ pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, pub fn must_new_cluster_and_kv_client_mul( count: usize, ) -> (Cluster, TikvClient, Context) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} +pub fn must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster), +) -> (Cluster, TikvClient, Context) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index 33976939c830..ea3017d5d027 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -1739,27 +1739,24 @@ mod tests_normal { /// Compare TokenStream with all white chars trimmed. 
fn assert_token_stream_equal(l: TokenStream, r: TokenStream) { - let result = l - .clone() - .into_iter() - .eq_by(r.clone().into_iter(), |x, y| match x { - TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), - TokenTree::Literal(x) => { - matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) - } - TokenTree::Punct(x) => { - matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) - } - TokenTree::Group(x) => { - if let TokenTree::Group(y) = y { - assert_token_stream_equal(x.stream(), y.stream()); + let result = l.clone().into_iter().eq_by(r.clone(), |x, y| match x { + TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), + TokenTree::Literal(x) => { + matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) + } + TokenTree::Punct(x) => { + matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) + } + TokenTree::Group(x) => { + if let TokenTree::Group(y) = y { + assert_token_stream_equal(x.stream(), y.stream()); - true - } else { - false - } + true + } else { + false } - }); + } + }); assert!(result, "expect: {:#?}, actual: {:#?}", &l, &r); } diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 22127e62f49f..738e0020de7f 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -251,7 +251,7 @@ where { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - C::sort_compare(self.inner.as_ref(), other.inner.as_ref()).ok() + Some(self.cmp(other)) } } diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 418841547cac..d2bbee78078b 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -574,13 +574,13 @@ pub fn bytes_to_int_without_context(bytes: &[u8]) -> Result { if let Some(&c) = trimed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(i64::from(c) - i64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { let cur = i64::from(*c - b'0'); r = r.and_then(|r| r.checked_mul(10)).and_then(|r| { if negative { @@ -605,13 +605,13 @@ pub fn bytes_to_uint_without_context(bytes: &[u8]) -> Result { let mut trimed = bytes.iter().skip_while(|&&b| b == b' ' || b == b'\t'); let mut r = Some(0u64); if let Some(&c) = trimed.next() { - if (b'0'..=b'9').contains(&c) { + if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -856,7 +856,7 @@ pub fn get_valid_int_prefix_helper<'a>( if (c == '+' || c == '-') && i == 0 { continue; } - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { valid_len = i + 1; continue; } @@ -917,7 +917,7 @@ pub fn get_valid_float_prefix_helper<'a>( break; } e_idx = i - } else if !('0'..='9').contains(&c) { + } else if !c.is_ascii_digit() { break; } else { saw_digit = true; diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 8ca367908247..b464b1119c81 100644 --- 
a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -248,7 +248,7 @@ macro_rules! impl_evaluable_type { } #[inline] - fn borrow_scalar_value_ref<'a>(v: ScalarValueRef<'a>) -> Option<&'a Self> { + fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self> { match v { ScalarValueRef::$ty(x) => x, other => panic!( diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index c74423107e4f..ff66ddc42eeb 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -467,24 +467,23 @@ impl<'a> ScalarValueRef<'a> { impl<'a> Ord for ScalarValueRef<'a> { fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other) - .expect("Cannot compare two ScalarValueRef in different type") - } -} - -impl<'a> PartialOrd for ScalarValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { match_template_evaltype! { TT, match (self, other) { // v1 and v2 are `Option`. However, in MySQL NULL values are considered lower // than any non-NULL value, so using `Option::PartialOrd` directly is fine. - (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => Some(v1.cmp(v2)), - _ => None, + (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => v1.cmp(v2), + _ => panic!("Cannot compare two ScalarValueRef in different type"), } } } } +impl<'a> PartialOrd for ScalarValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + impl<'a> PartialEq for ScalarValueRef<'a> { fn eq(&self, other: &ScalarValue) -> bool { self == &other.as_scalar_value_ref() diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index dde98003475c..f91d204b3b07 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -668,7 +668,7 @@ impl Datum { Datum::F64(res) } } - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l + r).into(); return dec.map(Datum::Dec); } @@ -700,7 +700,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_sub(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l - r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l - r).into(); return dec.map(Datum::Dec); } @@ -724,7 +724,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_mul(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l * r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => return Ok(Datum::Dec((l * r).unwrap())), + (Datum::Dec(l), Datum::Dec(r)) => return Ok(Datum::Dec((l * r).unwrap())), (l, r) => return Err(invalid_type!("{} can't multiply {}", l, r)), }; @@ -1179,7 +1179,7 @@ mod tests { | (&Datum::Null, &Datum::Null) | (&Datum::Time(_), &Datum::Time(_)) | (&Datum::Json(_), &Datum::Json(_)) => true, - (&Datum::Dec(ref d1), &Datum::Dec(ref d2)) => d1.prec_and_frac() == d2.prec_and_frac(), + (Datum::Dec(d1), Datum::Dec(d2)) => d1.prec_and_frac() == d2.prec_and_frac(), _ => false, } } diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 143ec6c77608..8853a1d6a164 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1872,7 +1872,7 
@@ impl<'a> ConvertTo for JsonRef<'a> { fn first_non_digit(bs: &[u8], start_idx: usize) -> usize { bs.iter() .skip(start_idx) - .position(|c| !(b'0'..=b'9').contains(c)) + .position(|c| !c.is_ascii_digit()) .map_or_else(|| bs.len(), |s| s + start_idx) } diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 7279f7881462..4b7359777120 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -629,14 +629,14 @@ impl Eq for Duration {} impl PartialOrd for Duration { #[inline] fn partial_cmp(&self, rhs: &Duration) -> Option { - self.nanos.partial_cmp(&rhs.nanos) + Some(self.cmp(rhs)) } } impl Ord for Duration { #[inline] fn cmp(&self, rhs: &Duration) -> Ordering { - self.partial_cmp(rhs).unwrap() + self.nanos.partial_cmp(&rhs.nanos).unwrap() } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index d9104385bc6c..73e048858901 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -77,6 +77,8 @@ impl<'a> PartialEq for JsonRef<'a> { .map_or(false, |r| r == Ordering::Equal) } } + +#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)] impl<'a> PartialOrd for JsonRef<'a> { // See `CompareBinary` in TiDB `types/json/binary_functions.go` fn partial_cmp(&self, right: &JsonRef<'_>) -> Option { @@ -197,7 +199,7 @@ impl PartialEq for Json { impl PartialOrd for Json { fn partial_cmp(&self, right: &Json) -> Option { - self.as_ref().partial_cmp(&right.as_ref()) + Some(self.cmp(right)) } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index 867d8ec2c202..f76b29790f9f 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -28,9 +28,9 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryObject` in TiDB `types/json/binary.go` - fn write_json_obj_from_keys_values<'a>( + fn write_json_obj_from_keys_values( &mut self, - mut entries: Vec<(&[u8], JsonRef<'a>)>, + mut entries: Vec<(&[u8], JsonRef<'_>)>, ) -> Result<()> { entries.sort_by(|a, b| a.0.cmp(b.0)); // object: element-count size key-entry* value-entry* key* value* @@ -122,7 +122,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryArray` in TiDB `types/json/binary.go` - fn write_json_ref_array<'a>(&mut self, data: &[JsonRef<'a>]) -> Result<()> { + fn write_json_ref_array(&mut self, data: &[JsonRef<'_>]) -> Result<()> { let element_count = data.len(); let value_entries_len = VALUE_ENTRY_LEN * element_count; let values_len = data.iter().fold(0, |acc, v| acc + v.encoded_len()); @@ -167,7 +167,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryValElem` in TiDB `types/json/binary.go` - fn write_value_entry<'a>(&mut self, value_offset: &mut u32, v: &JsonRef<'a>) -> Result<()> { + fn write_value_entry(&mut self, value_offset: &mut u32, v: &JsonRef<'_>) -> Result<()> { let tp = v.get_type(); self.write_u8(tp as u8)?; match tp { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs index b359158d06b8..3cc78270d60c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs +++ 
b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs @@ -41,7 +41,7 @@ impl<'a> JsonRef<'a> { } } let mut res = self.to_owned(); - for (expr, value) in path_expr_list.iter().zip(values.into_iter()) { + for (expr, value) in path_expr_list.iter().zip(values) { let modifier = BinaryModifier::new(res.as_ref()); res = match mt { ModifyType::Insert => modifier.insert(expr, value)?, diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 4c6c2f676d7a..44228f2d88e8 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1094,7 +1094,7 @@ impl Time { ) } - fn try_into_chrono_datetime(self, ctx: &mut EvalContext) -> Result> { + fn try_into_chrono_datetime(self, ctx: &EvalContext) -> Result> { chrono_datetime( &ctx.cfg.tz, self.year(), @@ -2670,9 +2670,9 @@ mod tests { #[test] fn test_no_zero_in_date() -> Result<()> { - let cases = vec!["2019-01-00", "2019-00-01"]; + let cases = ["2019-01-00", "2019-00-01"]; - for &case in cases.iter() { + for case in cases { // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is // produced. let mut ctx = EvalContext::from(TimeEnv { @@ -2817,7 +2817,7 @@ mod tests { let actual = Time::from_duration(&mut ctx, duration, TimeType::DateTime)?; let today = actual - .try_into_chrono_datetime(&mut ctx)? + .try_into_chrono_datetime(&ctx)? .checked_sub_signed(chrono::Duration::nanoseconds(duration.to_nanos())) .unwrap(); @@ -2837,7 +2837,7 @@ mod tests { let mut ctx = EvalContext::default(); for i in 2..10 { let actual = Time::from_local_time(&mut ctx, TimeType::DateTime, i % MAX_FSP)?; - let c_datetime = actual.try_into_chrono_datetime(&mut ctx)?; + let c_datetime = actual.try_into_chrono_datetime(&ctx)?; let now0 = c_datetime.timestamp_millis() as u64; let now1 = Utc::now().timestamp_millis() as u64; diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index da117c96e2c0..aa5eb3fc56f2 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -298,7 +298,7 @@ impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, - _marker: PhantomData::default(), + _marker: PhantomData, } } diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 37becbfb801b..81ef4b072c62 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -528,7 +528,7 @@ pub fn generate_index_data_for_test( let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() - .map(|&(ref cid, ref value)| { + .map(|(cid, value)| { expect_row.insert( *cid, datum::encode_key(&mut EvalContext::default(), &[value.clone()]).unwrap(), diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 3a5c53a4d095..5ebf8a031d3e 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -611,8 +611,8 @@ impl IndexScanExecutorImpl { } #[inline] - fn build_operations<'a, 'b>( - &'b self, + fn build_operations<'a>( + &self, mut key_payload: &'a [u8], index_value: &'a [u8], ) -> Result<(DecodeHandleOp<'a>, DecodePartitionIdOp<'a>, 
RestoreData<'a>)> { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 7c410befb257..27e52dde2885 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -137,31 +137,31 @@ impl BatchExecutorsRunner<()> { .map_err(|e| other_err!("BatchProjectionExecutor: {}", e))?; } ExecType::TypeJoin => { - other_err!("Join executor not implemented"); + return Err(other_err!("Join executor not implemented")); } ExecType::TypeKill => { - other_err!("Kill executor not implemented"); + return Err(other_err!("Kill executor not implemented")); } ExecType::TypeExchangeSender => { - other_err!("ExchangeSender executor not implemented"); + return Err(other_err!("ExchangeSender executor not implemented")); } ExecType::TypeExchangeReceiver => { - other_err!("ExchangeReceiver executor not implemented"); + return Err(other_err!("ExchangeReceiver executor not implemented")); } ExecType::TypePartitionTableScan => { - other_err!("PartitionTableScan executor not implemented"); + return Err(other_err!("PartitionTableScan executor not implemented")); } ExecType::TypeSort => { - other_err!("Sort executor not implemented"); + return Err(other_err!("Sort executor not implemented")); } ExecType::TypeWindow => { - other_err!("Window executor not implemented"); + return Err(other_err!("Window executor not implemented")); } ExecType::TypeExpand => { - other_err!("Expand executor not implemented"); + return Err(other_err!("Expand executor not implemented")); } ExecType::TypeExpand2 => { - other_err!("Expand2 executor not implemented"); + return Err(other_err!("Expand2 executor not implemented")); } } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index bd65547109d3..ffcb22671da6 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -537,7 +537,7 @@ mod tests { }) .collect(); - for predicates in vec![ + for predicates in [ // Swap predicates should produce same results. vec![predicate[0](), predicate[1]()], vec![predicate[1](), predicate[0]()], @@ -572,7 +572,7 @@ mod tests { }) .collect(); - for predicates in vec![ + for predicates in [ // Swap predicates should produce same results. vec![predicate[0](), predicate[1](), predicate[2]()], vec![predicate[1](), predicate[2](), predicate[0]()], diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index 0535e8dbd83d..a5d760dc80d5 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -641,8 +641,8 @@ pub mod tests { )) as Box> }; - let test_paging_size = vec![2, 5, 7]; - let expect_call_num = vec![1, 3, 4]; + let test_paging_size = [2, 5, 7]; + let expect_call_num = [1, 3, 4]; let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index ca05e49fcd3d..db456a848832 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -28,13 +28,13 @@ pub fn ensure_columns_decoded( /// Evaluates expressions and outputs the result into the given Vec. Lifetime of /// the expressions are erased. 
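The `other_err!(..)` arms rewritten above were building an error value and then discarding it, so an unsupported executor type did not actually abort the build; the added `return Err(..)` is what propagates the failure. A self-contained sketch of the same pitfall, using a plain `String` error in place of TiKV's `other_err!` macro (names here are hypothetical):

fn build_executors(types: &[&str]) -> Result<usize, String> {
    let mut built = 0;
    for tp in types {
        match *tp {
            "TableScan" | "Selection" => built += 1,
            other => {
                // `format!(..);` alone would be dropped and the loop would
                // continue; the explicit `return Err(..)` aborts the build.
                return Err(format!("{} executor not implemented", other));
            }
        }
    }
    Ok(built)
}

fn main() {
    assert!(build_executors(&["TableScan", "Join"]).is_err());
}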
-pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( +pub unsafe fn eval_exprs_decoded_no_lifetime( ctx: &mut EvalContext, exprs: &[RpnExpression], schema: &[FieldType], input_physical_columns: &LazyBatchColumnVec, input_logical_rows: &[usize], - output: &mut Vec>, + output: &mut Vec>, ) -> Result<()> { unsafe fn erase_lifetime<'a, T: ?Sized>(v: &T) -> &'a T { &*(v as *const T) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 76e90f79c5bf..b6619f9d8ccb 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -6528,7 +6528,7 @@ mod tests { "cast_decimal_as_duration", ); - let values = vec![ + let values = [ Decimal::from_bytes(b"9995959").unwrap().unwrap(), Decimal::from_bytes(b"-9995959").unwrap().unwrap(), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 5d2daed7f9ae..663571804ae2 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -58,7 +58,7 @@ pub fn inet_aton(addr: BytesRef) -> Result> { } let (mut byte_result, mut result, mut dot_count): (u64, u64, usize) = (0, 0, 0); for c in addr.chars() { - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { let digit = c as u64 - '0' as u64; byte_result = byte_result * 10 + digit; if byte_result > 255 { @@ -501,8 +501,9 @@ mod tests { (Some(hex("00000000")), Some(b"0.0.0.0".to_vec())), (Some(hex("0A000509")), Some(b"10.0.5.9".to_vec())), ( + // the output format has changed, see: https://github.com/rust-lang/rust/pull/112606 Some(hex("00000000000000000000000001020304")), - Some(b"::1.2.3.4".to_vec()), + Some(b"::102:304".to_vec()), ), ( Some(hex("00000000000000000000FFFF01020304")), diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index f3b9b03c287d..45754d0a101e 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -63,13 +63,13 @@ pub fn oct_string(s: BytesRef, writer: BytesWriter) -> Result { if let Some(&c) = trimmed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(writer.write(Some(b"0".to_vec()))); } - for c in trimmed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimmed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -879,7 +879,7 @@ impl TrimDirection { } #[inline] -fn trim<'a, 'b>(string: &'a [u8], pattern: &'b [u8], direction: TrimDirection) -> &'a [u8] { +fn trim<'a>(string: &'a [u8], pattern: &[u8], direction: TrimDirection) -> &'a [u8] { if pattern.is_empty() { return string; } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index c2ef67221486..40c1f485e544 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -10,6 +10,8 @@ #![allow(elided_lifetimes_in_paths)] // Necessary until rpn_fn accepts functions annotated with lifetimes. 
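The `inet6_ntoa` test expectation above changes because `std::net::Ipv6Addr`'s `Display` implementation stopped printing IPv4-compatible addresses in dotted form (rust-lang/rust#112606); only IPv4-mapped addresses keep it. A small std-only illustration, assuming a toolchain that includes that change:

use std::net::Ipv6Addr;

fn main() {
    // Same byte pattern as the test case above: ..00 00 01 02 03 04.
    let compat = Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0x0102, 0x0304);
    println!("{}", compat); // now "::102:304", previously "::1.2.3.4"

    // IPv4-mapped addresses (::ffff:a.b.c.d) still use dotted notation.
    let mapped = Ipv6Addr::new(0, 0, 0, 0, 0, 0xffff, 0x0102, 0x0304);
    println!("{}", mapped); // "::ffff:1.2.3.4"
}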
#![allow(incomplete_features)] +#![allow(clippy::needless_raw_string_hashes)] +#![allow(clippy::needless_return_with_question_mark)] #![feature(proc_macro_hygiene)] #![feature(specialization)] #![feature(test)] diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index b892333b0ef3..e3ab7d352971 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1091,16 +1091,13 @@ mod tests { use tipb::{Expr, ScalarFuncSig}; #[allow(clippy::trivially_copy_pass_by_ref)] - #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a::)] - fn fn_a_nonnull( - metadata: &i64, - v: &Int, - ) -> Result> { + #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a)] + fn fn_a_nonnull(metadata: &i64, v: &Int) -> Result> { assert_eq!(*metadata, 42); Ok(Some(v + *metadata)) } - fn prepare_a(_expr: &mut Expr) -> Result { + fn prepare_a(_expr: &mut Expr) -> Result { Ok(42) } @@ -1136,7 +1133,7 @@ mod tests { // fn_b: CastIntAsReal // fn_c: CastIntAsString Ok(match expr.get_sig() { - ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta::(), + ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta(), ScalarFuncSig::CastIntAsReal => fn_b_fn_meta::(), ScalarFuncSig::CastIntAsString => fn_c_fn_meta::(), _ => unreachable!(), diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 576aa5cfa768..858edfffec26 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -605,7 +605,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engine.put(&data_key(k), v).unwrap(); } (r, base_data) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 25f58352750f..43e5f1bea054 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -9,6 +9,7 @@ #![feature(min_specialization)] #![feature(type_alias_impl_trait)] #![feature(associated_type_defaults)] +#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index c53c58965192..b786d2aa6813 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -11,9 +11,9 @@ where let mut start = 0; let bytes = file_name.as_bytes(); for (index, &b) in bytes.iter().enumerate() { - if (b'A'..=b'Z').contains(&b) - || (b'a'..=b'z').contains(&b) - || (b'0'..=b'9').contains(&b) + if b.is_ascii_uppercase() + || b.is_ascii_lowercase() + || b.is_ascii_digit() || b == b'.' 
|| b == b'-' || b == b'_' diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 76fad6e8a34c..a2d0943df90e 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -247,7 +247,7 @@ where HashMapEntry::Occupied(mut e) => { self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); - let mut entry = e.get_mut(); + let entry = e.get_mut(); self.trace.promote(entry.record); entry.value = value; } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 291254c5227b..a28978096837 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -33,7 +33,7 @@ pub trait HeapSize { impl HeapSize for [u8] { fn heap_size(&self) -> usize { - self.len() * mem::size_of::() + mem::size_of_val(self) } } diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index 260aa88ac8e0..af22e411767c 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -64,7 +64,7 @@ impl Collector for AllocStatsCollector { .set(dealloc as _); }); let mut g = self.memory_stats.collect(); - g.extend(self.allocation.collect().into_iter()); + g.extend(self.allocation.collect()); g } } diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 4492e33a9335..354ef74adb0f 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -302,6 +302,8 @@ mod tests { use super::*; + // the JoinHandler is useless here, so just ignore this warning. + #[allow(clippy::let_underscore_future)] fn spawn_and_wait( rx_builder: impl FnOnce() -> S, ) -> (Runtime, Arc) { diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 6ec1621c629d..61608d1518fe 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -333,7 +333,7 @@ mod tests { for _ in 0..num * 10 { std::thread::spawn(move || { loop { - let _ = (0..10_000_000).into_iter().sum::(); + let _ = (0..10_000_000).sum::(); } }); } diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index bb555e11794f..a7a2b421ab04 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -81,14 +81,14 @@ impl Eq for TimeoutTask {} impl PartialOrd for TimeoutTask { fn partial_cmp(&self, other: &TimeoutTask) -> Option { - self.next_tick.partial_cmp(&other.next_tick) + Some(self.cmp(other)) } } impl Ord for TimeoutTask { fn cmp(&self, other: &TimeoutTask) -> Ordering { // TimeoutTask.next_tick must have same type of instants. - self.partial_cmp(other).unwrap() + self.next_tick.partial_cmp(&other.next_tick).unwrap() } } diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index fb0cd9001232..79727575d604 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -118,9 +118,10 @@ impl slog::Value for TimeStamp { const TS_SET_USE_VEC_LIMIT: usize = 8; /// A hybrid immutable set for timestamps. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Default, Clone, PartialEq)] pub enum TsSet { /// When the set is empty, avoid the useless cloning of Arc. + #[default] Empty, /// `Vec` is suitable when the set is small or the set is barely used, and /// it doesn't worth converting a `Vec` into a `HashSet`. 
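The `Duration` and `TimeoutTask` changes above follow the pattern the newer clippy lints push for: keep the real comparison logic in `Ord` and let `PartialOrd` delegate to it, so the two implementations can never disagree. A minimal, std-only sketch:

use std::cmp::Ordering;

#[derive(PartialEq, Eq)]
struct Nanos(i64);

impl Ord for Nanos {
    fn cmp(&self, other: &Self) -> Ordering {
        // Single source of truth for the ordering.
        self.0.cmp(&other.0)
    }
}

impl PartialOrd for Nanos {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to `Ord` instead of re-implementing the comparison.
        Some(self.cmp(other))
    }
}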
@@ -130,13 +131,6 @@ pub enum TsSet { Set(Arc>), } -impl Default for TsSet { - #[inline] - fn default() -> TsSet { - TsSet::Empty - } -} - impl TsSet { /// Create a `TsSet` from the given vec of timestamps. It will select the /// proper internal collection type according to the size. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 624ac81212d8..5305e3ec69ac 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -451,7 +451,7 @@ impl From for Mutation { /// `OldValue` is used by cdc to read the previous value associated with some /// key during the prewrite process. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Default, Clone, PartialEq)] pub enum OldValue { /// A real `OldValue`. Value { value: Value }, @@ -460,18 +460,13 @@ pub enum OldValue { /// `None` means we don't found a previous value. None, /// The user doesn't care about the previous value. + #[default] Unspecified, /// Not sure whether the old value exists or not. users can seek CF_WRITE to /// the give position to take a look. SeekWrite(Key), } -impl Default for OldValue { - fn default() -> Self { - OldValue::Unspecified - } -} - impl OldValue { pub fn value(value: Value) -> Self { OldValue::Value { value } @@ -590,8 +585,9 @@ impl WriteBatchFlags { /// The position info of the last actual write (PUT or DELETE) of a LOCK record. /// Note that if the last change is a DELETE, its LastChange can be either /// Exist(which points to it) or NotExist. -#[derive(Clone, Eq, PartialEq, Debug)] +#[derive(Clone, Default, Eq, PartialEq, Debug)] pub enum LastChange { + #[default] Unknown, /// The pointer may point to a PUT or a DELETE record. Exist { @@ -647,12 +643,6 @@ impl LastChange { } } -impl Default for LastChange { - fn default() -> Self { - LastChange::Unknown - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/rust-toolchain b/rust-toolchain index 4e5f9a4d82b8..c1eb62e26cb8 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-11-15 +nightly-2023-08-15 diff --git a/src/config/mod.rs b/src/config/mod.rs index 4f9a9a01b4a0..6b3332fb0151 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1482,7 +1482,7 @@ impl DbConfig { opts.set_paranoid_checks(b); } if for_engine == EngineType::RaftKv { - opts.set_info_log(RocksdbLogger::default()); + opts.set_info_log(RocksdbLogger); } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1858,7 +1858,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDbLogger::default()); + opts.set_info_log(RaftDbLogger); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -2015,7 +2015,7 @@ impl ConfigManager for DbConfigManger { self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); - let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); + let cf_config = change.extract_if(|(name, _)| name.ends_with("cf")); for (cf_name, cf_change) in cf_config { if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default @@ -2040,7 +2040,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - 
.drain_filter(|(name, _)| name == "rate_bytes_per_sec") + .extract_if(|(name, _)| name == "rate_bytes_per_sec") .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); @@ -2049,7 +2049,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - .drain_filter(|(name, _)| name == "rate_limiter_auto_tuned") + .extract_if(|(name, _)| name == "rate_limiter_auto_tuned") .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); @@ -2058,7 +2058,7 @@ impl ConfigManager for DbConfigManger { } if let Some(size) = change - .drain_filter(|(name, _)| name == "write_buffer_limit") + .extract_if(|(name, _)| name == "write_buffer_limit") .next() { let size: ReadableSize = size.1.into(); @@ -2066,14 +2066,14 @@ impl ConfigManager for DbConfigManger { } if let Some(f) = change - .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") + .extract_if(|(name, _)| name == "write_buffer_flush_oldest_first") .next() { self.db.set_flush_oldest_first(f.1.into())?; } if let Some(background_jobs_config) = change - .drain_filter(|(name, _)| name == "max_background_jobs") + .extract_if(|(name, _)| name == "max_background_jobs") .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); @@ -2081,7 +2081,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_subcompactions_config) = change - .drain_filter(|(name, _)| name == "max_sub_compactions") + .extract_if(|(name, _)| name == "max_sub_compactions") .next() { let max_subcompactions: u32 = background_subcompactions_config.1.into(); @@ -2090,7 +2090,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_flushes_config) = change - .drain_filter(|(name, _)| name == "max_background_flushes") + .extract_if(|(name, _)| name == "max_background_flushes") .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index 02f45d353115..7d2d7e9e9477 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -285,7 +285,7 @@ pub fn tls_collect_scan_details(cmd: ReqTag, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index fcd16f9b9478..874917130e48 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -64,11 +64,13 @@ type HandlerStreamStepResult = Result<(Option, bool)>; #[async_trait] pub trait RequestHandler: Send { /// Processes current request and produces a response. + #[allow(clippy::diverging_sub_expression)] async fn handle_request(&mut self) -> Result> { panic!("unary request is not supported for this handler"); } /// Processes current request and produces streaming responses. + #[allow(clippy::diverging_sub_expression)] async fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { panic!("streaming request is not supported for this handler"); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6d40ffe959c8..1a670c917ca9 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -66,9 +66,9 @@ const REQUEST_WRITE_CONCURRENCY: usize = 16; /// bytes. In detail, they are: /// - 2 bytes for the request type (Tag+Value). /// - 2 bytes for every string or bytes field (Tag+Length), they are: -/// . + the key field -/// . + the value field -/// . + the CF field (None for CF_DEFAULT) +/// . + the key field +/// . + the value field +/// . 
+ the CF field (None for CF_DEFAULT) /// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). /// - 2 bytes for the request itself (which would be embedded into a /// [`RaftCmdRequest`].) diff --git a/src/lib.rs b/src/lib.rs index b3e9ebaf8e84..aafb099c6cc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,13 +23,14 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(drain_filter)] +#![feature(extract_if)] #![feature(deadline_api)] #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] #![allow(incomplete_features)] #![feature(return_position_impl_trait_in_trait)] +#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index cf17aea81ebd..1ee1d108edc6 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1096,7 +1096,7 @@ fn get_tablet_cache( "tablet load failed, region_state {:?}", region_state.get_state() ); - return Err(box_err!(e)); + Err(box_err!(e)) } } } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 665824a1baca..fe5a252b8dbc 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -826,6 +826,7 @@ pub mod test_utils { use crate::storage::kv::RocksEngine as StorageRocksEngine; /// Do a global GC with the given safe point. + #[allow(clippy::needless_pass_by_ref_mut)] pub fn gc_by_compact(engine: &mut StorageRocksEngine, _: &[u8], safe_point: u64) { let engine = engine.get_rocksdb(); // Put a new key-value pair to ensure compaction can be triggered correctly. diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index be18f8216d5b..d2dc6532200a 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -546,7 +546,9 @@ impl GcMan ) -> GcManagerResult> { // Get the information of the next region to do GC. let (region, next_key) = self.get_next_gc_context(from_key); - let Some(region) = region else { return Ok(None) }; + let Some(region) = region else { + return Ok(None); + }; let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); @@ -807,7 +809,7 @@ mod tests { // Following code asserts gc_tasks == expected_gc_tasks. assert_eq!(gc_tasks.len(), expected_gc_tasks.len()); - let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks.into_iter()).all( + let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks).all( |((region, safe_point), (expect_region, expect_safe_point))| { region == expect_region && safe_point == expect_safe_point.into() }, @@ -884,7 +886,7 @@ mod tests { #[test] fn test_auto_gc_rewinding() { - for regions in vec![ + for regions in [ // First region starts with empty and last region ends with empty. 
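`Vec::drain_filter` was renamed to `Vec::extract_if` on the newer nightly, which is why the config-manager hunks and the `#![feature(..)]` switch above change together. A short sketch of the API as it exists on that toolchain, assuming `#![feature(extract_if)]` at the crate root:

#![feature(extract_if)]

fn main() {
    let mut v: Vec<i32> = (1..=6).collect();
    // Lazily removes matching elements as the iterator is consumed;
    // the remaining elements stay in the original vector.
    let evens: Vec<i32> = v.extract_if(|x| *x % 2 == 0).collect();
    assert_eq!(evens, [2, 4, 6]);
    assert_eq!(v, [1, 3, 5]);
}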
vec![ (b"".to_vec(), b"1".to_vec(), 1), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index c608470ba87a..de40975632f2 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -254,7 +254,7 @@ fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Ve let mut keys_in_region = Vec::new(); loop { - let Some(key) = keys.peek() else {break}; + let Some(key) = keys.peek() else { break }; let key = key.as_encoded().as_slice(); if key < region.get_start_key() { @@ -552,7 +552,7 @@ impl GcRunner { let mut keys = keys.into_iter().peekable(); for region in regions { let mut raw_modifies = MvccRaw::new(); - let mut snapshot = self.get_snapshot(self.store_id, ®ion)?; + let snapshot = self.get_snapshot(self.store_id, ®ion)?; let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); let mut next_gc_key = keys_in_region.next(); @@ -563,7 +563,7 @@ impl GcRunner { &range_start_key, &range_end_key, &mut raw_modifies, - &mut snapshot, + &snapshot, &mut gc_info, ) { GC_KEY_FAILURES.inc(); @@ -615,7 +615,7 @@ impl GcRunner { range_start_key: &Key, range_end_key: &Key, raw_modifies: &mut MvccRaw, - kv_snapshot: &mut ::Snap, + kv_snapshot: &::Snap, gc_info: &mut GcInfo, ) -> Result<()> { let start_key = key.clone().append_ts(safe_point.prev()); @@ -669,10 +669,7 @@ impl GcRunner { } pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { - let stats = self - .stats_map - .entry(key_mode) - .or_insert_with(Default::default); + let stats = self.stats_map.entry(key_mode).or_default(); stats } @@ -2269,7 +2266,6 @@ mod tests { fn generate_keys(start: u64, end: u64) -> Vec { (start..end) - .into_iter() .map(|i| { let key = format!("k{:02}", i); Key::from_raw(key.as_bytes()) diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 9583df80dd67..938dfaff8a66 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -361,20 +361,15 @@ impl DetectTable { } /// The role of the detector. -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Default, PartialEq, Clone, Copy)] pub enum Role { /// The node is the leader of the detector. Leader, /// The node is a follower of the leader. 
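The `entry(..).or_insert_with(Default::default)` calls above become `entry(..).or_default()`, the same behaviour with less ceremony. A std-only sketch:

use std::collections::HashMap;

fn main() {
    let mut scan_details: HashMap<&'static str, u64> = HashMap::new();
    // Inserts `u64::default()` (0) on first access, then updates in place.
    *scan_details.entry("get").or_default() += 1;
    *scan_details.entry("get").or_default() += 1;
    assert_eq!(scan_details["get"], 2);
}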
+ #[default] Follower, } -impl Default for Role { - fn default() -> Role { - Role::Follower - } -} - impl From for Role { fn from(role: StateRole) -> Role { match role { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 2074d469310f..f5b36dffbac3 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -306,6 +306,7 @@ struct WriteResFeed { unsafe impl Send for WriteResFeed {} impl WriteResFeed { + #[allow(clippy::arc_with_non_send_sync)] fn pair() -> (Self, WriteResSub) { let core = Arc::new(WriteResCore { ev: AtomicU8::new(0), @@ -581,7 +582,9 @@ where tx.notify(res); } rx.inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index dacc90a91f0e..81143e6c2be8 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -291,7 +291,9 @@ impl tikv_kv::Engine for RaftKv2 { early_err: res.err(), }) .inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index d9b17c5d35c0..73a15983bd08 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -269,7 +269,9 @@ where /// Stops the Node. pub fn stop(&mut self) { let store_id = self.store.get_id(); - let Some((_, mut system)) = self.system.take() else { return }; + let Some((_, mut system)) = self.system.take() else { + return; + }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); system.shutdown(); } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index d0b715542d57..497d8240684d 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -300,7 +300,6 @@ where let debugger = self.debugger.clone(); let res = self.pool.spawn(async move { - let req = req; debugger .compact( req.get_db(), diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 8e77d65233ea..413e36a6645d 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -612,7 +612,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -639,7 +639,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -662,7 +662,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec!["2019/08/23 18:09:53.387 +08:00"] + let expected = ["2019/08/23 18:09:53.387 +08:00"] .iter() .map(|s| timestamp(s)) .collect::>(); @@ -671,7 +671,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# expected ); - for time in vec![0, i64::MAX].into_iter() { + for time in [0, i64::MAX].into_iter() { let log_iter = LogIterator::new( &log_file, timestamp("2019/08/23 18:09:53.387 +08:00"), @@ -680,7 +680,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 
18:09:59.387 +08:00", "2019/08/23 18:10:06.387 +08:00", @@ -704,7 +704,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![regex::Regex::new(".*test-filter.*").unwrap()], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:10:06.387 +08:00", // for invalid line ] @@ -783,7 +783,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# req.set_end_time(i64::MAX); req.set_levels(vec![LogLevel::Warn as _]); req.set_patterns(vec![".*test-filter.*".to_string()].into()); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:11:58.387 +08:00", "2019/08/23 18:11:59.387 +08:00", // for invalid line @@ -796,9 +796,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# s.collect::>() .await .into_iter() - .map(|mut resp| resp.take_messages().into_iter()) - .into_iter() - .flatten() + .flat_map(|mut resp| resp.take_messages().into_iter()) .map(|msg| msg.get_time()) .collect::>() }); diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 8a84eaf6293c..12494e9e7c4e 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -601,7 +601,7 @@ mod tests { ] ); // memory - for name in vec!["virtual", "swap"].into_iter() { + for name in ["virtual", "swap"].into_iter() { let item = collector .iter() .find(|x| x.get_tp() == "memory" && x.get_name() == name); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 5a4327ba46e2..6f1cf0eaa1f2 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -889,7 +889,6 @@ impl Tikv for Service { forward_duplex!(self.proxy, batch_commands, ctx, stream, sink); let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); - let ctx = Arc::new(ctx); let peer = ctx.peer(); let storage = self.storage.clone(); let copr = self.copr.clone(); diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 32c99867a3fd..1eba8cd81b73 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -387,9 +387,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { .. }, + ))))) )); // The tx should be dropped. 
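Several hunks in this patch only reflow `let .. else` bindings into the multi-line shape the newer rustfmt produces (for example in `gc_manager.rs` and the raftkv modules). For reference, a minimal std-only sketch of the construct, where the `else` block must diverge:

fn first_even(values: &[i32]) -> Option<i32> {
    let Some(&v) = values.iter().find(|v| **v % 2 == 0) else {
        // Diverge here: return, break, continue or panic.
        return None;
    };
    Some(v * 10)
}

fn main() {
    assert_eq!(first_even(&[1, 3, 4]), Some(40));
    assert_eq!(first_even(&[1, 3, 5]), None);
}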
rx.recv().unwrap_err(); @@ -422,9 +422,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::KeyIsLocked(_), + ))))) )); // Since the cancellation callback can fully execute only when it's successfully // removed from the lock waiting queues, it's impossible that `finish_request` diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index a81248fe9e25..68e0118610aa 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -110,12 +110,7 @@ impl Eq for LockWaitEntry {} impl PartialOrd for LockWaitEntry { fn partial_cmp(&self, other: &Self) -> Option { - // Reverse it since the priority queue is a max heap and we want to pop the - // minimal. - other - .parameters - .start_ts - .partial_cmp(&self.parameters.start_ts) + Some(self.cmp(other)) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e9477b56b0ff..d3b3e89a3f85 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -63,7 +63,7 @@ pub fn tls_collect_scan_details(cmd: CommandKind, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cb4057bfd7e2..b8224df696bb 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1946,7 +1946,7 @@ impl Storage { key_ranges.push(build_key_range(k.as_encoded(), k.as_encoded(), false)); (k, v) }) - .filter(|&(_, ref v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) + .filter(|(_, v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) .map(|(k, v)| match v { Ok(v) => { let (user_key, _) = F::decode_raw_key_owned(k, false).unwrap(); @@ -3892,9 +3892,9 @@ mod tests { let result = block_on(storage.get(Context::default(), Key::from_raw(b"x"), 100.into())); assert!(matches!( result, - Err(Error(box ErrorInner::Txn(txn::Error( - box txn::ErrorInner::Mvcc(mvcc::Error(box mvcc::ErrorInner::KeyIsLocked { .. })) - )))) + Err(Error(box ErrorInner::Txn(txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked { .. 
}, + )))))) )); } @@ -5744,7 +5744,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -5803,7 +5803,7 @@ mod tests { let mut total_bytes: u64 = 0; let mut is_first = true; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6116,7 +6116,7 @@ mod tests { #[test] fn test_raw_batch_put() { - for for_cas in vec![false, true].into_iter() { + for for_cas in [false, true].into_iter() { test_kv_format_impl!(test_raw_batch_put_impl(for_cas)); } } @@ -6245,7 +6245,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6260,7 +6260,7 @@ mod tests { } // Verify pairs in a batch - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, @@ -6292,7 +6292,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6310,7 +6310,7 @@ mod tests { let mut ids = vec![]; let cmds = test_data .iter() - .map(|&(ref k, _)| { + .map(|(k, _)| { let mut req = RawGetRequest::default(); req.set_context(ctx.clone()); req.set_key(k.clone()); @@ -6331,7 +6331,7 @@ mod tests { #[test] fn test_raw_batch_delete() { - for for_cas in vec![false, true].into_iter() { + for for_cas in [false, true].into_iter() { test_kv_format_impl!(test_raw_batch_delete_impl(for_cas)); } } @@ -6381,10 +6381,10 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data .iter() - .map(|&(ref k, ref v)| Some((k.clone(), v.clone()))) + .map(|(k, v)| Some((k.clone(), v.clone()))) .collect(); expect_multi_values( results, @@ -6512,7 +6512,7 @@ mod tests { // Scan pairs with key only let mut results: Vec> = test_data .iter() - .map(|&(ref k, _)| Some((k.clone(), vec![]))) + .map(|(k, _)| Some((k.clone(), vec![]))) .collect(); expect_multi_values( results.clone(), @@ -6909,7 +6909,7 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index cc4403229c13..474c789a31dc 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -1287,7 +1287,7 @@ mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (1..30).into_iter().step_by(2) { + for start_ts in (1..30).step_by(2) { must_prewrite_lock(&mut engine, k, k, start_ts); must_commit(&mut engine, k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 48158eda946a..61a366c12eea 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -418,11 +418,10 @@ impl MvccReader { estimated_versions_to_last_change, } if estimated_versions_to_last_change >= SEEK_BOUND => 
{ let key_with_ts = key.clone().append_ts(commit_ts); - let Some(value) = self - .snapshot - .get_cf(CF_WRITE, &key_with_ts)? else { - return Ok(None); - }; + let Some(value) = self.snapshot.get_cf(CF_WRITE, &key_with_ts)? + else { + return Ok(None); + }; self.statistics.write.get += 1; let write = WriteRef::parse(&value)?.to_owned(); assert!( @@ -2421,7 +2420,7 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2430,7 +2429,7 @@ pub mod tests { engine.commit(k, 45, 46); // Write enough LOCK recrods - for start_ts in (50..80).into_iter().step_by(2) { + for start_ts in (50..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2485,7 +2484,7 @@ pub mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2522,7 +2521,7 @@ pub mod tests { engine.put(k, 1, 2); // 10 locks were put - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2549,7 +2548,7 @@ pub mod tests { feature_gate.set_version("6.1.0").unwrap(); set_tls_feature_gate(feature_gate); engine.delete(k, 51, 52); - for start_ts in (56..80).into_iter().step_by(2) { + for start_ts in (56..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } let feature_gate = FeatureGate::default(); @@ -2581,7 +2580,7 @@ pub mod tests { let k = b"k"; engine.put(k, 1, 2); - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } engine.rollback(k, 30); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 3437a1e5432d..2b0a8e13582a 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -633,7 +633,7 @@ impl ScanPolicy for LatestEntryPolicy { fn scan_latest_handle_lock( current_user_key: Key, - cfg: &mut ScannerConfig, + cfg: &ScannerConfig, cursors: &mut Cursors, statistics: &mut Statistics, ) -> Result> { @@ -1636,7 +1636,7 @@ mod latest_kv_tests { must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); must_commit(&mut engine, b"k4", 3, 7); - for start_ts in (10..30).into_iter().step_by(2) { + for start_ts in (10..30).step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 8c4ad5da08b0..aa635827961d 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -290,7 +290,7 @@ mod tests { RawEncodeSnapshot::from_snapshot(raw_mvcc_snapshot); // get_cf - for &(ref key, ref value, _) in &test_data[6..12] { + for (key, value, _) in &test_data[6..12] { let res = encode_snapshot.get_cf(CF_DEFAULT, &ApiV2::encode_raw_key(key, None)); assert_eq!(res.unwrap(), Some(value.to_owned())); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 64e22a13585a..713155f91608 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -766,7 +766,6 @@ fn async_commit_timestamps( #[cfg(not(feature = "failpoints"))] let injected_fallback = false; - let max_commit_ts = max_commit_ts; if (!max_commit_ts.is_zero() && 
min_commit_ts > max_commit_ts) || injected_fallback { warn!("commit_ts is too large, fallback to normal 2PC"; "key" => log_wrappers::Value::key(key.as_encoded()), @@ -1875,7 +1874,6 @@ pub mod tests { // At most 12 ops per-case. let ops_count = rg.gen::() % 12; let ops = (0..ops_count) - .into_iter() .enumerate() .map(|(i, _)| { if i == 0 { diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 9a54895e7e20..61dbdac65652 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -88,8 +88,8 @@ mod tests { fn test_atomic_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let raw_keys = vec![b"ra", b"rz"]; - let raw_values = vec![b"valuea", b"valuez"]; + let raw_keys = [b"ra", b"rz"]; + let raw_values = [b"valuea", b"valuez"]; let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let mut modifies = vec![]; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 10446db6292b..2f39b29bc64c 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1853,9 +1853,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::AlreadyExist { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::AlreadyExist { .. }))) )); assert_eq!(cm.max_ts().into_inner(), 15); @@ -1878,9 +1876,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::WriteConflict { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. }))) )); } @@ -2286,9 +2282,9 @@ mod tests { .unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request @@ -2469,9 +2465,9 @@ mod tests { let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); // Passing keys in different order gets the same result: let cmd = PrewritePessimistic::with_defaults( @@ -2492,9 +2488,9 @@ mod tests { let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. 
+ }))) )); // If the two keys are sent in different requests, it would be the client's duty diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index a662d9bab79c..549d1d226361 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -224,7 +224,7 @@ impl Latches { keep_latches_for_next_cmd: Option<(u64, &Lock)>, ) -> Vec { // Used to - let dummy_vec = vec![]; + let dummy_vec = []; let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), None => (None, dummy_vec.iter().peekable()), @@ -282,9 +282,9 @@ mod tests { fn test_wakeup() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k3", "k5"]; + let keys_a = ["k1", "k3", "k5"]; let mut lock_a = Lock::new(keys_a.iter()); - let keys_b = vec!["k4", "k5", "k6"]; + let keys_b = ["k4", "k5", "k6"]; let mut lock_b = Lock::new(keys_b.iter()); let cid_a: u64 = 1; let cid_b: u64 = 2; @@ -310,9 +310,9 @@ mod tests { fn test_wakeup_by_multi_cmds() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k2", "k3"]; - let keys_b = vec!["k4", "k5", "k6"]; - let keys_c = vec!["k3", "k4"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k4", "k5", "k6"]; + let keys_c = ["k3", "k4"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); @@ -353,10 +353,10 @@ mod tests { fn test_wakeup_by_small_latch_slot() { let latches = Latches::new(5); - let keys_a = vec!["k1", "k2", "k3"]; - let keys_b = vec!["k6", "k7", "k8"]; - let keys_c = vec!["k3", "k4"]; - let keys_d = vec!["k7", "k10"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k6", "k7", "k8"]; + let keys_c = ["k3", "k4"]; + let keys_d = ["k7", "k10"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 197363043730..2ca3ef145c87 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -267,7 +267,7 @@ pub fn tls_collect_scan_details(cmd: &'static str, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 0a5708c74ce0..3698860b4ea3 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -147,7 +147,7 @@ where I: 'static, { fn partial_cmp(&self, other: &Self) -> Option { - self.get_name().partial_cmp(other.get_name()) + Some(self.cmp(other)) } } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 92dacfe6dc9f..99f2c9ee1f42 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -61,7 +61,7 @@ where .unwrap(); } let write_data = WriteData::from_modifies(txn.into_modifies()); - let _ = tikv_kv::write(engine, &ctx, write_data, None); + let _ = futures::executor::block_on(tikv_kv::write(engine, &ctx, write_data, None)); let keys: Vec = kvs.iter().map(|(k, _)| Key::from_raw(k)).collect(); let snapshot = engine.snapshot(Default::default()).unwrap(); (snapshot, keys) diff --git a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs index 4c033f2a80df..241284a72282 100644 --- 
a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs +++ b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs @@ -79,7 +79,7 @@ impl ChunkBuilder { pub fn build(self, tps: &[FieldType]) -> Chunk { let mut fields = Vec::with_capacity(tps.len()); let mut arrays: Vec> = Vec::with_capacity(tps.len()); - for (field_type, column) in tps.iter().zip(self.columns.into_iter()) { + for (field_type, column) in tps.iter().zip(self.columns) { match field_type.as_accessor().tp() { FieldTypeTp::Tiny | FieldTypeTp::Short diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index d567edd5add9..a545d9935e64 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -171,6 +171,7 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { } #[bench] +#[allow(clippy::let_underscore_future)] fn bench_async_snapshot(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); @@ -205,6 +206,7 @@ fn bench_async_snapshot(b: &mut test::Bencher) { } #[bench] +#[allow(clippy::let_underscore_future)] fn bench_async_write(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 05c602824c20..e164d59f82a8 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -12,7 +12,7 @@ const DEFAULT_DATA_SIZE: usize = 100_000; fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { let mut wb = db.write_batch(); - for &(ref k, ref v) in kvs { + for (k, v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } wb.write().unwrap(); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index a9dbd36a81a6..f40f40e6af11 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,5 +1,8 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(clippy::arc_with_non_send_sync)] +#![allow(clippy::unnecessary_mut_passed)] +#[allow(clippy::let_underscore_future)] mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 217269bb5b85..55c06d87b070 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -35,7 +35,7 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { // check the region new leader is elected. fn assert_region_leader_changed( - cluster: &mut Cluster, + cluster: &Cluster, region_id: u64, original_leader: u64, ) { @@ -91,7 +91,7 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); fail::cfg(get_fp(usage, 1), "return").unwrap(); @@ -199,7 +199,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -393,7 +393,7 @@ fn test_disk_full_followers_with_hibernate_regions() { // check the region new leader is elected. 
fn assert_region_merged( - cluster: &mut Cluster, + cluster: &Cluster, left_region_key: &[u8], right_region_key: &[u8], ) { diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 073f72764194..2dd5b6ac04b4 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -57,6 +57,7 @@ fn test_write_buffer_manager() { } } +#[rustfmt::skip] // The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: // note: before rocksdb/pull/347, lock is called before on_memtable_sealed. // Case: diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 616a4e5e1965..74561396593f 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -93,6 +93,7 @@ fn test_break_leadership_on_restart() { // received, and become `GroupState::Ordered` after the proposal is received. // But they should keep wakeful for a while. #[test] +#[allow(clippy::let_underscore_future)] fn test_store_disconnect_with_hibernate() { let mut cluster = new_server_cluster(0, 3); let base_tick_ms = 50; diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 0115d6d7ba53..201aafce6fb5 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -43,6 +43,7 @@ macro_rules! request { } #[test] +#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let pd_client_reconnect_fp = "pd_client_reconnect"; diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index ac427c29e69d..583dad2ff34d 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -43,6 +43,7 @@ macro_rules! 
request { } #[test] +#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let client = Arc::new(client); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index a795422c120c..5ab7edb503f3 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -208,7 +208,7 @@ fn test_leader_transfer() { #[test] fn test_region_merge() { let mut suite = TestSuite::new(3, ApiVersion::V2); - let keys = vec![b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; + let keys = [b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; suite.must_raw_put(keys[1], b"v1"); suite.must_raw_put(keys[3], b"v3"); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index 7351044b2979..dc6906b668ae 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -4,11 +4,16 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; use test_raftstore_macro::test_case; +use tikv_util::config::ReadableDuration; -#[test_case(test_raftstore::must_new_cluster_and_kv_client)] -#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore::must_new_cluster_with_cfg_and_kv_client_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_with_cfg_and_kv_client_mul)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = new_cluster(); + let (_cluster, client, ctx) = new_cluster(1, |c| { + // set a small renew duration to avoid triggering pre-renew that can affect the + // metrics. + c.cfg.tikv.raft_store.renew_leader_lease_advance_duration = ReadableDuration::millis(1); + }); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 65c50793d7a6..10a65271462b 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1426,8 +1426,7 @@ impl Filter for TeeFilter { // 2. the splitted region set has_dirty_data be true in `apply_snapshot` // 3. the splitted region schedule tablet trim task in `on_applied_snapshot` // with tablet index 5 -// 4. the splitted region received a snapshot sent from its -// leader +// 4. the splitted region received a snapshot sent from its leader // 5. after finishing applying this snapshot, the tablet index in storage // changed to 6 // 6.
tablet trim complete and callbacked to raftstore diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 57047bef9d41..4668c24ad661 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1620,9 +1620,7 @@ fn test_before_propose_deadline() { assert!( matches!( res, - Err(StorageError(box StorageErrorInner::Kv(KvError( - box KvErrorInner::Request(_), - )))) + Err(StorageError(box StorageErrorInner::Kv(KvError(box KvErrorInner::Request(_))))) ), "actual: {:?}", res diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 14f4161c7ae1..4154a764d99e 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -751,7 +751,7 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { let handle = std::thread::spawn(move || { let mut mutations = vec![]; - for key in vec![b"key3".to_vec(), b"key4".to_vec()] { + for key in [b"key3".to_vec(), b"key4".to_vec()] { let mut mutation = kvproto::kvrpcpb::Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(key); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 75eb62bab990..02fb8c046c84 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -361,8 +361,8 @@ fn test_read_lock_after_become_follower() { /// 1. Inserted 5 entries and make all stores commit and apply them. /// 2. Prevent the store 3 from append following logs. /// 3. Insert another 20 entries. -/// 4. Wait for some time so that part of the entry cache are compacted -/// on the leader(store 1). +/// 4. Wait for some time so that part of the entry cache are compacted on the +/// leader(store 1). macro_rules! run_cluster_for_test_warmup_entry_cache { ($cluster:expr) => { // Let the leader compact the entry cache. diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 4cfd4be07be9..bd5461e6134f 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -492,6 +492,7 @@ fn test_backup_raw_meta() { } #[test] +#[allow(clippy::permissions_set_readonly_false)] fn test_invalid_external_storage() { let mut suite = TestSuite::new(1, 144 * 1024 * 1024, ApiVersion::V1); // Put some data. diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 3d8cf85b02cf..f821ffea2e7e 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -67,6 +67,6 @@ fn test_apply_twice() { &tikv, &ctx, CF_DEFAULT, - default_fst.into_iter().chain(default_snd.into_iter()), + default_fst.into_iter().chain(default_snd), ); } diff --git a/tests/integrations/mod.rs b/tests/integrations/mod.rs index 2b68c0a8ba94..86ceb5369e7a 100644 --- a/tests/integrations/mod.rs +++ b/tests/integrations/mod.rs @@ -4,6 +4,8 @@ #![feature(box_patterns)] #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::extra_unused_type_parameters)] extern crate test; diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 056641e1e3f8..30ea12a424b3 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -287,8 +287,8 @@ fn test_flush_before_stop2() { // 1. 
lock `k` with index 6 // 2. on_applied_res => lockcf's last_modified = 6 // 3. flush lock cf => lockcf's flushed_index = 6 -// 4. batch {unlock `k`, write `k`} with index 7 -// (last_modified is updated in store but RocksDB is modified in apply. So, +// 4. batch {unlock `k`, write `k`} with index 7 (last_modified is updated in +// store but RocksDB is modified in apply. So, // before on_apply_res, the last_modified is not updated.) // // flush-before-close: diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index fbc7629c73fe..56cb65cce87e 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -5,13 +5,13 @@ use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush(cluster: &Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { +fn flush_then_check(cluster: &Cluster, interval: u64, written: bool) { flush(cluster); // Wait for compaction. sleep_ms(interval * 2); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index d61d6a59182c..13e718b269d7 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -420,6 +420,7 @@ fn test_txn_query_stats_tmpl() { fail::remove("mock_collect_tick_interval"); } +#[allow(clippy::extra_unused_type_parameters)] fn raw_put( _cluster: &Cluster, client: &TikvClient, From 058336850ce52cd0eb2691931b92f10318529d09 Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 15 Sep 2023 14:55:39 +0800 Subject: [PATCH 049/203] stablize case test_store_disconnect_with_hibernate (#15596) close tikv/tikv#15607 None Signed-off-by: qupeng Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 5 +++++ tests/failpoints/cases/test_hibernate.rs | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 36c4c7e8e5fb..371e8cd8eb51 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2859,6 +2859,11 @@ where } fn reset_raft_tick(&mut self, state: GroupState) { + debug!( + "reset raft tick to {:?}", state; + "region_id"=> self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + ); self.fsm.reset_hibernate_state(state); self.fsm.missing_ticks = 0; self.fsm.peer.should_wake_up = false; diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 74561396593f..d8f73f312b66 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -102,10 +102,10 @@ fn test_store_disconnect_with_hibernate() { cluster.cfg.raft_store.raft_election_timeout_ticks = 10; cluster.cfg.raft_store.unreachable_backoff = ReadableDuration::millis(500); cluster.cfg.server.raft_client_max_backoff = ReadableDuration::millis(200); - // So the random election timeout will always be 10, which makes the case more - // stable. + // Use a small range but still random election timeouts, which makes the case + // more stable. 
cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; - cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; + cluster.cfg.raft_store.raft_max_election_timeout_ticks = 13; configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); let r = cluster.run_conf_change(); @@ -117,7 +117,7 @@ fn test_store_disconnect_with_hibernate() { must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); // Wait until all peers of region 1 hibernate. - thread::sleep(Duration::from_millis(base_tick_ms * 30)); + thread::sleep(Duration::from_millis(base_tick_ms * 40)); // Stop the region leader. fail::cfg("receive_raft_message_from_outside", "pause").unwrap(); @@ -129,7 +129,7 @@ fn test_store_disconnect_with_hibernate() { fail::remove("receive_raft_message_from_outside"); // Wait for a while. Peers of region 1 shouldn't hibernate. - thread::sleep(Duration::from_millis(base_tick_ms * 30)); + thread::sleep(Duration::from_millis(base_tick_ms * 40)); must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); } From 820ed9395b97853145fea4a21d6d906cbcd4d2fb Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Sat, 16 Sep 2023 14:42:09 +0800 Subject: [PATCH 050/203] tikv-ctl v2: get_all_regions_in_store excludes `tombstone` (#15522) ref tikv/tikv#14654 get_all_regions_in_store should exclude tombstone Signed-off-by: SpadeA-Tang Co-authored-by: tonyxuqqi --- cmd/tikv-ctl/src/executor.rs | 15 ++++-- src/server/debug2.rs | 94 +++++++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index a145118acead..a20d6ce26026 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1332,11 +1332,16 @@ impl DebugExecutor for DebuggerImplV2 { } fn get_region_size(&self, region: u64, cfs: Vec<&str>) -> Vec<(String, usize)> { - self.region_size(region, cfs) - .unwrap_or_else(|e| perror_and_exit("Debugger::region_size", e)) - .into_iter() - .map(|(cf, size)| (cf.to_owned(), size)) - .collect() + match self.region_size(region, cfs) { + Ok(v) => v + .into_iter() + .map(|(cf, size)| (cf.to_owned(), size)) + .collect(), + Err(e) => { + println!("Debugger::region_size: {}", e); + vec![] + } + } } fn get_region_info(&self, region: u64) -> RegionInfo { diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 1ee1d108edc6..7060b20bdb2b 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -688,19 +688,19 @@ impl Debugger for DebuggerImplV2 { fn region_size>(&self, region_id: u64, cfs: Vec) -> Result> { match self.raft_engine.get_region_state(region_id, u64::MAX) { Ok(Some(region_state)) => { - if region_state.get_state() != PeerState::Normal { - return Err(Error::NotFound(format!( - "region {:?} has been deleted", - region_id - ))); - } let region = region_state.get_region(); + let state = region_state.get_state(); let start_key = &keys::data_key(region.get_start_key()); let end_key = &keys::data_end_key(region.get_end_key()); let mut sizes = vec![]; let mut tablet_cache = get_tablet_cache(&self.tablet_reg, region.id, Some(region_state))?; - let tablet = tablet_cache.latest().unwrap(); + let Some(tablet) = tablet_cache.latest() else { + return Err(Error::NotFound(format!( + "tablet not found, region_id={:?}, peer_state={:?}", + region_id, state + ))); + }; for cf in cfs { let mut size = 0; box_try!(tablet.scan(cf.as_ref(), start_key, end_key, false, |k, v| { @@ -731,7 +731,7 
@@ impl Debugger for DebuggerImplV2 { )); } - let mut region_states = get_all_region_states_with_normal_state(&self.raft_engine); + let mut region_states = get_all_active_region_states(&self.raft_engine); region_states.sort_by(|r1, r2| { r1 .get_region() @@ -786,12 +786,21 @@ impl Debugger for DebuggerImplV2 { fn get_all_regions_in_store(&self) -> Result> { let mut region_ids = vec![]; + let raft_engine = &self.raft_engine; self.raft_engine .for_each_raft_group::(&mut |region_id| { + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state == PeerState::Tombstone { + return Ok(()); + } region_ids.push(region_id); Ok(()) }) .unwrap(); + region_ids.sort_unstable(); Ok(region_ids) } @@ -844,21 +853,29 @@ impl Debugger for DebuggerImplV2 { Err(e) => return Err(Error::EngineTrait(e)), }; - if region_state.state != PeerState::Normal { - return Err(Error::NotFound(format!("none region {:?}", region_id))); + let state = region_state.get_state(); + if state == PeerState::Tombstone { + return Err(Error::NotFound(format!( + "region {:?} is tombstone", + region_id + ))); } - let region = region_state.get_region(); - let start = keys::enc_start_key(region); - let end = keys::enc_end_key(region); - - let mut tablet_cache = - get_tablet_cache(&self.tablet_reg, region.id, Some(region_state.clone())).unwrap(); - let tablet = tablet_cache.latest().unwrap(); + let region = region_state.get_region().clone(); + let start = keys::enc_start_key(&region); + let end = keys::enc_end_key(&region); + + let mut tablet_cache = get_tablet_cache(&self.tablet_reg, region.id, Some(region_state))?; + let Some(tablet) = tablet_cache.latest() else { + return Err(Error::NotFound(format!( + "tablet not found, region_id={:?}, peer_state={:?}", + region_id, state + ))); + }; let mut res = dump_write_cf_properties(tablet, &start, &end)?; let mut res1 = dump_default_cf_properties(tablet, &start, &end)?; res.append(&mut res1); - let middle_key = match box_try!(get_region_approximate_middle(tablet, region)) { + let middle_key = match box_try!(get_region_approximate_middle(tablet, &region)) { Some(data_key) => keys::origin_key(&data_key).to_vec(), None => Vec::new(), }; @@ -1102,9 +1119,7 @@ fn get_tablet_cache( } } -fn get_all_region_states_with_normal_state( - raft_engine: &ER, -) -> Vec { +fn get_all_active_region_states(raft_engine: &ER) -> Vec { let mut region_states = vec![]; raft_engine .for_each_raft_group::(&mut |region_id| { @@ -1112,7 +1127,7 @@ fn get_all_region_states_with_normal_state( .get_region_state(region_id, u64::MAX) .unwrap() .unwrap(); - if region_state.state == PeerState::Normal { + if region_state.state != PeerState::Tombstone { region_states.push(region_state); } Ok(()) @@ -1133,7 +1148,7 @@ fn deivde_regions_for_concurrency( registry: &TabletRegistry, threads: u64, ) -> Result>> { - let region_states = get_all_region_states_with_normal_state(raft_engine); + let region_states = get_all_active_region_states(raft_engine); if threads == 1 { return Ok(vec![ @@ -1452,6 +1467,7 @@ mod tests { let mut wb = raft_engine.log_batch(10); wb.put_region_state(region_id, 10, &state).unwrap(); raft_engine.consume(&mut wb, true).unwrap(); + debugger.tablet_reg.remove(region_id); debugger.region_size(region_id, cfs.clone()).unwrap_err(); } @@ -1930,9 +1946,9 @@ mod tests { assert_eq!(region_info_2, region_info_2_before); } - #[test] // It tests that the latest apply state cannot be read as it is invisible // on persisted_applied + #[test] fn test_drop_unapplied_raftlog_2()
{ let dir = test_util::temp_dir("test-debugger", false); let debugger = new_debugger(dir.path()); @@ -1968,4 +1984,34 @@ mod tests { 80 ); } + + #[test] + fn test_get_all_regions_in_store() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + + init_region_state(raft_engine, 1, &[100, 101], 1); + init_region_state(raft_engine, 3, &[100, 101], 1); + init_region_state(raft_engine, 4, &[100, 101], 1); + + let mut lb = raft_engine.log_batch(3); + + let mut put_tombsotne_region = |region_id: u64| { + let mut region = metapb::Region::default(); + region.set_id(region_id); + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Tombstone); + region_state.set_region(region.clone()); + lb.put_region_state(region_id, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + raft_engine.consume(&mut lb, true).unwrap(); + }; + + put_tombsotne_region(2); + put_tombsotne_region(5); + + let regions = debugger.get_all_regions_in_store().unwrap(); + assert_eq!(regions, vec![1, 3, 4]); + } } From 086965358f0109340b84261695fbeaccce3a62e2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 18 Sep 2023 18:06:11 +0800 Subject: [PATCH 051/203] raftstore-v2: report async snapshot metrics to prometheus (#15562) ref tikv/tikv#15401 report async snapshot metrics to prometheus Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/query/local.rs | 8 ++++- src/server/raftkv/mod.rs | 2 +- src/server/raftkv2/mod.rs | 32 ++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 5f6d589eca6d..fcc93636640d 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -28,6 +28,7 @@ use raftstore::{ use slog::{debug, Logger}; use tikv_util::{box_err, codec::number::decode_u64, time::monotonic_raw_now, Either}; use time::Timespec; +use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS}; use txn_types::WriteBatchFlags; use crate::{ @@ -335,7 +336,12 @@ where async move { let (mut fut, mut reader) = match res { - Either::Left(Ok(snap)) => return Ok(snap), + Either::Left(Ok(snap)) => { + GLOBAL_TRACKERS.with_tracker(get_tls_tracker_token(), |t| { + t.metrics.local_read = true; + }); + return Ok(snap); + } Either::Left(Err(e)) => return Err(e), Either::Right((fut, reader)) => (fut, reader), }; diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index f5b36dffbac3..58287c2bb834 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -686,7 +686,7 @@ where tracker.metrics.read_index_propose_wait_nanos as f64 / 1_000_000_000.0, ); - // snapshot may be hanlded by lease read in raftstore + // snapshot may be handled by lease read in raftstore if tracker.metrics.read_index_confirm_wait_nanos > 0 { ASYNC_REQUESTS_DURATIONS_VEC .snapshot_read_index_confirm diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 81143e6c2be8..9785e821312f 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -28,6 +28,7 @@ use raftstore_v2::{ }; use tikv_kv::{Modify, WriteEvent}; use tikv_util::time::Instant; +use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS}; use txn_types::{TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::{ @@ -172,7 +173,7 @@ impl tikv_kv::Engine for RaftKv2 { 
.set_key_ranges(mem::take(&mut ctx.key_ranges).into()); } ASYNC_REQUESTS_COUNTER_VEC.snapshot.all.inc(); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut header = new_request_header(ctx.pb_ctx); let mut flags = 0; @@ -200,9 +201,32 @@ impl tikv_kv::Engine for RaftKv2 { let res = f.await; match res { Ok(snap) => { - ASYNC_REQUESTS_DURATIONS_VEC - .snapshot - .observe(begin_instant.saturating_elapsed_secs()); + let elapse = begin_instant.saturating_elapsed_secs(); + let tracker = get_tls_tracker_token(); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + if tracker.metrics.read_index_propose_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_propose_wait + .observe( + tracker.metrics.read_index_propose_wait_nanos as f64 + / 1_000_000_000.0, + ); + // snapshot may be handled by lease read in raftstore + if tracker.metrics.read_index_confirm_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_confirm + .observe( + tracker.metrics.read_index_confirm_wait_nanos as f64 + / 1_000_000_000.0, + ); + } + } else if tracker.metrics.local_read { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_local_read + .observe(elapse); + } + }); + ASYNC_REQUESTS_DURATIONS_VEC.snapshot.observe(elapse); ASYNC_REQUESTS_COUNTER_VEC.snapshot.success.inc(); Ok(snap) } From 4a5fb7321ca2ee2bab0b31f6556c8fb196a590f4 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 18 Sep 2023 18:24:11 +0800 Subject: [PATCH 052/203] test: make test test_destroy_missing more stable (#15616) close tikv/tikv#15615 Signed-off-by: glorv Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore-v2/src/worker/tablet.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 0b0429eb8d17..ef9739226e75 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -602,6 +602,13 @@ impl Runner { } } +#[cfg(test)] +impl Runner { + pub fn get_running_task_count(&self) -> usize { + self.low_pri_pool.get_running_task_count() + } +} + impl Runnable for Runner where EK: KvEngine, @@ -822,6 +829,14 @@ mod tests { runner.run(Task::destroy(r_1, 100)); assert!(path.exists()); registry.remove(r_1); + // waiting for async `pause_background_work` to be finished, + // this task can block tablet's destroy. 
+ for _i in 0..100 { + if runner.get_running_task_count() == 0 { + break; + } + std::thread::sleep(Duration::from_millis(5)); + } runner.on_timeout(); assert!(!path.exists()); assert!(runner.pending_destroy_tasks.is_empty()); From 2db4b895a1e82d32830493eb10cea30925f65c7e Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 20 Sep 2023 10:40:42 +0800 Subject: [PATCH 053/203] raftstore-v2: fix rollback merge and commit merge can happen simultaneously (#15625) ref tikv/tikv#15242 fix rollback merge and commit merge can happen simultaneously Signed-off-by: SpadeA-Tang --- .../operation/command/admin/merge/commit.rs | 10 +- .../operation/command/admin/merge/rollback.rs | 12 +- components/raftstore-v2/src/raft/peer.rs | 10 ++ tests/failpoints/cases/test_merge.rs | 147 ++++++++++++++++++ 4 files changed, 177 insertions(+), 2 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 5bd92e3ea1c0..5208dcc96a86 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -319,7 +319,7 @@ impl Peer { region ); assert!(!self.storage().has_dirty_data()); - if self.is_leader() { + if self.is_leader() && !self.leader_transferring() { let index = commit_of_merge(req.get_admin_request().get_commit_merge()); if self.proposal_control().is_merging() { // `on_admin_command` may delay our request indefinitely. It's better to check @@ -341,12 +341,19 @@ impl Peer { "res" => ?res, ); } else { + fail::fail_point!("on_propose_commit_merge_success"); return; } } let _ = store_ctx .router .force_send(source_id, PeerMsg::RejectCommitMerge { index }); + } else if self.leader_transferring() { + info!( + self.logger, + "not to propose commit merge when transferring leader"; + "transferee" => self.leader_transferee(), + ); } } else { info!( @@ -362,6 +369,7 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) -> Result { + (|| fail::fail_point!("propose_commit_merge_1", store_ctx.store_id == 1, |_| {}))(); let mut proposal_ctx = ProposalContext::empty(); proposal_ctx.insert(ProposalContext::COMMIT_MERGE); let data = req.write_to_bytes().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index cb45fdcf1cf1..d931a295f4d6 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -38,6 +38,7 @@ impl Peer { store_ctx: &mut StoreContext, index: u64, ) { + fail::fail_point!("on_reject_commit_merge_1", store_ctx.store_id == 1, |_| {}); let self_index = self.merge_context().and_then(|c| c.prepare_merge_index()); if self_index != Some(index) { info!( @@ -75,7 +76,7 @@ impl Apply { pub fn apply_rollback_merge( &mut self, req: &AdminRequest, - _index: u64, + index: u64, ) -> Result<(AdminResponse, AdminCmdResult)> { fail::fail_point!("apply_rollback_merge"); PEER_ADMIN_CMD_COUNTER.rollback_merge.all.inc(); @@ -95,6 +96,15 @@ impl Apply { "state" => ?merge_state, ); } + + let prepare_merge_commit = rollback.commit; + info!( + self.logger, + "execute RollbackMerge"; + "commit" => prepare_merge_commit, + "index" => index, + ); + let mut region = self.region().clone(); let version = region.get_region_epoch().get_version(); // Update version to avoid duplicated 
rollback requests. diff --git a/components/raftstore-v2/src/raft/peer.rs index c3a80e3756c2..87d41de776c1 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -862,6 +862,16 @@ impl Peer { ) } + #[inline] + pub fn leader_transferee(&self) -> u64 { + self.leader_transferee + } + + #[inline] + pub fn leader_transferring(&self) -> bool { + self.leader_transferee != raft::INVALID_ID + } + #[inline] pub fn long_uncommitted_threshold(&self) -> Duration { Duration::from_secs(self.long_uncommitted_threshold) diff --git a/tests/failpoints/cases/test_merge.rs index eb6b8a235e1d..08b7474bb8e6 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -3,6 +3,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, + mpsc::{channel, Sender}, *, }, thread, @@ -19,6 +20,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::store::*; +use raftstore_v2::router::PeerMsg; use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; @@ -1706,3 +1708,148 @@ fn test_destroy_source_peer_while_merging() { must_get_equal(&cluster.get_engine(i), b"k5", b"v5"); } } + +struct MsgTimeoutFilter { + tx: Sender, } + +impl Filter for MsgTimeoutFilter { + fn before(&self, msgs: &mut Vec) -> raftstore::Result<()> { + let mut res = Vec::with_capacity(msgs.len()); + for m in msgs.drain(..) { + if m.get_message().msg_type == MessageType::MsgTimeoutNow { + self.tx.send(m).unwrap(); + } else { + res.push(m); } } + + *msgs = res; + check_messages(msgs) } } + +// Concurrent execution between transfer leader and merge can cause rollback and +// commit merge at the same time before this fix, which corrupts the region. +// It can happen like this: +// Assume at the beginning, the leaders of source and target are both on node-1 +// 1. node-1 transfer leader to node-2: execute up to sending MsgTimeoutNow +// (leader_transferee has been set), but before becoming follower. +// 2. node-1 source region propose, and apply PrepareMerge +// 3. node-1 target region propose CommitMerge but fail (due to +// leader_transferee being set) +// 4. node-1 source region successfully proposed rollback merge +// 5. node-2 target region became leader and applied the first no-op entry +// 6.
node-2 target region successfully proposed commit merge +// Now, rollback at source region and commit at target region are both proposed +// and will be executed which will cause region corrupt +#[test] +fn test_concurrent_between_transfer_leader_and_merge() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.run(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + for i in 0..3 { + must_get_equal(&cluster.get_engine(i + 1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); + } + + let pd_client = Arc::clone(&cluster.pd_client); + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let right = pd_client.get_region(b"k1").unwrap(); + let left = pd_client.get_region(b"k3").unwrap(); + cluster.must_transfer_leader( + left.get_id(), + left.get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + cluster.must_transfer_leader( + right.get_id(), + right + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + // Source region: 1, Target Region: 1000 + // Let target region in leader_transfering status by interceptting MsgTimeoutNow + // msg by using Filter. So we make node-1-1000 be in leader_transferring status + // for some time. + let (tx, rx_msg) = channel(); + let filter = MsgTimeoutFilter { tx }; + cluster.add_send_filter_on_node(1, Box::new(filter)); + + pd_client.transfer_leader( + right.get_id(), + right + .get_peers() + .iter() + .find(|p| p.store_id == 2) + .cloned() + .unwrap(), + vec![], + ); + + let msg = rx_msg.recv().unwrap(); + + // Now, node-1-1000 is in leader_transferring status. After it reject proposing + // commit merge, make node-1-1 block before proposing rollback merge until + // node-2-1000 propose commit merge. + + fail::cfg("on_reject_commit_merge_1", "pause").unwrap(); + + let router = cluster.get_router(2).unwrap(); + let (tx, rx) = channel(); + let _ = fail::cfg_callback("propose_commit_merge_1", move || { + tx.send(()).unwrap(); + }); + + let (tx2, rx2) = channel(); + let _ = fail::cfg_callback("on_propose_commit_merge_success", move || { + tx2.send(()).unwrap(); + }); + + cluster.merge_region(left.get_id(), right.get_id(), Callback::None); + + // Actually, store 1 should not reach the line of propose_commit_merge_1 + let _ = rx.recv_timeout(Duration::from_secs(2)); + router + .force_send(msg.get_region_id(), PeerMsg::RaftMessage(Box::new(msg))) + .unwrap(); + + // Wait region 1 of node 2 to become leader + rx2.recv().unwrap(); + fail::remove("on_reject_commit_merge_1"); + + let timer = Instant::now(); + loop { + if right.get_region_epoch().get_version() + == cluster.get_region_epoch(right.get_id()).get_version() + { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("region {:?} is still not merged.", right); + } + } else { + break; + } + sleep_ms(10); + } + + let region = pd_client.get_region(b"k1").unwrap(); + assert_eq!(region.get_id(), right.get_id()); + assert_eq!(region.get_start_key(), right.get_start_key()); + assert_eq!(region.get_end_key(), left.get_end_key()); + + cluster.must_put(b"k4", b"v4"); +} From ec4a9002f153f86c609e902ba685eee7a1224e6c Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 20 Sep 2023 11:51:13 +0800 Subject: [PATCH 054/203] raftstore: upgrade tokio timer to fix insecure issues. 
(#15622) ref tikv/tikv#15621 Signed-off-by: lucasliang --- Cargo.lock | 42 ++++++++++++--------------------- Cargo.toml | 1 + components/tikv_util/Cargo.toml | 2 +- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb5e711d34da..34f9c3819584 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1290,7 +1290,7 @@ dependencies = [ "crossbeam-deque", "crossbeam-epoch", "crossbeam-queue", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1300,7 +1300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1311,7 +1311,7 @@ checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1322,7 +1322,7 @@ checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ "autocfg", "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "lazy_static", "memoffset 0.6.4", "scopeguard", @@ -1335,7 +1335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1346,21 +1346,10 @@ checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "scopeguard", ] -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if 0.1.10", - "lazy_static", -] - [[package]] name = "crossbeam-utils" version = "0.8.8" @@ -1989,7 +1978,7 @@ dependencies = [ "bcc", "collections", "crc32fast", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "fs2", "lazy_static", "libc 0.2.146", @@ -4839,7 +4828,7 @@ checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "num_cpus", ] @@ -7136,11 +7125,10 @@ dependencies = [ [[package]] name = "tokio-executor" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb2d1b8f4548dbf5e1f7818512e9c406860678f29c300cdf0ebac72d1a3a1671" +version = "0.1.9" +source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#4394380fa3c1f7f2c702a4ccc5ff01384746fdfd" dependencies = [ - "crossbeam-utils 0.7.2", + "crossbeam-utils", "futures 0.1.31", ] @@ -7201,9 +7189,9 @@ dependencies = [ [[package]] name = "tokio-timer" version = "0.2.13" -source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#e8ac149d93f4a9bf49ea569d8d313ee40c5eb448" +source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#4394380fa3c1f7f2c702a4ccc5ff01384746fdfd" dependencies = [ - "crossbeam-utils 0.7.2", + "crossbeam-utils", "futures 0.1.31", "slab", "tokio-executor", @@ -7377,7 +7365,7 @@ name = "tracker" version = "0.0.1" dependencies = [ "collections", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "kvproto", "lazy_static", "parking_lot 0.12.1", @@ -7907,7 +7895,7 @@ source = 
"git+https://github.com/tikv/yatp.git?branch=master#5572a78702572087cab dependencies = [ "crossbeam-deque", "crossbeam-skiplist", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "dashmap", "fail", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 5bc49b17e423..c4c70e999be9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -368,6 +368,7 @@ tipb = { git = "https://github.com/pingcap/tipb.git" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 6de354fa2595..b502a701136c 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -57,7 +57,7 @@ thiserror = "1.0" tikv_alloc = { workspace = true } time = "0.1" tokio = { version = "1.5", features = ["rt-multi-thread"] } -tokio-executor = "0.1" +tokio-executor = { workspace = true } tokio-timer = { workspace = true } tracker = { workspace = true } url = "2" From 76df17e2c67e139a79653293b566d604a94a0352 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Wed, 20 Sep 2023 14:22:43 +0800 Subject: [PATCH 055/203] log backup: fix the race of on events and do flush (#15618) close tikv/tikv#15602 Signed-off-by: 3pointer --- components/backup-stream/src/router.rs | 132 ++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 16 deletions(-) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 6ce8486109f7..b2fd9acc743b 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -708,22 +708,25 @@ impl TempFileKey { /// The full name of the file owns the key. 
fn temp_file_name(&self) -> String { + let timestamp = (|| { + fail::fail_point!("temp_file_name_timestamp", |t| t.map_or_else( + || TimeStamp::physical_now(), + |v| + // reduce the precision of timestamp + v.parse::().ok().map_or(0, |u| TimeStamp::physical_now() / u) + )); + TimeStamp::physical_now() + })(); + let uuid = uuid::Uuid::new_v4(); if self.is_meta { format!( - "meta_{:08}_{}_{:?}_{}.temp.log", - self.region_id, - self.cf, - self.cmd_type, - TimeStamp::physical_now(), + "meta_{:08}_{}_{:?}_{:?}_{}.temp.log", + self.region_id, self.cf, self.cmd_type, uuid, timestamp, ) } else { format!( - "{:08}_{:08}_{}_{:?}_{}.temp.log", - self.table_id, - self.region_id, - self.cf, - self.cmd_type, - TimeStamp::physical_now(), + "{:08}_{:08}_{}_{:?}_{:?}_{}.temp.log", + self.table_id, self.region_id, self.cf, self.cmd_type, uuid, timestamp, ) } } @@ -864,6 +867,7 @@ impl StreamTaskInfo { } async fn on_events_of_key(&self, key: TempFileKey, events: ApplyEvents) -> Result<()> { + fail::fail_point!("before_generate_temp_file"); if let Some(f) = self.files.read().await.get(&key) { self.total_size .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); @@ -886,6 +890,7 @@ impl StreamTaskInfo { let f = w.get(&key).unwrap(); self.total_size .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); + fail::fail_point!("after_write_to_file"); Ok(()) } @@ -970,7 +975,9 @@ impl StreamTaskInfo { pub async fn move_to_flushing_files(&self) -> Result<&Self> { // if flushing_files is not empty, which represents this flush is a retry // operation. - if !self.flushing_files.read().await.is_empty() { + if !self.flushing_files.read().await.is_empty() + || !self.flushing_meta_files.read().await.is_empty() + { return Ok(self); } @@ -1032,7 +1039,12 @@ impl StreamTaskInfo { // and push it into merged_file_info(DataFileGroup). file_info_clone.set_range_offset(stat_length); data_files_open.push({ - let file = shared_pool.open_raw_for_read(data_file.inner.path())?; + let file = shared_pool + .open_raw_for_read(data_file.inner.path()) + .context(format_args!( + "failed to open read file {:?}", + data_file.inner.path() + ))?; let compress_length = file.len().await?; stat_length += compress_length; file_info_clone.set_range_length(compress_length); @@ -1097,7 +1109,6 @@ impl StreamTaskInfo { .await?; self.merge_log(metadata, storage.clone(), &self.flushing_meta_files, true) .await?; - Ok(()) } @@ -1157,7 +1168,8 @@ impl StreamTaskInfo { UnpinReader(Box::new(Cursor::new(meta_buff))), buflen as _, ) - .await?; + .await + .context(format_args!("flush meta {:?}", meta_path))?; } Ok(()) } @@ -1191,13 +1203,14 @@ impl StreamTaskInfo { .await? .generate_metadata(store_id) .await?; + + fail::fail_point!("after_moving_to_flushing_files"); crate::metrics::FLUSH_DURATION .with_label_values(&["generate_metadata"]) .observe(sw.lap().as_secs_f64()); // flush log file to storage. self.flush_log(&mut metadata_info).await?; - // the field `min_resolved_ts` of metadata will be updated // only after flush is done. metadata_info.min_resolved_ts = metadata_info @@ -2413,4 +2426,91 @@ mod tests { let r = cfg_manager.dispatch(changed); assert!(r.is_err()); } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_flush_on_events_race() -> Result<()> { + let (tx, _rx) = dummy_scheduler(); + let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); + let router = Arc::new(RouterInner::new( + tx, + Config { + prefix: tmp.clone(), + // disable auto flush. 
+ temp_file_size_limit: 1000, + temp_file_memory_quota: 2, + max_flush_interval: Duration::from_secs(300), + }, + )); + + let (task, _path) = task("race".to_owned()).await?; + must_register_table(router.as_ref(), task, 1).await; + router + .must_mut_task_info("race", |i| { + i.storage = Arc::new(NoopStorage::default()); + }) + .await; + let mut b = KvEventsBuilder::new(42, 0); + b.put_table(CF_DEFAULT, 1, b"k1", b"v1"); + let events_before_flush = b.finish(); + + b.put_table(CF_DEFAULT, 1, b"k1", b"v1"); + let events_after_flush = b.finish(); + + // make timestamp precision to 1 seconds. + fail::cfg("temp_file_name_timestamp", "return(1000)").unwrap(); + + let (trigger_tx, trigger_rx) = std::sync::mpsc::sync_channel(0); + let trigger_rx = std::sync::Mutex::new(trigger_rx); + + let (fp_tx, fp_rx) = std::sync::mpsc::sync_channel(0); + let fp_rx = std::sync::Mutex::new(fp_rx); + + let t = router.get_task_info("race").await.unwrap(); + let _ = router.on_events(events_before_flush).await; + + // make generate temp files ***happen after*** moving files to flushing_files + // and read flush file ***happen between*** genenrate file name and + // write kv to file. T1 is write thread. T2 is flush thread + // The order likes + // [T1] generate file name -> [T2] moving files to flushing_files -> [T1] write + // kv to file -> [T2] read flush file. + fail::cfg_callback("after_write_to_file", move || { + fp_tx.send(()).unwrap(); + }) + .unwrap(); + + fail::cfg_callback("before_generate_temp_file", move || { + trigger_rx.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + + fail::cfg_callback("after_moving_to_flushing_files", move || { + trigger_tx.send(()).unwrap(); + fp_rx.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + + // set flush status to true, because we disabled the auto flush. + t.set_flushing_status(true); + let router_clone = router.clone(); + let _ = tokio::join!( + // do flush in another thread + tokio::spawn(async move { + router_clone.do_flush("race", 42, TimeStamp::max()).await; + }), + router.on_events(events_after_flush) + ); + fail::remove("after_write_to_file"); + fail::remove("before_generate_temp_file"); + fail::remove("after_moving_to_flushing_files"); + fail::remove("temp_file_name_timestamp"); + + // set flush status to true, because we disabled the auto flush. + t.set_flushing_status(true); + let res = router.do_flush("race", 42, TimeStamp::max()).await; + // this time flush should success. + assert!(res.is_some()); + assert_eq!(t.files.read().await.len(), 0,); + Ok(()) + } } From 641f9b8dab1d8770ef5fded564490f8dbc094b74 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 20 Sep 2023 14:42:13 +0800 Subject: [PATCH 056/203] metrics: make disk usage clearer in the grafana (#15583) close tikv/tikv#15582 add metrics for detail disk usage. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/metrics.rs | 24 +++++++++++-- .../src/operation/command/write/ingest.rs | 3 ++ .../raftstore-v2/src/worker/pd/store.rs | 30 ++++++++-------- components/raftstore/src/store/fsm/store.rs | 4 ++- components/raftstore/src/store/worker/pd.rs | 36 +++++++++---------- components/sst_importer/src/sst_importer.rs | 19 +++++++++- metrics/grafana/tikv_details.json | 29 ++++++++++++++- 7 files changed, 106 insertions(+), 39 deletions(-) diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index 4e185658f156..7e7121170d66 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -48,6 +48,20 @@ make_static_metric! { try_connect, } + pub label_enum StoreSizeEventType { + capacity, + available, + used, + snap_size, + raft_size, + kv_size, + import_size, + } + + pub struct StoreSizeEventIntrVec: IntGauge { + "type" => StoreSizeEventType, + } + pub struct PDRequestEventHistogramVec: Histogram { "type" => PDRequestEventType, } @@ -101,8 +115,14 @@ lazy_static! { &["type"] ) .unwrap(); - pub static ref STORE_SIZE_GAUGE_VEC: IntGaugeVec = - register_int_gauge_vec!("tikv_store_size_bytes", "Size of storage.", &["type"]).unwrap(); + pub static ref STORE_SIZE_EVENT_INT_VEC: StoreSizeEventIntrVec = + register_static_int_gauge_vec!( + StoreSizeEventIntrVec, + "tikv_store_size_bytes", + "Size of storage.", + &["type"] + ) + .unwrap(); pub static ref REGION_READ_KEYS_HISTOGRAM: Histogram = register_histogram!( "tikv_region_read_keys", "Histogram of keys written for regions", diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 92f5923d1679..e963434fe837 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -4,6 +4,7 @@ use collections::HashMap; use crossbeam::channel::TrySendError; use engine_traits::{data_cf_offset, KvEngine, RaftEngine, DATA_CFS_LEN}; use kvproto::import_sstpb::SstMeta; +use pd_client::metrics::STORE_SIZE_EVENT_INT_VEC; use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, Result, @@ -39,6 +40,8 @@ impl Store { &mut self, ctx: &mut StoreContext, ) -> Result<()> { + let import_size = box_try!(ctx.sst_importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); if ssts.is_empty() { return Ok(()); diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index a5aad42d85c4..b3fd3245be6d 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -9,7 +9,7 @@ use kvproto::pdpb; use pd_client::{ metrics::{ REGION_READ_BYTES_HISTOGRAM, REGION_READ_KEYS_HISTOGRAM, REGION_WRITTEN_BYTES_HISTOGRAM, - REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_GAUGE_VEC, + REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_EVENT_INT_VEC, }, PdClient, }; @@ -263,15 +263,9 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["capacity"]) - .set(capacity as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["available"]) - .set(available as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["used"]) - .set(used_size as 
i64); + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); // Update slowness statistics self.update_slowness_in_store_stats(&mut stats, last_query_sum); @@ -473,12 +467,16 @@ where true }); let snap_size = self.snap_mgr.total_snap_size().unwrap(); - let used_size = snap_size - + kv_size - + self - .raft_engine - .get_engine_size() - .expect("raft engine used size"); + let raft_size = self + .raft_engine + .get_engine_size() + .expect("engine used size"); + + STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_size as i64); + + let used_size = snap_size + kv_size + raft_size; let mut available = capacity.checked_sub(used_size).unwrap_or_default(); // We only care about rocksdb SST file size, so we should check disk available // here. diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 53559bbe1b83..2434dfdd8e63 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -43,7 +43,7 @@ use kvproto::{ raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{Feature, FeatureGate, PdClient}; +use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -2791,6 +2791,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); let mut validate_ssts = Vec::new(); + let import_size = box_try!(self.ctx.importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index cb067ca840b8..6aa192bd28e2 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1359,15 +1359,9 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["capacity"]) - .set(capacity as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["available"]) - .set(available as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["used"]) - .set(used_size as i64); + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); let slow_score = self.slow_score.get(); stats.set_slow_score(slow_score as u64); @@ -2590,15 +2584,21 @@ fn collect_engine_size( } else { store_info.capacity }; - let used_size = snap_mgr_size - + store_info - .kv_engine - .get_engine_used_size() - .expect("kv engine used size") - + store_info - .raft_engine - .get_engine_size() - .expect("raft engine used size"); + let raft_size = store_info + .raft_engine + .get_engine_size() + .expect("raft engine used size"); + + let kv_size = store_info + .kv_engine + .get_engine_used_size() + .expect("kv engine used size"); + + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_mgr_size as i64); + 
STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + + let used_size = snap_mgr_size + kv_size + raft_size; let mut available = capacity.checked_sub(used_size).unwrap_or_default(); // We only care about rocksdb SST file size, so we should check disk available // here. diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 502a81ff6a68..910cfa602dd7 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -4,7 +4,7 @@ use std::{ borrow::Cow, collections::HashMap, fs::File, - io::{self, BufReader, Read}, + io::{self, BufReader, ErrorKind, Read}, ops::Bound, path::{Path, PathBuf}, sync::{ @@ -293,6 +293,23 @@ impl SstImporter { path.save } + pub fn get_total_size(&self) -> Result { + let mut total_size = 0; + for entry in file_system::read_dir(self.dir.get_root_dir())? { + match entry.and_then(|e| e.metadata().map(|m| (e, m))) { + Ok((_, m)) => { + if !m.is_file() { + continue; + } + total_size += m.len(); + } + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + }; + } + Ok(total_size) + } + pub fn create(&self, meta: &SstMeta) -> Result { match self.dir.create(meta, self.key_manager.clone()) { Ok(f) => { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c31ee12b27b8..57c887820316 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -422,9 +422,36 @@ "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"used\"}) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-used", "refId": "A", "step": 10 + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"kv_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-kv_size", + "refId": "B", + "step": 10, + "hide": true + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"raft_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-raft_size", + "refId": "C", + "step": 10, + "hide": true + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"import_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-import_size", + "refId": "D", + "step": 10, + "hide": true } ], "thresholds": [], From 10f51d8478e488dcef026b4d2e7fdeea80f478eb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 20 Sep 2023 14:55:43 +0800 Subject: [PATCH 057/203] resolved_ts: remove hash set to save memory (#15554) close tikv/tikv#15553 The Resolver uses a hash set to keep track of locks associated with the same timestamp. When the length of the hash set reaches zero, it indicates that the transaction has been fully committed. To save memory, we can replace the hash set with an integer. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 26 ++- components/cdc/src/initializer.rs | 14 +- components/resolved_ts/src/endpoint.rs | 38 ++-- components/resolved_ts/src/resolver.rs | 210 ++++++++++++------ 4 files changed, 181 insertions(+), 107 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 0803ba1b99a6..2dae8ce745d9 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -8,7 +8,7 @@ use dashmap::{ }; use kvproto::metapb::Region; use raftstore::coprocessor::*; -use resolved_ts::{Resolver, TsSource}; +use resolved_ts::{Resolver, TsSource, TxnLocks}; use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; @@ -99,7 +99,7 @@ impl ActiveSubscription { pub enum CheckpointType { MinTs, StartTsOfInitialScan, - StartTsOfTxn(Option>), + StartTsOfTxn(Option<(TimeStamp, TxnLocks)>), } impl std::fmt::Debug for CheckpointType { @@ -109,10 +109,7 @@ impl std::fmt::Debug for CheckpointType { Self::StartTsOfInitialScan => write!(f, "StartTsOfInitialScan"), Self::StartTsOfTxn(arg0) => f .debug_tuple("StartTsOfTxn") - .field(&format_args!( - "{}", - utils::redact(&arg0.as_ref().map(|x| x.as_ref()).unwrap_or(&[])) - )) + .field(&format_args!("{:?}", arg0)) .finish(), } } @@ -466,9 +463,11 @@ impl std::fmt::Debug for FutureLock { impl TwoPhaseResolver { /// try to get one of the key of the oldest lock in the resolver. - pub fn sample_far_lock(&self) -> Option> { - let (_, keys) = self.resolver.locks().first_key_value()?; - keys.iter().next().cloned() + pub fn sample_far_lock(&self) -> Option<(TimeStamp, TxnLocks)> { + self.resolver + .locks() + .first_key_value() + .map(|(ts, txn_locks)| (*ts, txn_locks.clone())) } pub fn in_phase_one(&self) -> bool { @@ -572,6 +571,7 @@ mod test { use kvproto::metapb::{Region, RegionEpoch}; use raftstore::coprocessor::ObserveHandle; + use resolved_ts::TxnLocks; use txn_types::TimeStamp; use super::{SubscriptionTracer, TwoPhaseResolver}; @@ -674,7 +674,13 @@ mod test { ( region(4, 8, 1), 128.into(), - StartTsOfTxn(Some(Arc::from(b"Alpi".as_slice()))) + StartTsOfTxn(Some(( + TimeStamp::new(128), + TxnLocks { + lock_count: 1, + sample_lock: Some(Arc::from(b"Alpi".as_slice())), + } + ))) ), ] ); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 31cda4b9e729..504eab621ffc 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -575,7 +575,6 @@ mod tests { time::Duration, }; - use collections::HashSet; use engine_rocks::RocksEngine; use engine_traits::{MiscExt, CF_WRITE}; use futures::{executor::block_on, StreamExt}; @@ -584,6 +583,7 @@ mod tests { errorpb::Error as ErrorHeader, }; use raftstore::{coprocessor::ObserveHandle, router::CdcRaftRouter, store::RegionSnapshot}; + use resolved_ts::TxnLocks; use test_raftstore::MockRaftStoreRouter; use tikv::storage::{ kv::Engine, @@ -681,7 +681,7 @@ mod tests { fn test_initializer_build_resolver() { let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); - let mut expected_locks = BTreeMap::>>::new(); + let mut expected_locks = BTreeMap::::new(); // Only observe ["", "b\0x90"] let observed_range = ObservedRange::new( @@ -704,10 +704,12 @@ mod tests { total_bytes += v.len(); let ts = TimeStamp::new(i as _); must_prewrite_put(&mut engine, k, v, k, ts); - 
expected_locks - .entry(ts) - .or_default() - .insert(k.to_vec().into()); + let txn_locks = expected_locks.entry(ts).or_insert_with(|| { + let mut txn_locks = TxnLocks::default(); + txn_locks.sample_lock = Some(k.to_vec().into()); + txn_locks + }); + txn_locks.lock_count += 1; } let region = Region::default(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 600da207ec43..406d931ed7f5 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -42,7 +42,7 @@ use crate::{ metrics::*, resolver::{LastAttempt, Resolver}, scanner::{ScanEntries, ScanTask, ScannerPool}, - Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, + Error, Result, TsSource, TxnLocks, ON_DROP_WARN_HEAP_SIZE, }; /// grace period for identifying identifying slow resolved-ts and safe-ts. @@ -388,11 +388,11 @@ where E: KvEngine, S: StoreRegionMeta, { - fn is_leader(&self, store_id: Option, leader_store_id: Option) -> bool { - store_id.is_some() && store_id == leader_store_id - } - fn collect_stats(&mut self) -> Stats { + fn is_leader(store_id: Option, leader_store_id: Option) -> bool { + store_id.is_some() && store_id == leader_store_id + } + let store_id = self.get_or_init_store_id(); let mut stats = Stats::default(); self.region_read_progress.with(|registry| { @@ -407,10 +407,10 @@ where continue; } - if self.is_leader(store_id, leader_store_id) { + if is_leader(store_id, leader_store_id) { // leader resolved-ts if resolved_ts < stats.min_leader_resolved_ts.resolved_ts { - let resolver = self.regions.get(region_id).map(|x| &x.resolver); + let resolver = self.regions.get_mut(region_id).map(|x| &mut x.resolver); stats .min_leader_resolved_ts .set(*region_id, resolver, &core, &leader_info); @@ -1186,7 +1186,7 @@ struct LeaderStats { last_resolve_attempt: Option, applied_index: u64, // min lock in LOCK CF - min_lock: Option<(TimeStamp, Key)>, + min_lock: Option<(TimeStamp, TxnLocks)>, lock_num: Option, txn_num: Option, } @@ -1211,7 +1211,7 @@ impl LeaderStats { fn set( &mut self, region_id: u64, - resolver: Option<&Resolver>, + mut resolver: Option<&mut Resolver>, region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, leader_info: &LeaderInfo, ) { @@ -1222,21 +1222,13 @@ impl LeaderStats { duration_to_last_update_ms: region_read_progress .last_instant_of_update_ts() .map(|i| i.saturating_elapsed().as_millis() as u64), - last_resolve_attempt: resolver.and_then(|r| r.last_attempt.clone()), - min_lock: resolver.and_then(|r| { - r.oldest_transaction().map(|(ts, keys)| { - ( - *ts, - keys.iter() - .next() - .map(|k| Key::from_encoded_slice(k.as_ref())) - .unwrap_or_else(|| Key::from_encoded_slice("no_keys_found".as_ref())), - ) - }) - }), + last_resolve_attempt: resolver.as_mut().and_then(|r| r.take_last_attempt()), + min_lock: resolver + .as_ref() + .and_then(|r| r.oldest_transaction().map(|(t, tk)| (*t, tk.clone()))), applied_index: region_read_progress.applied_index(), - lock_num: resolver.map(|r| r.num_locks()), - txn_num: resolver.map(|r| r.num_transactions()), + lock_num: resolver.as_ref().map(|r| r.num_locks()), + txn_num: resolver.as_ref().map(|r| r.num_transactions()), }; } } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 85e7acff4a4a..239ef5666053 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -2,7 +2,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; -use collections::{HashMap, HashSet}; +use 
collections::{HashMap, HashMapEntry}; use raftstore::store::RegionReadProgress; use tikv_util::{ memory::{HeapSize, MemoryQuota, MemoryQuotaExceeded}, @@ -12,13 +12,12 @@ use txn_types::{Key, TimeStamp}; use crate::metrics::*; -const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB #[derive(Clone)] pub enum TsSource { // A lock in LOCK CF - Lock(Arc<[u8]>), + Lock(TxnLocks), // A memory lock in concurrency manager MemoryLock(Key), PdTso, @@ -41,13 +40,38 @@ impl TsSource { pub fn key(&self) -> Option { match self { - TsSource::Lock(k) => Some(Key::from_encoded_slice(k)), + TsSource::Lock(locks) => locks + .sample_lock + .as_ref() + .map(|k| Key::from_encoded_slice(k)), TsSource::MemoryLock(k) => Some(k.clone()), _ => None, } } } +#[derive(Default, Clone, PartialEq, Eq)] +pub struct TxnLocks { + pub lock_count: usize, + // A sample key in a transaction. + pub sample_lock: Option>, +} + +impl std::fmt::Debug for TxnLocks { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TxnLocks") + .field("lock_count", &self.lock_count) + .field( + "sample_lock", + &self + .sample_lock + .as_ref() + .map(|k| log_wrappers::Value::key(k)), + ) + .finish() + } +} + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -55,7 +79,7 @@ pub struct Resolver { // key -> start_ts locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. - pub(crate) lock_ts_heap: BTreeMap>>, + lock_ts_heap: BTreeMap, // The last shrink time. last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. @@ -71,7 +95,7 @@ pub struct Resolver { // The memory quota for the `Resolver` and its lock keys and timestamps. memory_quota: Arc, // The last attempt of resolve(), used for diagnosis. - pub(crate) last_attempt: Option, + last_attempt: Option, } #[derive(Clone)] @@ -107,13 +131,14 @@ impl std::fmt::Debug for Resolver { let mut dt = f.debug_tuple("Resolver"); dt.field(&format_args!("region={}", self.region_id)); - if let Some((ts, keys)) = far_lock { + if let Some((ts, txn_locks)) = far_lock { + dt.field(&format_args!( + "oldest_lock_count={:?}", + txn_locks.lock_count + )); dt.field(&format_args!( - "oldest_lock={:?}", - keys.iter() - // We must use Display format here or the redact won't take effect. - .map(|k| format!("{}", log_wrappers::Value::key(k))) - .collect::>() + "oldest_lock_sample={:?}", + txn_locks.sample_lock )); dt.field(&format_args!("oldest_lock_ts={:?}", ts)); } @@ -180,7 +205,7 @@ impl Resolver { self.stopped } - pub fn locks(&self) -> &BTreeMap>> { + pub fn locks(&self) -> &BTreeMap { &self.lock_ts_heap } @@ -219,13 +244,13 @@ impl Resolver { } self.locks_by_key.len() * (key_bytes / key_count + std::mem::size_of::()) + self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) + * (std::mem::size_of::() + std::mem::size_of::()) } fn lock_heap_size(&self, key: &[u8]) -> usize { // A resolver has // * locks_by_key: HashMap, TimeStamp> - // * lock_ts_heap: BTreeMap>> + // * lock_ts_heap: BTreeMap // // We only count memory used by locks_by_key. 
Because the majority of // memory is consumed by keys, locks_by_key and lock_ts_heap shares @@ -235,7 +260,7 @@ impl Resolver { key.heap_size() + std::mem::size_of::() } - fn shrink_ratio(&mut self, ratio: usize, timestamp: Option) { + fn shrink_ratio(&mut self, ratio: usize) { // HashMap load factor is 87% approximately, leave some margin to avoid // frequent rehash. // @@ -246,10 +271,6 @@ impl Resolver { { self.locks_by_key.shrink_to_fit(); } - if let Some(ts) = timestamp && let Some(lock_set) = self.lock_ts_heap.get_mut(&ts) - && lock_set.capacity() > lock_set.len() * cmp::max(MIN_SHRINK_RATIO, ratio) { - lock_set.shrink_to_fit(); - } } pub fn track_lock( @@ -273,8 +294,23 @@ impl Resolver { ); self.memory_quota.alloc(bytes)?; let key: Arc<[u8]> = key.into_boxed_slice().into(); - self.locks_by_key.insert(key.clone(), start_ts); - self.lock_ts_heap.entry(start_ts).or_default().insert(key); + match self.locks_by_key.entry(key) { + HashMapEntry::Occupied(_) => { + // Free memory quota because it's already in the map. + self.memory_quota.free(bytes); + } + HashMapEntry::Vacant(entry) => { + // Add lock count for the start ts. + let txn_locks = self.lock_ts_heap.entry(start_ts).or_insert_with(|| { + let mut txn_locks = TxnLocks::default(); + txn_locks.sample_lock = Some(entry.key().clone()); + txn_locks + }); + txn_locks.lock_count += 1; + + entry.insert(start_ts); + } + } Ok(()) } @@ -301,22 +337,17 @@ impl Resolver { "memory_in_use" => self.memory_quota.in_use(), ); - let mut shrink_ts = None; - if let Some(locked_keys) = self.lock_ts_heap.get_mut(&start_ts) { - // Only shrink large set, because committing a small transaction is - // fast and shrink adds unnecessary overhead. - const SHRINK_SET_CAPACITY: usize = 256; - if locked_keys.capacity() > SHRINK_SET_CAPACITY { - shrink_ts = Some(start_ts); + if let Some(txn_locks) = self.lock_ts_heap.get_mut(&start_ts) { + if txn_locks.lock_count > 0 { + txn_locks.lock_count -= 1; } - locked_keys.remove(key); - if locked_keys.is_empty() { + if txn_locks.lock_count == 0 { self.lock_ts_heap.remove(&start_ts); } - } + }; // Use a large ratio to amortize the cost of rehash. let shrink_ratio = 8; - self.shrink_ratio(shrink_ratio, shrink_ts); + self.shrink_ratio(shrink_ratio); } /// Try to advance resolved ts. @@ -333,7 +364,7 @@ impl Resolver { const AGGRESSIVE_SHRINK_RATIO: usize = 2; const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); if self.last_aggressive_shrink_time.saturating_elapsed() > AGGRESSIVE_SHRINK_INTERVAL { - self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO, None); + self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO); self.last_aggressive_shrink_time = Instant::now_coarse(); } @@ -344,17 +375,17 @@ impl Resolver { } // Find the min start ts. - let min_lock = self - .oldest_transaction() - .and_then(|(ts, locks)| locks.iter().next().map(|lock| (*ts, lock))); + let min_lock = self.oldest_transaction(); let has_lock = min_lock.is_some(); - let min_start_ts = min_lock.map(|(ts, _)| ts).unwrap_or(min_ts); + let min_start_ts = min_lock.as_ref().map(|(ts, _)| **ts).unwrap_or(min_ts); // No more commit happens before the ts. let new_resolved_ts = cmp::min(min_start_ts, min_ts); // reason is the min source of the new resolved ts. 
let reason = match (min_lock, min_ts) { - (Some(lock), min_ts) if lock.0 < min_ts => TsSource::Lock(lock.1.clone()), + (Some((lock_ts, txn_locks)), min_ts) if *lock_ts < min_ts => { + TsSource::Lock(txn_locks.clone()) + } (Some(_), _) => source, (None, _) => source, }; @@ -400,21 +431,16 @@ impl Resolver { pub(crate) fn log_locks(&self, min_start_ts: u64) { // log lock with the minimum start_ts >= min_start_ts - if let Some((start_ts, keys)) = self + if let Some((start_ts, txn_locks)) = self .lock_ts_heap .range(TimeStamp::new(min_start_ts)..) .next() { - let keys_for_log = keys - .iter() - .map(|key| log_wrappers::Value::key(key)) - .take(MAX_NUMBER_OF_LOCKS_IN_LOG) - .collect::>(); info!( "locks with the minimum start_ts in resolver"; "region_id" => self.region_id, "start_ts" => start_ts, - "sampled_keys" => ?keys_for_log, + "txn_locks" => ?txn_locks, ); } } @@ -431,9 +457,13 @@ impl Resolver { self.read_progress.as_ref() } - pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &HashSet>)> { + pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &TxnLocks)> { self.lock_ts_heap.iter().next() } + + pub(crate) fn take_last_attempt(&mut self) -> Option { + self.last_attempt.take() + } } #[cfg(test)] @@ -608,32 +638,76 @@ mod tests { } #[test] - fn test_untrack_lock_set_shrink_ratio() { + fn test_idempotent_track_and_untrack_lock() { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let mut resolver = Resolver::new(1, memory_quota); let mut key = vec![0; 16]; - let ts = TimeStamp::new(1); - for i in 0..1000usize { - key[0..8].copy_from_slice(&i.to_be_bytes()); - let _ = resolver.track_lock(ts, key.clone(), None); + + // track_lock + let mut ts = TimeStamp::default(); + for c in 0..10 { + ts.incr(); + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + let in_use1 = resolver.memory_quota.in_use(); + let key_count1 = resolver.locks_by_key.len(); + let txn_count1 = resolver.lock_ts_heap.len(); + let txn_lock_count1 = resolver.lock_ts_heap[&ts].lock_count; + assert!(in_use1 > 0); + assert_eq!(key_count1, (c + 1) * 100); + assert_eq!(txn_count1, c + 1); + + // Put same keys again, resolver internal state must be idempotent. 
+ for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + let in_use2 = resolver.memory_quota.in_use(); + let key_count2 = resolver.locks_by_key.len(); + let txn_count2 = resolver.lock_ts_heap.len(); + let txn_lock_count2 = resolver.lock_ts_heap[&ts].lock_count; + assert_eq!(in_use1, in_use2); + assert_eq!(key_count1, key_count2); + assert_eq!(txn_count1, txn_count2); + assert_eq!(txn_lock_count1, txn_lock_count2); } - assert!( - resolver.lock_ts_heap[&ts].capacity() >= 1000, - "{}", - resolver.lock_ts_heap[&ts].capacity() - ); + assert_eq!(resolver.resolve(ts, None, TsSource::PdTso), 1.into()); - for i in 0..990usize { - key[0..8].copy_from_slice(&i.to_be_bytes()); - resolver.untrack_lock(&key, None); + // untrack_lock + let mut ts = TimeStamp::default(); + for _ in 0..10 { + ts.incr(); + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + let in_use1 = resolver.memory_quota.in_use(); + let key_count1 = resolver.locks_by_key.len(); + let txn_count1 = resolver.lock_ts_heap.len(); + + // Unlock same keys again, resolver internal state must be idempotent. + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + let in_use2 = resolver.memory_quota.in_use(); + let key_count2 = resolver.locks_by_key.len(); + let txn_count2 = resolver.lock_ts_heap.len(); + assert_eq!(in_use1, in_use2); + assert_eq!(key_count1, key_count2); + assert_eq!(txn_count1, txn_count2); + + assert_eq!(resolver.resolve(ts, None, TsSource::PdTso), ts); } - // shrink_to_fit may reserve some space in accordance with the resize - // policy, but it is expected to be less than 100. - assert!( - resolver.lock_ts_heap[&ts].capacity() < 500, - "{}, {}", - resolver.lock_ts_heap[&ts].capacity(), - resolver.lock_ts_heap[&ts].len(), - ); + + assert_eq!(resolver.memory_quota.in_use(), 0); + assert_eq!(resolver.locks_by_key.len(), 0); + assert_eq!(resolver.lock_ts_heap.len(), 0); } } From 6971a4635b6b3a27b5be3db0fc4c8200d995d605 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 20 Sep 2023 13:09:12 -0700 Subject: [PATCH 058/203] upgrade flatbuffers from 2.1.2 to 23.5.26 to address security issue (#15628) ref tikv/tikv#15621 The security issue is https://github.com/google/flatbuffers/issues/6627. Upgrade flatbuffers from 2.1.2 to 23.5.26 to address it. 
Signed-off-by: tonyxuqqi Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- Cargo.lock | 539 ++++++++++++++---- components/backup-stream/Cargo.toml | 2 +- components/backup-stream/src/router.rs | 1 + .../src/codec/mysql/time/mod.rs | 3 + .../src/codec/mysql/time/tz.rs | 4 + tests/Cargo.toml | 2 +- 6 files changed, 453 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34f9c3819584..4f35ae6b935b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -59,6 +59,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if 1.0.0", + "const-random", + "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -78,6 +80,21 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc 0.2.146", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -133,28 +150,215 @@ dependencies = [ [[package]] name = "arrow" -version = "13.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" +checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" dependencies = [ - "bitflags", + "ahash 0.8.3", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-array" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" +dependencies = [ + "ahash 0.8.3", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "hashbrown 0.14.0", + "num 0.4.1", +] + +[[package]] +name = "arrow-buffer" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" +dependencies = [ + "bytes", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-cast" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", 
+ "chrono", + "half 2.3.1", + "lexical-core", + "num 0.4.1", +] + +[[package]] +name = "arrow-csv" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "csv", - "flatbuffers", - "half", - "hex 0.4.2", - "indexmap", + "csv-core", "lazy_static", "lexical-core", - "multiversion", - "num 0.4.0", - "rand 0.8.5", "regex", +] + +[[package]] +name = "arrow-data" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-ipc" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "indexmap 2.0.0", + "lexical-core", + "num 0.4.1", "serde", - "serde_derive", "serde_json", ] +[[package]] +name = "arrow-ord" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-row" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" +dependencies = [ + "ahash 0.8.3", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half 2.3.1", + "hashbrown 0.14.0", +] + +[[package]] +name = "arrow-schema" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" + +[[package]] +name = "arrow-select" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num 0.4.1", +] + +[[package]] +name = "arrow-string" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num 0.4.1", + "regex", + "regex-syntax 0.7.5", +] + [[package]] name = "async-channel" version = "1.6.1" @@ -407,7 +611,7 @@ dependencies = [ "bytes", "dyn-clone", "futures 0.3.15", - "getrandom 0.2.3", + "getrandom 0.2.10", "http-types", "log", "paste", @@ -591,7 +795,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", - "indexmap", + "indexmap 1.9.3", "kvproto", "lazy_static", "log_wrappers", @@ -807,9 +1011,9 @@ checksum = 
"08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] name = "bytes" -version = "1.0.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" dependencies = [ "serde", ] @@ -908,11 +1112,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc 0.2.146", ] [[package]] @@ -984,14 +1189,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.11" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ - "num-integer", + "android-tzdata", + "iana-time-zone", + "js-sys", "num-traits", "serde", - "time 0.1.42", + "wasm-bindgen", + "windows-targets", ] [[package]] @@ -1039,7 +1247,7 @@ dependencies = [ "atty", "bitflags", "clap_derive", - "indexmap", + "indexmap 1.9.3", "lazy_static", "os_str_bytes", "strsim 0.10.0", @@ -1138,6 +1346,28 @@ dependencies = [ "cache-padded", ] +[[package]] +name = "const-random" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +dependencies = [ + "getrandom 0.2.10", + "once_cell", + "proc-macro-hack", + "tiny-keccak", +] + [[package]] name = "const_format" version = "0.2.30" @@ -1179,9 +1409,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpu-time" @@ -1360,6 +1590,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -1781,6 +2017,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.2.8" @@ -2063,13 +2305,12 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "2.1.2" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" +checksum = 
"4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags", - "smallvec", - "thiserror", + "rustc_version 0.4.0", ] [[package]] @@ -2403,14 +2644,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if 1.0.0", "js-sys", "libc 0.2.146", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2514,7 +2755,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.3", "slab", "tokio", "tokio-util", @@ -2527,11 +2768,22 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" -version = "0.9.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" @@ -2751,6 +3003,29 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2776,12 +3051,22 @@ checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" [[package]] name = "indexmap" -version = "1.6.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown 0.9.1", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", ] [[package]] @@ -2798,7 +3083,7 @@ checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ "ahash 0.7.4", "atty", - "indexmap", + "indexmap 1.9.3", "itoa 1.0.1", "lazy_static", "log", @@ -2949,7 +3234,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" dependencies = [ - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -3085,6 +3370,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "libm" 
+version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + [[package]] name = "libmimalloc-sys" version = "0.1.21" @@ -3438,26 +3729,6 @@ dependencies = [ "serde", ] -[[package]] -name = "multiversion" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" -dependencies = [ - "multiversion-macros", -] - -[[package]] -name = "multiversion-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.103", -] - [[package]] name = "mur3" version = "0.1.0" @@ -3604,15 +3875,15 @@ dependencies = [ [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", - "num-complex 0.4.1", + "num-complex 0.4.4", "num-integer", "num-iter", - "num-rational 0.4.0", + "num-rational 0.4.1", "num-traits", ] @@ -3638,9 +3909,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.1" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" dependencies = [ "num-traits", ] @@ -3668,9 +3939,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -3678,9 +3949,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", @@ -3700,9 +3971,9 @@ dependencies = [ [[package]] name = "num-rational" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", "num-bigint", @@ -3712,11 +3983,12 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -3746,7 +4018,7 @@ checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ "base64 0.13.0", "chrono", - "getrandom 0.2.3", + "getrandom 0.2.10", "http", "rand 0.8.5", "serde", @@ -4037,7 +4309,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -4769,7 +5041,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", ] [[package]] @@ -4862,19 +5134,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "redox_syscall 0.2.11", ] [[package]] name = "regex" -version = "1.5.6" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] @@ -4888,9 +5160,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.26" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "remove_dir_all" @@ -5433,7 +5711,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" dependencies = [ - "half", + "half 1.8.2", "serde", ] @@ -5463,7 +5741,7 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ - "indexmap", + "indexmap 1.9.3", "itoa 0.4.4", "ryu", "serde", @@ -7073,6 +7351,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinytemplate" version = "1.2.0" @@ -7274,7 +7561,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap", + "indexmap 1.9.3", "pin-project", "pin-project-lite", "rand 0.8.5", @@ -7512,7 +7799,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "serde", ] @@ -7522,7 +7809,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", ] [[package]] @@ -7598,12 +7885,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -7742,6 +8023,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.32.0" @@ -7761,21 +8051,42 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -7788,6 +8099,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -7800,6 +8117,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -7812,6 +8135,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -7824,12 +8153,24 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -7842,6 +8183,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winreg" version = "0.7.0" diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 8c1edc89a482..4f53c39b9dba 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -51,7 +51,7 @@ futures-io = "0.3" grpcio = { workspace = true } hex = "0.4" # Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 -indexmap = "=1.6.2" +indexmap = "=1.9.3" kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index b2fd9acc743b..ae4b98b16876 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -731,6 +731,7 @@ impl TempFileKey { } } + #[allow(deprecated)] fn format_date_time(ts: u64, t: FormatType) -> impl Display { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 44228f2d88e8..621d4384bcc0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1342,6 +1342,7 @@ impl Time { Ok((((ymd << 17) | hms) << 24) | u64::from(self.micro())) } + #[allow(deprecated)] pub fn from_duration( ctx: &mut EvalContext, duration: Duration, @@ -1415,6 +1416,7 @@ impl Time { .ok_or_else(|| Error::incorrect_datetime_value(self)) } + #[allow(deprecated)] pub fn normalized(self, ctx: &mut EvalContext) -> Result { if self.get_time_type() == TimeType::Timestamp { return Ok(self); @@ -1500,6 +1502,7 @@ impl Time { + self.day()) as i32 } + #[allow(deprecated)] pub fn weekday(self) -> Weekday { let date = if self.month() == 0 { NaiveDate::from_ymd(self.year() as i32 - 1, 12, 1) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 25b35a90fc0e..9dfc3ebf2886 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -120,6 +120,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_local_date(&self, local: &NaiveDate) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -134,6 +135,7 @@ impl TimeZone for Tz { } } 
+ #[allow(deprecated)] fn from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -148,6 +150,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_utc_date(&self, utc: &NaiveDate) -> Date { match *self { Tz::Local(ref offset) => { @@ -165,6 +168,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_utc_datetime(&self, utc: &NaiveDateTime) -> DateTime { match *self { Tz::Local(ref offset) => { diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 158e56abcb1d..0081d5e95bc9 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -120,7 +120,7 @@ uuid = { version = "0.8.1", features = ["serde", "v4"] } procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] -arrow = "13.0" +arrow = "46.0" byteorder = "1.2" # See https://bheisler.github.io/criterion.rs/book/user_guide/known_limitations.html for the usage # of `real_blackbox` feature. From 533b205efd231f13ca716e40a0cc33fa59ee6809 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 21 Sep 2023 14:37:43 +0800 Subject: [PATCH 059/203] raft-engine: update raft-engine to newest version (#15559) close tikv/tikv#15462 Signed-off-by: glorv Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 23 +++++++++++++++++------ components/raft_log_engine/Cargo.toml | 3 +++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f35ae6b935b..f4adccf26fc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3927,6 +3927,17 @@ dependencies = [ "syn 1.0.103", ] +[[package]] +name = "num-derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + [[package]] name = "num-format" version = "0.4.0" @@ -4756,8 +4767,8 @@ dependencies = [ [[package]] name = "raft-engine" -version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" +version = "0.4.1" +source = "git+https://github.com/tikv/raft-engine.git#fa56f891fdf0b1cb5b7849b7bee3c5dadbb96103" dependencies = [ "byteorder", "crc32fast", @@ -4773,7 +4784,7 @@ dependencies = [ "lz4-sys", "memmap2 0.7.0", "nix 0.26.2", - "num-derive", + "num-derive 0.4.0", "num-traits", "parking_lot 0.12.1", "prometheus", @@ -4790,8 +4801,8 @@ dependencies = [ [[package]] name = "raft-engine-ctl" -version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" +version = "0.4.1" +source = "git+https://github.com/tikv/raft-engine.git#fa56f891fdf0b1cb5b7849b7bee3c5dadbb96103" dependencies = [ "clap 3.1.6", "env_logger 0.10.0", @@ -6873,7 +6884,7 @@ dependencies = [ "match-template", "nom 7.1.0", "num 0.3.0", - "num-derive", + "num-derive 0.3.0", "num-traits", "ordered-float", "protobuf", diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index e643089a8728..0e640991eea9 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -4,6 +4,9 @@ version = "0.0.1" publish = false edition = "2021" +[features] +failpoints = ["raft-engine/failpoints"] + [dependencies] encryption = { workspace = true } engine_traits = { workspace = true } From 241b8f53d3b35ba6b0ff5d905527f93528af192a Mon Sep 17 00:00:00 2001 From: Spade A 
<71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:22:45 +0800 Subject: [PATCH 060/203] raftstore-v2: support online change lock write buffer limit (#15632) ref tikv/tikv#14320 support online change lock write buffer limit Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 ++-- components/engine_rocks/src/cf_options.rs | 17 ++++++++++ src/config/configurable.rs | 17 ++++++++++ src/config/mod.rs | 38 +++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4adccf26fc6..f05b651b1ad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3388,7 +3388,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3407,7 +3407,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "bzip2-sys", "cc", @@ -5379,7 +5379,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index 1162c67f2106..6a2372fb31fb 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -40,6 +40,23 @@ impl RocksCfOptions { pub fn into_raw(self) -> RawCfOptions { self.0 } + + pub fn set_flush_size(&mut self, f: usize) -> Result<()> { + if let Some(m) = self.0.get_write_buffer_manager() { + m.set_flush_size(f); + } else { + return Err(box_err!("write buffer manager not found")); + } + Ok(()) + } + + pub fn get_flush_size(&self) -> Result { + if let Some(m) = self.0.get_write_buffer_manager() { + return Ok(m.flush_size() as u64); + } + + Err(box_err!("write buffer manager not found")) + } } impl Deref for RocksCfOptions { diff --git a/src/config/configurable.rs b/src/config/configurable.rs index 6fe9409c1c0c..c92b01cf4653 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -15,6 +15,7 @@ pub trait ConfigurableDb { fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes; fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes; fn set_flush_size(&self, f: usize) -> ConfigRes; + fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes; fn set_flush_oldest_first(&self, f: bool) -> ConfigRes; fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes; fn set_high_priority_background_threads(&self, n: i32, allow_reduce: bool) -> ConfigRes; @@ -57,6 +58,11 @@ impl ConfigurableDb for RocksEngine { opt.set_flush_size(f).map_err(Box::from) } + fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes { + let mut cf_option = self.get_options_cf(cf)?; + cf_option.set_flush_size(f).map_err(Box::from) + } + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { let mut opt = self.get_db_options(); opt.set_flush_oldest_first(f).map_err(Box::from) @@ -171,6 +177,17 @@ impl ConfigurableDb for TabletRegistry { }) } + 
fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_cf_flush_size(cf, f)?; + Ok(false) + } else { + Ok(true) + } + }) + } + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { loop_registry(self, |cache| { if let Some(latest) = cache.latest() { diff --git a/src/config/mod.rs b/src/config/mod.rs index 6b3332fb0151..9b8ecad50f9c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2031,6 +2031,15 @@ impl ConfigManager for DbConfigManger { cf_change.insert(name, value); } } + if let Some(f) = cf_change.remove("write_buffer_limit") { + if cf_name != CF_LOCK { + return Err( + "cf write buffer manager is only supportted for lock cf now".into() + ); + } + let size: ReadableSize = f.into(); + self.db.set_cf_flush_size(cf_name, size.0 as usize)?; + } if !cf_change.is_empty() { let cf_change = config_value_to_string(cf_change.into_iter().collect()); let cf_change_slice = config_to_slice(&cf_change); @@ -5167,6 +5176,7 @@ mod tests { cfg.rocksdb.defaultcf.block_cache_size = Some(ReadableSize::mb(8)); cfg.rocksdb.rate_bytes_per_sec = ReadableSize::mb(64); cfg.rocksdb.rate_limiter_auto_tuned = false; + cfg.rocksdb.lockcf.write_buffer_limit = Some(ReadableSize::mb(1)); cfg.validate().unwrap(); let (storage, cfg_controller, ..) = new_engines::(cfg); let db = storage.get_engine().get_rocksdb(); @@ -5209,6 +5219,34 @@ mod tests { let flush_size = db.get_db_options().get_flush_size().unwrap(); assert_eq!(flush_size, ReadableSize::mb(10).0); + cfg_controller + .update_config("rocksdb.lockcf.write-buffer-limit", "22MB") + .unwrap(); + let cf_opt = db.get_options_cf("lock").unwrap(); + let flush_size = cf_opt.get_flush_size().unwrap(); + assert_eq!(flush_size, ReadableSize::mb(22).0); + + cfg_controller + .update_config("rocksdb.lockcf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("lock").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + + cfg_controller + .update_config("rocksdb.writecf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("write").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + + cfg_controller + .update_config("rocksdb.defaultcf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("default").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + // update some configs on default cf let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), false); From 9b76ac97e1de01c1b0e70af406720b2c368d9624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:39:15 +0800 Subject: [PATCH 061/203] log-bakcup: make initial scan asynchronous (#15541) ref tikv/tikv#15410 This PR also removed some fields in `Endpoint`, now they should be in the `InitialDataLoader`. The latter will communicate with the former by messages. 
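A toy sketch of the message-based decoupling mentioned above. The `ScanMessage` enum, its fields, and the plain std::sync::mpsc channel are illustrative stand-ins for the scheduler and task types the real code uses; the point is only that the loader owns a sender and reports scan results back to the endpoint as messages instead of reaching into the endpoint's fields:

    use std::sync::mpsc::{channel, Sender};
    use std::thread;

    // Hypothetical message type: the scanner reports progress back to the
    // endpoint rather than sharing the endpoint's state directly.
    enum ScanMessage {
        RegionInitialized { region_id: u64, checkpoint_ts: u64 },
        ScanFailed { region_id: u64, err: String },
    }

    struct InitialDataLoader {
        // The only coupling to the endpoint is this message channel.
        endpoint_tx: Sender<ScanMessage>,
    }

    impl InitialDataLoader {
        fn scan_region(&self, region_id: u64) {
            let tx = self.endpoint_tx.clone();
            // The scan runs off the endpoint's thread; its result is
            // delivered as a message when it completes.
            thread::spawn(move || {
                // ... perform the (possibly slow) initial scan here ...
                let _ = tx.send(ScanMessage::RegionInitialized {
                    region_id,
                    checkpoint_ts: 42,
                });
            });
        }
    }

    fn main() {
        let (tx, rx) = channel();
        let loader = InitialDataLoader { endpoint_tx: tx };
        loader.scan_region(1);
        // The endpoint's event loop consumes the messages.
        match rx.recv().unwrap() {
            ScanMessage::RegionInitialized { region_id, checkpoint_ts } => {
                println!("region {} initialized at ts {}", region_id, checkpoint_ts);
            }
            ScanMessage::ScanFailed { region_id, err } => {
                println!("region {} scan failed: {}", region_id, err);
            }
        }
    }

Because results arrive as messages, the scan itself can run on another thread or runtime while the endpoint keeps driving its own event loop, which is what allows the initial scan to become asynchronous.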
Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 156 ++++++++----- components/backup-stream/src/event_loader.rs | 215 ++++++++---------- .../backup-stream/src/subscription_manager.rs | 213 +++++++++-------- .../backup-stream/src/subscription_track.rs | 2 + components/backup-stream/src/utils.rs | 64 +----- .../backup-stream/tests/integration/mod.rs | 22 ++ components/backup-stream/tests/suite.rs | 12 +- components/raftstore/src/router.rs | 32 ++- src/config/mod.rs | 5 + 9 files changed, 357 insertions(+), 364 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index b11259d5be68..834a40f8bdd5 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1,16 +1,24 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{any::Any, collections::HashSet, fmt, marker::PhantomData, sync::Arc, time::Duration}; +use std::{ + any::Any, + collections::HashSet, + fmt, + marker::PhantomData, + sync::{Arc, Mutex}, + time::Duration, +}; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use error_code::ErrorCodeExt; -use futures::{stream::AbortHandle, FutureExt}; +use futures::{stream::AbortHandle, FutureExt, TryFutureExt}; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, }; use pd_client::PdClient; +use raft::StateRole; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, router::CdcHandle, @@ -30,7 +38,7 @@ use tikv_util::{ use tokio::{ io::Result as TokioResult, runtime::{Handle, Runtime}, - sync::oneshot, + sync::{oneshot, Semaphore}, }; use tokio_stream::StreamExt; use txn_types::TimeStamp; @@ -60,7 +68,7 @@ const SLOW_EVENT_THRESHOLD: f64 = 120.0; /// task has fatal error. const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; -pub struct Endpoint { +pub struct Endpoint { // Note: those fields are more like a shared context between components. // For now, we copied them everywhere, maybe we'd better extract them into a // context type. @@ -69,7 +77,6 @@ pub struct Endpoint { pub(crate) store_id: u64, pub(crate) regions: R, pub(crate) engine: PhantomData, - pub(crate) router: RT, pub(crate) pd_client: Arc, pub(crate) subs: SubscriptionTracer, pub(crate) concurrency_manager: ConcurrencyManager, @@ -78,8 +85,6 @@ pub struct Endpoint { pub range_router: Router, observer: BackupStreamObserver, pool: Runtime, - initial_scan_memory_quota: PendingMemoryQuota, - initial_scan_throughput_quota: Limiter, region_operator: RegionSubscriptionManager, failover_time: Option, // We holds the config before, even it is useless for now, @@ -92,17 +97,17 @@ pub struct Endpoint { /// This is used for simulating an asynchronous background worker. /// Each time we spawn a task, once time goes by, we abort that task. 
pub abort_last_storage_save: Option, + pub initial_scan_semaphore: Arc, } -impl Endpoint +impl Endpoint where R: RegionInfoProvider + 'static + Clone, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, S: MetaStore + 'static, { - pub fn new( + pub fn new + 'static>( store_id: u64, store: S, config: BackupStreamConfig, @@ -145,17 +150,21 @@ where info!("the endpoint of stream backup started"; "path" => %config.temp_path); let subs = SubscriptionTracer::default(); + let initial_scan_semaphore = Arc::new(Semaphore::new(config.initial_scan_concurrency)); let (region_operator, op_loop) = RegionSubscriptionManager::start( InitialDataLoader::new( - router.clone(), - accessor.clone(), range_router.clone(), subs.clone(), scheduler.clone(), - initial_scan_memory_quota.clone(), - pool.handle().clone(), - initial_scan_throughput_quota.clone(), + initial_scan_memory_quota, + initial_scan_throughput_quota, + // NOTE: in fact we can get rid of the `Arc`. Just need to warp the router when the + // scanner pool is created. But at that time the handle has been sealed in the + // `InitialScan` trait -- we cannot do that. + Arc::new(Mutex::new(router)), + Arc::clone(&initial_scan_semaphore), ), + accessor.clone(), observer.clone(), meta_client.clone(), pd_client.clone(), @@ -166,6 +175,7 @@ where let mut checkpoint_mgr = CheckpointManager::default(); pool.spawn(checkpoint_mgr.spawn_subscription_mgr()); let ep = Endpoint { + initial_scan_semaphore, meta_client, range_router, scheduler, @@ -174,12 +184,9 @@ where store_id, regions: accessor, engine: PhantomData, - router, pd_client, subs, concurrency_manager, - initial_scan_memory_quota, - initial_scan_throughput_quota, region_operator, failover_time: None, config, @@ -191,12 +198,11 @@ where } } -impl Endpoint +impl Endpoint where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, { fn get_meta_client(&self) -> MetadataClient { @@ -494,20 +500,6 @@ where }); } - /// Make an initial data loader using the resource of the endpoint. - pub fn make_initial_loader(&self) -> InitialDataLoader { - InitialDataLoader::new( - self.router.clone(), - self.regions.clone(), - self.range_router.clone(), - self.subs.clone(), - self.scheduler.clone(), - self.initial_scan_memory_quota.clone(), - self.pool.handle().clone(), - self.initial_scan_throughput_quota.clone(), - ) - } - pub fn handle_watch_task(&self, op: TaskOp) { match op { TaskOp::AddTask(task) => { @@ -525,13 +517,12 @@ where } } - async fn observe_and_scan_region( + async fn observe_regions_in_range( &self, - init: InitialDataLoader, task: &StreamTask, start_key: Vec, end_key: Vec, - ) -> Result<()> { + ) { let start = Instant::now_coarse(); let success = self .observer @@ -549,7 +540,9 @@ where // directly and this would be fast. If this gets slow, maybe make it async // again. (Will that bring race conditions? say `Start` handled after // `ResfreshResolver` of some region.) - let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); + let range_init_result = self + .initialize_range(start_key.clone(), end_key.clone()) + .await; match range_init_result { Ok(()) => { info!("backup stream success to initialize"; @@ -561,6 +554,45 @@ where e.report("backup stream initialize failed"); } } + } + + /// initialize a range: it simply scan the regions with leader role and send + /// them to [`initialize_region`]. 
+ pub async fn initialize_range(&self, start_key: Vec, end_key: Vec) -> Result<()> { + // Generally we will be very very fast to consume. + // Directly clone the initial data loader to the background thread looks a + // little heavier than creating a new channel. TODO: Perhaps we need a + // handle to the `InitialDataLoader`. Making it a `Runnable` worker might be a + // good idea. + let (tx, mut rx) = tokio::sync::mpsc::channel(1); + self.regions + .seek_region( + &start_key, + Box::new(move |i| { + // Ignore the error, this can only happen while the server is shutting down, the + // future has been canceled. + let _ = i + .filter(|r| r.role == StateRole::Leader) + .take_while(|r| r.region.start_key < end_key) + .try_for_each(|r| { + tx.blocking_send(ObserveOp::Start { + region: r.region.clone(), + }) + }); + }), + ) + .map_err(|err| { + Error::Other(box_err!( + "failed to seek region for start key {}: {}", + utils::redact(&start_key), + err + )) + })?; + // Don't reschedule this command: or once the endpoint's mailbox gets + // full, the system might deadlock. + while let Some(cmd) = rx.recv().await { + self.region_operator.request(cmd).await; + } Ok(()) } @@ -578,7 +610,6 @@ where /// Load the task into memory: this would make the endpint start to observe. fn load_task(&self, task: StreamTask) { let cli = self.meta_client.clone(); - let init = self.make_initial_loader(); let range_router = self.range_router.clone(); info!( @@ -621,10 +652,8 @@ where .await?; for (start_key, end_key) in ranges { - let init = init.clone(); - - self.observe_and_scan_region(init, &task, start_key, end_key) - .await? + self.observe_regions_in_range(&task, start_key, end_key) + .await } info!( "finish register backup stream ranges"; @@ -859,11 +888,16 @@ where } fn on_update_change_config(&mut self, cfg: BackupStreamConfig) { + let concurrency_diff = + cfg.initial_scan_concurrency as isize - self.config.initial_scan_concurrency as isize; info!( "update log backup config"; "config" => ?cfg, + "concurrency_diff" => concurrency_diff, ); self.range_router.udpate_config(&cfg); + self.update_semaphore_capacity(&self.initial_scan_semaphore, concurrency_diff); + self.config = cfg; } @@ -873,6 +907,24 @@ where self.pool.block_on(self.region_operator.request(op)); } + fn update_semaphore_capacity(&self, sema: &Arc, diff: isize) { + use std::cmp::Ordering::*; + match diff.cmp(&0) { + Less => { + self.pool.spawn( + Arc::clone(sema) + .acquire_many_owned(-diff as _) + // It is OK to trivially ignore the Error case (semaphore has been closed, we are shutting down the server.) 
+ .map_ok(|p| p.forget()), + ); + } + Equal => {} + Greater => { + sema.add_permits(diff as _); + } + } + } + pub fn run_task(&mut self, task: Task) { debug!("run backup stream task"; "task" => ?task, "store_id" => %self.store_id); let now = Instant::now_coarse(); @@ -1279,12 +1331,11 @@ impl Task { } } -impl Runnable for Endpoint +impl Runnable for Endpoint where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, { type Task = Task; @@ -1297,10 +1348,7 @@ where #[cfg(test)] mod test { use engine_rocks::RocksEngine; - use raftstore::{ - coprocessor::region_info_accessor::MockRegionInfoProvider, router::CdcRaftRouter, - }; - use test_raftstore::MockRaftStoreRouter; + use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; use tikv_util::worker::dummy_scheduler; use crate::{ @@ -1315,13 +1363,9 @@ mod test { cli.insert_task_with_range(&task, &[]).await.unwrap(); fail::cfg("failed_to_get_tasks", "1*return").unwrap(); - Endpoint::< - _, - MockRegionInfoProvider, - RocksEngine, - CdcRaftRouter, - MockPdClient, - >::start_and_watch_tasks(cli, sched) + Endpoint::<_, MockRegionInfoProvider, RocksEngine, MockPdClient>::start_and_watch_tasks( + cli, sched, + ) .await .unwrap(); fail::remove("failed_to_get_tasks"); diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 1b663c0e9823..bfb88d5cd5f2 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -3,10 +3,9 @@ use std::{marker::PhantomData, sync::Arc, time::Duration}; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; -use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::CmdType}; use raftstore::{ - coprocessor::{ObserveHandle, RegionInfoProvider}, + coprocessor::ObserveHandle, router::CdcHandle, store::{fsm::ChangeObserver, Callback}, }; @@ -21,22 +20,16 @@ use tikv_util::{ time::{Instant, Limiter}, worker::Scheduler, }; -use tokio::{ - runtime::Handle, - sync::{OwnedSemaphorePermit, Semaphore}, -}; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use txn_types::{Key, Lock, TimeStamp}; use crate::{ annotate, debug, - endpoint::ObserveOp, errors::{ContextualResultExt, Error, Result}, metrics, router::{ApplyEvent, ApplyEvents, Router}, subscription_track::{Ref, RefMut, SubscriptionTracer, TwoPhaseResolver}, - try_send, - utils::{self, RegionPager}, - Task, + utils, Task, }; const MAX_GET_SNAPSHOT_RETRY: usize = 5; @@ -60,10 +53,12 @@ impl PendingMemoryQuota { Self(Arc::new(Semaphore::new(quota))) } - pub fn pending(&self, size: usize) -> PendingMemory { + pub async fn pending(&self, size: usize) -> PendingMemory { PendingMemory( - Handle::current() - .block_on(self.0.clone().acquire_many_owned(size as _)) + self.0 + .clone() + .acquire_many_owned(size as _) + .await .expect("BUG: the semaphore is closed unexpectedly."), ) } @@ -175,121 +170,64 @@ impl EventLoader { } /// The context for loading incremental data between range. -/// Like [`cdc::Initializer`], but supports initialize over range. +/// Like [`cdc::Initializer`]. /// Note: maybe we can merge those two structures? -/// Note': maybe extract more fields to trait so it would be easier to test. #[derive(Clone)] -pub struct InitialDataLoader { +pub struct InitialDataLoader { // Note: maybe we can make it an abstract thing like `EventSink` with // method `async (KvEvent) -> Result<()>`? 
pub(crate) sink: Router, pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, - // Note: this is only for `init_range`, maybe make it an argument? - pub(crate) regions: R, - // Note: Maybe move those fields about initial scanning into some trait? - pub(crate) router: RT, + pub(crate) quota: PendingMemoryQuota, pub(crate) limit: Limiter, + // If there are too many concurrent initial scanning, the limit of disk speed or pending memory + // quota will probably be triggered. Then the whole scanning will be pretty slow. And when + // we are holding a iterator for a long time, the memtable may not be able to be flushed. + // Using this to restrict the possibility of that. + concurrency_limit: Arc, + + cdc_handle: H, - pub(crate) handle: Handle, _engine: PhantomData, } -impl InitialDataLoader +impl InitialDataLoader where E: KvEngine, - R: RegionInfoProvider + Clone + 'static, - RT: CdcHandle, + H: CdcHandle + Sync, { pub fn new( - router: RT, - regions: R, sink: Router, tracing: SubscriptionTracer, sched: Scheduler, quota: PendingMemoryQuota, - handle: Handle, limiter: Limiter, + cdc_handle: H, + concurrency_limit: Arc, ) -> Self { Self { - router, - regions, sink, tracing, scheduler: sched, _engine: PhantomData, quota, - handle, + cdc_handle, + concurrency_limit, limit: limiter, } } - pub fn observe_over_with_retry( + pub async fn capture_change( &self, region: &Region, - mut cmd: impl FnMut() -> ChangeObserver, + cmd: ChangeObserver, ) -> Result { - let mut last_err = None; - for _ in 0..MAX_GET_SNAPSHOT_RETRY { - let c = cmd(); - let r = self.observe_over(region, c); - match r { - Ok(s) => { - return Ok(s); - } - Err(e) => { - let can_retry = match e.without_context() { - Error::RaftRequest(pbe) => { - !(pbe.has_epoch_not_match() - || pbe.has_not_leader() - || pbe.get_message().contains("stale observe id") - || pbe.has_region_not_found()) - } - Error::RaftStore(raftstore::Error::RegionNotFound(_)) - | Error::RaftStore(raftstore::Error::NotLeader(..)) => false, - _ => true, - }; - e.report(format_args!( - "during getting initial snapshot for region {:?}; can retry = {}", - region, can_retry - )); - last_err = match last_err { - None => Some(e), - Some(err) => Some(Error::Contextual { - context: format!("and error {}", err), - inner_error: Box::new(e), - }), - }; - - if !can_retry { - break; - } - std::thread::sleep(Duration::from_secs(1)); - continue; - } - } - } - Err(last_err.expect("BUG: max retry time exceed but no error")) - } - - /// Start observe over some region. - /// This will register the region to the raftstore as observing, - /// and return the current snapshot of that region. - fn observe_over(&self, region: &Region, cmd: ChangeObserver) -> Result { - // There are 2 ways for getting the initial snapshot of a region: - // - the BR method: use the interface in the RaftKv interface, read the - // key-values directly. - // - the CDC method: use the raftstore message `SignificantMsg::CaptureChange` - // to register the region to CDC observer and get a snapshot at the same time. - // Registering the observer to the raftstore is necessary because we should only - // listen events from leader. In CDC, the change observer is - // per-delegate(i.e. per-region), we can create the command per-region here too. 
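The registration itself is a callback-style raftstore API; the code below bridges it into a future with `paired_future_callback` so the now-async caller can simply await the snapshot. Reduced to its essence, such a bridge is a oneshot channel; `request_with_callback` here is a hypothetical stand-in, not a TiKV API:

    // Sketch only: wrapping a callback-style API into an awaitable future.
    use tokio::sync::oneshot;

    // Some API we do not control; it promises to invoke `cb` exactly once.
    fn request_with_callback(cb: Box<dyn FnOnce(u64) + Send>) {
        std::thread::spawn(move || cb(42));
    }

    async fn request() -> Result<u64, oneshot::error::RecvError> {
        let (tx, rx) = oneshot::channel();
        request_with_callback(Box::new(move |v| {
            let _ = tx.send(v); // the receiver may have given up; ignore the error
        }));
        rx.await
    }

    #[tokio::main]
    async fn main() {
        assert_eq!(request().await.unwrap(), 42);
    }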
- let (callback, fut) = tikv_util::future::paired_future_callback::>(); - self.router + self.cdc_handle .capture_change( region.get_id(), region.get_region_epoch().clone(), @@ -315,7 +253,8 @@ where region.get_id() ))?; - let snap = block_on(fut) + let snap = fut + .await .map_err(|err| { annotate!( err, @@ -332,6 +271,54 @@ where Ok(snap) } + pub async fn observe_over_with_retry( + &self, + region: &Region, + mut cmd: impl FnMut() -> ChangeObserver, + ) -> Result { + let mut last_err = None; + for _ in 0..MAX_GET_SNAPSHOT_RETRY { + let c = cmd(); + let r = self.capture_change(region, c).await; + match r { + Ok(s) => { + return Ok(s); + } + Err(e) => { + let can_retry = match e.without_context() { + Error::RaftRequest(pbe) => { + !(pbe.has_epoch_not_match() + || pbe.has_not_leader() + || pbe.get_message().contains("stale observe id") + || pbe.has_region_not_found()) + } + Error::RaftStore(raftstore::Error::RegionNotFound(_)) + | Error::RaftStore(raftstore::Error::NotLeader(..)) => false, + _ => true, + }; + e.report(format_args!( + "during getting initial snapshot for region {:?}; can retry = {}", + region, can_retry + )); + last_err = match last_err { + None => Some(e), + Some(err) => Some(Error::Contextual { + context: format!("and error {}", err), + inner_error: Box::new(e), + }), + }; + + if !can_retry { + break; + } + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + } + } + Err(last_err.expect("BUG: max retry time exceed but no error")) + } + fn with_resolver( &self, region: &Region, @@ -381,7 +368,7 @@ where f(v.value_mut().resolver()) } - fn scan_and_async_send( + async fn scan_and_async_send( &self, region: &Region, handle: &ObserveHandle, @@ -419,8 +406,8 @@ where let sink = self.sink.clone(); let event_size = events.size(); let sched = self.scheduler.clone(); - let permit = self.quota.pending(event_size); - self.limit.blocking_consume(disk_read as _); + let permit = self.quota.pending(event_size).await; + self.limit.consume(disk_read as _).await; debug!("sending events to router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); metrics::INCREMENTAL_SCAN_DISK_READ.inc_by(disk_read as f64); @@ -434,7 +421,7 @@ where } } - pub fn do_initial_scan( + pub async fn do_initial_scan( &self, region: &Region, // We are using this handle for checking whether the initial scan is stale. @@ -442,18 +429,25 @@ where start_ts: TimeStamp, snap: impl Snapshot, ) -> Result { - let _guard = self.handle.enter(); let tr = self.tracing.clone(); let region_id = region.get_id(); let mut join_handles = Vec::with_capacity(8); + let permit = self + .concurrency_limit + .acquire() + .await + .expect("BUG: semaphore closed"); // It is ok to sink more data than needed. So scan to +inf TS for convenance. let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; - let stats = self.scan_and_async_send(region, &handle, event_loader, &mut join_handles)?; + let stats = self + .scan_and_async_send(region, &handle, event_loader, &mut join_handles) + .await?; + drop(permit); - Handle::current() - .block_on(futures::future::try_join_all(join_handles)) + futures::future::try_join_all(join_handles) + .await .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; Self::with_resolver_by(&tr, region, &handle, |r| { @@ -467,31 +461,6 @@ where Ok(stats) } - - /// initialize a range: it simply scan the regions with leader role and send - /// them to [`initialize_region`]. 
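The old `initialize_range` deleted below paged through regions synchronously via `RegionPager`; its replacement, added to the endpoint earlier in this patch, instead forwards regions from the synchronous `seek_region` callback into async code through a bounded channel, which also gives back pressure. A reduced sketch of that hand-off, using a made-up `RegionMeta` type and a `seek_regions` stand-in:

    // Sketch only: a synchronous callback feeding an async consumer.
    use tokio::sync::mpsc;

    struct RegionMeta {
        id: u64,
    }

    // Stand-in for a callback-based, synchronous region scanner.
    fn seek_regions(cb: impl FnOnce(&mut dyn Iterator<Item = RegionMeta>) + Send + 'static) {
        std::thread::spawn(move || {
            let mut it = (1u64..=3).map(|id| RegionMeta { id });
            cb(&mut it);
        });
    }

    #[tokio::main]
    async fn main() {
        let (tx, mut rx) = mpsc::channel(1);
        seek_regions(move |regions| {
            for r in regions {
                // `blocking_send` is safe here: the closure runs on a plain thread,
                // not on the async runtime, and the bounded channel applies back
                // pressure instead of queueing without limit.
                if tx.blocking_send(r).is_err() {
                    break; // receiver gone, stop early
                }
            }
        });
        while let Some(r) = rx.recv().await {
            println!("observe region {}", r.id);
        }
    }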
- pub fn initialize_range(&self, start_key: Vec, end_key: Vec) -> Result<()> { - let mut pager = RegionPager::scan_from(self.regions.clone(), start_key, end_key); - loop { - let regions = pager.next_page(8)?; - debug!("scanning for entries in region."; "regions" => ?regions); - if regions.is_empty() { - break; - } - for r in regions { - // Note: Even we did the initial scanning, and blocking resolved ts from - // advancing, if the next_backup_ts was updated in some extreme condition, there - // is still little chance to lost data: For example, if a region cannot elect - // the leader for long time. (say, net work partition) At that time, we have - // nowhere to record the lock status of this region. - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::Start { region: r.region }) - ); - } - } - Ok(()) - } } #[cfg(test)] diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index e418d59029d3..7aeecb775ccf 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -1,15 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, -}; +use std::{sync::Arc, time::Duration}; -use crossbeam::channel::{Receiver as SyncReceiver, Sender as SyncSender}; -use crossbeam_channel::SendError; use engine_traits::KvEngine; use error_code::ErrorCodeExt; use futures::FutureExt; @@ -22,10 +14,11 @@ use raftstore::{ store::fsm::ChangeObserver, }; use tikv::storage::Statistics; -use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; -use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tikv_util::{ + box_err, debug, info, sys::thread::ThreadBuildWrapper, time::Instant, warn, worker::Scheduler, +}; +use tokio::sync::mpsc::{channel, error::SendError, Receiver, Sender}; use txn_types::TimeStamp; -use yatp::task::callback::Handle as YatpHandle; use crate::{ annotate, @@ -43,7 +36,7 @@ use crate::{ Task, }; -type ScanPool = yatp::ThreadPool; +type ScanPool = tokio::runtime::Runtime; const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; @@ -128,8 +121,9 @@ fn should_retry(err: &Error) -> bool { } /// the abstraction over a "DB" which provides the initial scanning. -trait InitialScan: Clone { - fn do_initial_scan( +#[async_trait::async_trait] +trait InitialScan: Clone + Sync + Send + 'static { + async fn do_initial_scan( &self, region: &Region, start_ts: TimeStamp, @@ -139,13 +133,13 @@ trait InitialScan: Clone { fn handle_fatal_error(&self, region: &Region, err: Error); } -impl InitialScan for InitialDataLoader +#[async_trait::async_trait] +impl InitialScan for InitialDataLoader where E: KvEngine, - R: RegionInfoProvider + Clone + 'static, - RT: CdcHandle, + RT: CdcHandle + Sync + 'static, { - fn do_initial_scan( + async fn do_initial_scan( &self, region: &Region, start_ts: TimeStamp, @@ -155,12 +149,14 @@ where let h = handle.clone(); // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep // retrying here? 
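The retry policy keeps one shape after the change: classify the error as retryable or fatal, sleep on the async runtime rather than blocking a worker thread, and give up after a bounded number of attempts (`observe_over_with_retry` above, `exec_by_with_retry` below). That shape in isolation, with a made-up `ScanError` type and `with_retry` helper:

    // Sketch only: bounded async retry with error classification.
    use std::{
        sync::atomic::{AtomicUsize, Ordering},
        time::Duration,
    };

    #[derive(Debug)]
    enum ScanError {
        Retryable(&'static str),
        Fatal(&'static str),
    }

    async fn with_retry<T, F, Fut>(mut op: F, max_retry: usize) -> Result<T, ScanError>
    where
        F: FnMut() -> Fut,
        Fut: std::future::Future<Output = Result<T, ScanError>>,
    {
        let mut last = None;
        for _ in 0..=max_retry {
            match op().await {
                Ok(v) => return Ok(v),
                Err(ScanError::Retryable(msg)) => {
                    last = Some(ScanError::Retryable(msg));
                    tokio::time::sleep(Duration::from_millis(500)).await;
                }
                Err(fatal) => return Err(fatal),
            }
        }
        Err(last.expect("at least one attempt was made"))
    }

    static ATTEMPTS: AtomicUsize = AtomicUsize::new(0);

    #[tokio::main]
    async fn main() {
        let res = with_retry(
            || async {
                let n = ATTEMPTS.fetch_add(1, Ordering::SeqCst) + 1;
                if n < 3 {
                    Err(ScanError::Retryable("snapshot not ready"))
                } else {
                    Ok(n)
                }
            },
            5,
        )
        .await;
        assert_eq!(res.unwrap(), 3);
    }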
- let snap = self.observe_over_with_retry(region, move || { - ChangeObserver::from_pitr(region_id, handle.clone()) - })?; + let snap = self + .observe_over_with_retry(region, move || { + ChangeObserver::from_pitr(region_id, handle.clone()) + }) + .await?; #[cfg(feature = "failpoints")] fail::fail_point!("scan_after_get_snapshot"); - let stat = self.do_initial_scan(region, h, start_ts, snap)?; + let stat = self.do_initial_scan(region, h, start_ts, snap).await?; Ok(stat) } @@ -180,7 +176,7 @@ where impl ScanCmd { /// execute the initial scanning via the specificated [`InitialDataLoader`]. - fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { + async fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { let Self { region, handle, @@ -188,7 +184,9 @@ impl ScanCmd { .. } = self; let begin = Instant::now_coarse(); - let stat = initial_scan.do_initial_scan(region, *last_checkpoint, handle.clone())?; + let stat = initial_scan + .do_initial_scan(region, *last_checkpoint, handle.clone()) + .await?; info!("initial scanning finished!"; "takes" => ?begin.saturating_elapsed(), "from_ts" => %last_checkpoint, utils::slog_region(region)); utils::record_cf_stat("lock", &stat.lock); utils::record_cf_stat("write", &stat.write); @@ -197,17 +195,12 @@ impl ScanCmd { } /// execute the command, when meeting error, retrying. - fn exec_by_with_retry(self, init: impl InitialScan, cancel: &AtomicBool) { + async fn exec_by_with_retry(self, init: impl InitialScan) { let mut retry_time = INITIAL_SCAN_FAILURE_MAX_RETRY_TIME; loop { - if cancel.load(Ordering::SeqCst) { - return; - } - match self.exec_by(init.clone()) { + match self.exec_by(init.clone()).await { Err(err) if should_retry(&err) && retry_time > 0 => { - // NOTE: blocking this thread may stick the process. - // Maybe spawn a task to tokio and reschedule the task then? - std::thread::sleep(Duration::from_millis(500)); + tokio::time::sleep(Duration::from_millis(500)).await; warn!("meet retryable error"; "err" => %err, "retry_time" => retry_time); retry_time -= 1; continue; @@ -223,82 +216,62 @@ impl ScanCmd { } } -fn scan_executor_loop( - init: impl InitialScan, - cmds: SyncReceiver, - canceled: Arc, -) { - while let Ok(cmd) = cmds.recv() { - fail::fail_point!("execute_scan_command"); +async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) { + while let Some(cmd) = cmds.recv().await { debug!("handling initial scan request"; "region_id" => %cmd.region.get_id()); metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["queuing"]) .dec(); - if canceled.load(Ordering::Acquire) { - return; + #[cfg(feature = "failpoints")] + { + let sleep = (|| { + fail::fail_point!("execute_scan_command_sleep_100", |_| { 100 }); + 0 + })(); + tokio::time::sleep(std::time::Duration::from_secs(sleep)).await; } - metrics::PENDING_INITIAL_SCAN_LEN - .with_label_values(&["executing"]) - .inc(); - cmd.exec_by_with_retry(init.clone(), &canceled); - metrics::PENDING_INITIAL_SCAN_LEN - .with_label_values(&["executing"]) - .dec(); + let init = init.clone(); + tokio::task::spawn(async move { + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .inc(); + cmd.exec_by_with_retry(init).await; + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .dec(); + }); } } /// spawn the executors in the scan pool. 
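As `scan_executor_loop` above shows, the executor no longer runs a scan inline on the worker that dequeued it: it drains an mpsc queue and spawns each command as its own task on a dedicated runtime, so one slow scan cannot stall the queue. Stripped of TiKV specifics, the dispatcher shape is roughly the following (`ScanCmd` and `run_scan` are illustrative):

    // Sketch only: an mpsc-fed dispatcher on a dedicated tokio runtime.
    use std::time::Duration;

    use tokio::sync::mpsc;

    struct ScanCmd {
        region_id: u64,
    }

    async fn run_scan(cmd: ScanCmd) {
        // Placeholder for the actual initial-scan work.
        println!("scanning region {}", cmd.region_id);
    }

    fn main() {
        // Comparable in spirit to `create_scan_pool` in this patch.
        let pool = tokio::runtime::Builder::new_multi_thread()
            .worker_threads(2)
            .thread_name("scan-pool")
            .enable_time()
            .build()
            .unwrap();

        let (tx, mut rx) = mpsc::channel::<ScanCmd>(32);

        // One long-lived task fans incoming commands out as independent tasks.
        pool.spawn(async move {
            while let Some(cmd) = rx.recv().await {
                tokio::spawn(run_scan(cmd));
            }
        });

        pool.block_on(async move {
            for region_id in 1..=3u64 {
                tx.send(ScanCmd { region_id }).await.unwrap();
            }
            // Give the spawned scans a moment before the runtime is dropped.
            tokio::time::sleep(Duration::from_millis(100)).await;
        });
    }

The bound on the channel is what pushes back on callers when scans arrive faster than they can be started.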
-/// we make workers thread instead of spawn scan task directly into the pool -/// because the [`InitialDataLoader`] isn't `Sync` hence we must use it very -/// carefully or rustc (along with tokio) would complain that we made a `!Send` -/// future. so we have moved the data loader to the synchronous context so its -/// reference won't be shared between threads any more. -fn spawn_executors(init: impl InitialScan + Send + 'static, number: usize) -> ScanPoolHandle { - let (tx, rx) = crossbeam::channel::bounded(MESSAGE_BUFFER_SIZE); +fn spawn_executors( + init: impl InitialScan + Send + Sync + 'static, + number: usize, +) -> ScanPoolHandle { + let (tx, rx) = tokio::sync::mpsc::channel(MESSAGE_BUFFER_SIZE); let pool = create_scan_pool(number); - let stopped = Arc::new(AtomicBool::new(false)); - for _ in 0..number { - let init = init.clone(); - let rx = rx.clone(); - let stopped = stopped.clone(); - pool.spawn(move |_: &mut YatpHandle<'_>| { - let _io_guard = file_system::WithIoType::new(file_system::IoType::Replication); - scan_executor_loop(init, rx, stopped); - }) - } - ScanPoolHandle { - tx, - _pool: pool, - stopped, - } + pool.spawn(async move { + scan_executor_loop(init, rx).await; + }); + ScanPoolHandle { tx, _pool: pool } } struct ScanPoolHandle { - tx: SyncSender, - stopped: Arc, + // Theoretically, we can get rid of the sender, and spawn a new task via initial loader in each + // thread. But that will make `SubscribeManager` holds a reference to the implementation of + // `InitialScan`, which will get the type information a mass. + tx: Sender, - // in fact, we won't use the pool any more. - // but we should hold the reference to the pool so it won't try to join the threads running. _pool: ScanPool, } -impl Drop for ScanPoolHandle { - fn drop(&mut self) { - self.stopped.store(true, Ordering::Release); - } -} - impl ScanPoolHandle { - fn request(&self, cmd: ScanCmd) -> std::result::Result<(), SendError> { - if self.stopped.load(Ordering::Acquire) { - warn!("scan pool is stopped, ignore the scan command"; "region" => %cmd.region.get_id()); - return Ok(()); - } + async fn request(&self, cmd: ScanCmd) -> std::result::Result<(), SendError> { metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["queuing"]) .inc(); - self.tx.send(cmd) + self.tx.send(cmd).await } } @@ -348,11 +321,20 @@ where } } -/// Create a yatp pool for doing initial scanning. +/// Create a pool for doing initial scanning. fn create_scan_pool(num_threads: usize) -> ScanPool { - yatp::Builder::new("log-backup-scan") - .max_thread_count(num_threads) - .build_callback_pool() + tokio::runtime::Builder::new_multi_thread() + .with_sys_and_custom_hooks( + move || { + file_system::set_io_type(file_system::IoType::Replication); + }, + || {}, + ) + .thread_name("log-backup-scan") + .enable_time() + .worker_threads(num_threads) + .build() + .unwrap() } impl RegionSubscriptionManager @@ -367,22 +349,24 @@ where /// /// a two-tuple, the first is the handle to the manager, the second is the /// operator loop future. 
- pub fn start( - initial_loader: InitialDataLoader, + pub fn start( + initial_loader: InitialDataLoader, + regions: R, observer: BackupStreamObserver, meta_cli: MetadataClient, pd_client: Arc, scan_pool_size: usize, - resolver: BackupStreamResolver, + resolver: BackupStreamResolver, ) -> (Self, future![()]) where E: KvEngine, - RT: CdcHandle + 'static, + HInit: CdcHandle + Sync + 'static, + HChkLd: CdcHandle + 'static, { let (tx, rx) = channel(MESSAGE_BUFFER_SIZE); let scan_pool_handle = spawn_executors(initial_loader.clone(), scan_pool_size); let op = Self { - regions: initial_loader.regions.clone(), + regions, meta_cli, pd_client, range_router: initial_loader.sink.clone(), @@ -522,7 +506,8 @@ where region, self.get_last_checkpoint_of(&for_task, region).await?, handle.clone(), - ); + ) + .await; Result::Ok(()) } .await; @@ -567,7 +552,8 @@ where Err(Error::Other(box_err!("Nature is boring"))) }); let tso = self.get_last_checkpoint_of(&for_task, region).await?; - self.observe_over_with_initial_data_from_checkpoint(region, tso, handle.clone()); + self.observe_over_with_initial_data_from_checkpoint(region, tso, handle.clone()) + .await; } } Ok(()) @@ -702,13 +688,13 @@ where Ok(cp.ts) } - fn spawn_scan(&self, cmd: ScanCmd) { + async fn spawn_scan(&self, cmd: ScanCmd) { // we should not spawn initial scanning tasks to the tokio blocking pool // because it is also used for converting sync File I/O to async. (for now!) // In that condition, if we blocking for some resources(for example, the // `MemoryQuota`) at the block threads, we may meet some ghosty // deadlock. - let s = self.scan_pool_handle.request(cmd); + let s = self.scan_pool_handle.request(cmd).await; if let Err(err) = s { let region_id = err.0.region.get_id(); annotate!(err, "BUG: scan_pool closed") @@ -716,7 +702,7 @@ where } } - fn observe_over_with_initial_data_from_checkpoint( + async fn observe_over_with_initial_data_from_checkpoint( &self, region: &Region, last_checkpoint: TimeStamp, @@ -730,6 +716,7 @@ where last_checkpoint, _work: self.scans.clone().work(), }) + .await } fn find_task_by_region(&self, r: &Region) -> Option { @@ -748,8 +735,9 @@ mod test { #[derive(Clone, Copy)] struct NoopInitialScan; + #[async_trait::async_trait] impl InitialScan for NoopInitialScan { - fn do_initial_scan( + async fn do_initial_scan( &self, _region: &Region, _start_ts: txn_types::TimeStamp, @@ -787,17 +775,20 @@ mod test { let pool = spawn_executors(NoopInitialScan, 1); let wg = CallbackWaitGroup::new(); - fail::cfg("execute_scan_command", "sleep(100)").unwrap(); + fail::cfg("execute_scan_command_sleep_100", "return").unwrap(); for _ in 0..100 { let wg = wg.clone(); - pool.request(ScanCmd { - region: Default::default(), - handle: Default::default(), - last_checkpoint: Default::default(), - // Note: Maybe make here a Box or some other trait? - _work: wg.work(), - }) - .unwrap() + assert!( + pool._pool + .block_on(pool.request(ScanCmd { + region: Default::default(), + handle: Default::default(), + last_checkpoint: Default::default(), + // Note: Maybe make here a Box or some other trait? 
+ _work: wg.work(), + })) + .is_ok() + ) } should_finish_in(move || drop(pool), Duration::from_secs(5)); diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 2dae8ce745d9..5a6b2e0753b4 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -82,6 +82,7 @@ impl ActiveSubscription { self.handle.stop_observing(); } + #[cfg(test)] pub fn is_observing(&self) -> bool { self.handle.is_observing() } @@ -319,6 +320,7 @@ impl SubscriptionTracer { } /// check whether the region_id should be observed by this observer. + #[cfg(test)] pub fn is_observing(&self, region_id: u64) -> bool { let sub = self.0.get_mut(®ion_id); match sub { diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 52b6f0e93914..5e798a8428c5 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -18,14 +18,12 @@ use std::{ use async_compression::{tokio::write::ZstdEncoder, Level}; use engine_rocks::ReadPerfInstant; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; -use futures::{channel::mpsc, executor::block_on, ready, task::Poll, FutureExt, StreamExt}; +use futures::{ready, task::Poll, FutureExt}; use kvproto::{ brpb::CompressionType, metapb::Region, raft_cmdpb::{CmdType, Request}, }; -use raft::StateRole; -use raftstore::{coprocessor::RegionInfoProvider, RegionInfo}; use tikv::storage::CfStatistics; use tikv_util::{ box_err, @@ -33,7 +31,6 @@ use tikv_util::{ self_thread_inspector, IoStat, ThreadInspector, ThreadInspectorImpl as OsInspector, }, time::Instant, - warn, worker::Scheduler, Either, }; @@ -79,65 +76,6 @@ pub fn redact(key: &impl AsRef<[u8]>) -> log_wrappers::Value<'_> { log_wrappers::Value::key(key.as_ref()) } -/// RegionPager seeks regions with leader role in the range. -pub struct RegionPager

{ - regions: P, - start_key: Vec, - end_key: Vec, - reach_last_region: bool, -} - -impl RegionPager

{ - pub fn scan_from(regions: P, start_key: Vec, end_key: Vec) -> Self { - Self { - regions, - start_key, - end_key, - reach_last_region: false, - } - } - - pub fn next_page(&mut self, size: usize) -> Result> { - if self.start_key >= self.end_key || self.reach_last_region { - return Ok(vec![]); - } - - let (mut tx, rx) = mpsc::channel(size); - let end_key = self.end_key.clone(); - self.regions - .seek_region( - &self.start_key, - Box::new(move |i| { - let r = i - .filter(|r| r.role == StateRole::Leader) - .take(size) - .take_while(|r| r.region.start_key < end_key) - .try_for_each(|r| tx.try_send(r.clone())); - if let Err(_err) = r { - warn!("failed to scan region and send to initlizer") - } - }), - ) - .map_err(|err| { - Error::Other(box_err!( - "failed to seek region for start key {}: {}", - redact(&self.start_key), - err - )) - })?; - let collected_regions = block_on(rx.collect::>()); - self.start_key = collected_regions - .last() - .map(|region| region.region.end_key.to_owned()) - // no leader region found. - .unwrap_or_default(); - if self.start_key.is_empty() { - self.reach_last_region = true; - } - Ok(collected_regions) - } -} - /// StopWatch is a utility for record time cost in multi-stage tasks. /// NOTE: Maybe it should be generic over somewhat Clock type? pub struct StopWatch(Instant); diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index a209572c6d8b..79a756f684dd 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -16,6 +16,7 @@ mod all { use futures::{Stream, StreamExt}; use pd_client::PdClient; use test_raftstore::IsolationFilterFactory; + use tikv::config::BackupStreamConfig; use tikv_util::{box_err, defer, info, HandyRwLock}; use tokio::time::timeout; use txn_types::{Key, TimeStamp}; @@ -430,4 +431,25 @@ mod all { round1.iter().map(|k| k.as_slice()), )) } + + #[test] + fn update_config() { + let suite = SuiteBuilder::new_named("network_partition") + .nodes(1) + .build(); + let mut basic_config = BackupStreamConfig::default(); + basic_config.initial_scan_concurrency = 4; + suite.run(|| Task::ChangeConfig(basic_config.clone())); + suite.wait_with(|e| { + assert_eq!(e.initial_scan_semaphore.available_permits(), 4,); + true + }); + + basic_config.initial_scan_concurrency = 16; + suite.run(|| Task::ChangeConfig(basic_config.clone())); + suite.wait_with(|e| { + assert_eq!(e.initial_scan_semaphore.available_permits(), 16,); + true + }); + } } diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index e1df628d76b6..41a57f5858b1 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -31,14 +31,11 @@ use kvproto::{ }; use pd_client::PdClient; use protobuf::parse_from_bytes; -use raftstore::{ - router::{CdcRaftRouter, ServerRaftStoreRouter}, - RegionInfoAccessor, -}; +use raftstore::{router::CdcRaftRouter, RegionInfoAccessor}; use resolved_ts::LeadershipResolver; use tempdir::TempDir; use test_pd_client::TestPdClient; -use test_raftstore::{new_server_cluster, Cluster, ServerCluster, SimulateTransport}; +use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; use test_util::retry; use tikv::config::BackupStreamConfig; use tikv_util::{ @@ -57,11 +54,6 @@ pub type TestEndpoint = Endpoint< ErrorStore, RegionInfoAccessor, engine_test::kv::KvTestEngine, - CdcRaftRouter< - SimulateTransport< - ServerRaftStoreRouter, - >, - >, TestPdClient, >; diff --git 
a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 09f389a22307..77d3a35e3068 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -1,6 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::borrow::Cow; +use std::{ + borrow::Cow, + sync::{Arc, Mutex}, +}; // #[PerformanceCriticalPath] use crossbeam::channel::TrySendError; @@ -406,6 +409,33 @@ where ) -> RaftStoreResult<()>; } +impl> CdcHandle for Arc> { + fn capture_change( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + change_observer: ChangeObserver, + callback: Callback<::Snapshot>, + ) -> RaftStoreResult<()> { + Mutex::lock(self).unwrap().capture_change( + region_id, + region_epoch, + change_observer, + callback, + ) + } + + fn check_leadership( + &self, + region_id: u64, + callback: Callback<::Snapshot>, + ) -> RaftStoreResult<()> { + Mutex::lock(self) + .unwrap() + .check_leadership(region_id, callback) + } +} + /// A wrapper of SignificantRouter that is specialized for implementing /// CdcHandle. #[derive(Clone)] diff --git a/src/config/mod.rs b/src/config/mod.rs index 9b8ecad50f9c..8a2fa291ff19 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2833,6 +2833,7 @@ pub struct BackupStreamConfig { pub initial_scan_pending_memory_quota: ReadableSize, #[online_config(skip)] pub initial_scan_rate_limit: ReadableSize, + pub initial_scan_concurrency: usize, } impl BackupStreamConfig { @@ -2860,6 +2861,9 @@ impl BackupStreamConfig { ) .into()); } + if self.initial_scan_concurrency == 0 { + return Err("the `initial_scan_concurrency` shouldn't be zero".into()); + } Ok(()) } } @@ -2887,6 +2891,7 @@ impl Default for BackupStreamConfig { file_size_limit, initial_scan_pending_memory_quota: ReadableSize(quota_size as _), initial_scan_rate_limit: ReadableSize::mb(60), + initial_scan_concurrency: 6, temp_file_memory_quota: cache_size, } } From 6ff85fcc7a6384da445ef166b745ab998cc20b8d Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 22 Sep 2023 11:28:45 +0800 Subject: [PATCH 062/203] tests: fix unstable test_query_stats test (#15657) close tikv/tikv#15656 tests: fix unstable test_query_stats test Signed-off-by: nolouch --- tests/Cargo.toml | 2 +- tests/integrations/raftstore/test_stats.rs | 78 +++++++++++----------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 0081d5e95bc9..f3928e97eb8c 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -41,7 +41,7 @@ path = "benches/deadlock_detector/mod.rs" [features] default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] +failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints", "raft_log_engine/failpoints"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 13e718b269d7..073382ced179 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -262,19 +262,10 @@ fn test_raw_query_stats_tmpl() { req.set_raw_get(get_req); req }); - batch_commands(&ctx, &client, get_command, &start_key); - assert!(check_split_key( - cluster, - F::encode_raw_key_owned(start_key.clone(), None).into_encoded(), - None - 
)); - if check_query_num_read( - cluster, - store_id, - region_id, - QueryKind::Get, - (i + 1) * 1000, - ) { + if i == 0 { + batch_commands(&ctx, &client, get_command, &start_key); + } + if check_query_num_read(cluster, store_id, region_id, QueryKind::Get, 1000) { flag = true; break; } @@ -284,14 +275,16 @@ fn test_raw_query_stats_tmpl() { fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); - test_query_num::(raw_get, true); - test_query_num::(raw_batch_get, true); - test_query_num::(raw_scan, true); - test_query_num::(raw_batch_scan, true); + test_query_num::(raw_get, true, true); + test_query_num::(raw_batch_get, true, true); + test_query_num::(raw_scan, true, true); + test_query_num::(raw_batch_scan, true, true); if F::IS_TTL_ENABLED { - test_query_num::(raw_get_key_ttl, true); + test_query_num::(raw_get_key_ttl, true, true); } - test_query_num::(raw_batch_get_command, true); + // requests may failed caused by `EpochNotMatch` after split when auto split is + // enabled, disable it. + test_query_num::(raw_batch_get_command, true, false); test_raw_delete_query::(); fail::remove("mock_tick_interval"); fail::remove("mock_hotspot_threshold"); @@ -385,19 +378,10 @@ fn test_txn_query_stats_tmpl() { req.set_get(get_req); req }); - batch_commands(&ctx, &client, get_command, &start_key); - assert!(check_split_key( - cluster, - Key::from_raw(&start_key).as_encoded().to_vec(), - None - )); - if check_query_num_read( - cluster, - store_id, - region_id, - QueryKind::Get, - (i + 1) * 1000, - ) { + if i == 0 { + batch_commands(&ctx, &client, get_command, &start_key); + } + if check_query_num_read(cluster, store_id, region_id, QueryKind::Get, 1000) { flag = true; break; } @@ -407,11 +391,13 @@ fn test_txn_query_stats_tmpl() { fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); - test_query_num::(get, false); - test_query_num::(batch_get, false); - test_query_num::(scan, false); - test_query_num::(scan_lock, false); - test_query_num::(batch_get_command, false); + test_query_num::(get, false, true); + test_query_num::(batch_get, false, true); + test_query_num::(scan, false, true); + test_query_num::(scan_lock, false, true); + // requests may failed caused by `EpochNotMatch` after split when auto split is + // enabled, disable it. 
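(The `batch_get_command` case that comment refers to follows right below.) More broadly, the stabilization replaces exact one-shot assertions with polling: issue the workload once, then repeatedly check the counter until it reaches the expected value or a deadline passes. A generic form of that helper, not tied to this suite (`wait_until` is illustrative):

    // Sketch only: poll a condition until it holds or a deadline passes.
    use std::{
        thread::sleep,
        time::{Duration, Instant},
    };

    fn wait_until(mut cond: impl FnMut() -> bool, timeout: Duration, interval: Duration) -> bool {
        let deadline = Instant::now() + timeout;
        loop {
            if cond() {
                return true;
            }
            if Instant::now() > deadline {
                return false;
            }
            sleep(interval);
        }
    }

    fn main() {
        let start = Instant::now();
        // Stand-in for "the query counter reached the expected value".
        let ok = wait_until(
            || start.elapsed() > Duration::from_millis(20),
            Duration::from_secs(1),
            Duration::from_millis(5),
        );
        assert!(ok);
    }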
+ test_query_num::(batch_get_command, false, false); test_txn_delete_query::(); test_pessimistic_lock(); test_rollback(); @@ -573,15 +559,20 @@ pub fn test_rollback() { )); } -fn test_query_num(query: Box, is_raw_kv: bool) { +fn test_query_num(query: Box, is_raw_kv: bool, auto_split: bool) { let (mut cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); - cluster.cfg.split.qps_threshold = Some(0); + if auto_split { + cluster.cfg.split.qps_threshold = Some(0); + } else { + cluster.cfg.split.qps_threshold = Some(1000000); + } cluster.cfg.split.split_balance_score = 2.0; cluster.cfg.split.split_contained_score = 2.0; cluster.cfg.split.detect_times = 1; cluster.cfg.split.sample_threshold = 0; cluster.cfg.storage.set_api_version(F::TAG); + cluster.cfg.server.enable_request_batch = false; }); ctx.set_api_version(F::CLIENT_TAG); @@ -763,4 +754,13 @@ fn batch_commands( } }); rx.recv_timeout(Duration::from_secs(10)).unwrap(); + sleep_ms(100); + // triage metrics flush + for _ in 0..10 { + let mut req = ScanRequest::default(); + req.set_context(ctx.to_owned()); + req.start_key = start_key.to_owned(); + req.end_key = vec![]; + client.kv_scan(&req).unwrap(); + } } From 15d2c7dcd1780d11ee118e0b9b68ca06bf2bf388 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 22 Sep 2023 13:43:44 +0800 Subject: [PATCH 063/203] raftstore-v2: fix incorrect GC peer requests to source peer after merge (#15643) close tikv/tikv#15623 After merge, target region sends GC peer requests to removed source peers, however the region_id in requests is set to target region id incorrectly. As results, source region removed peers may be left forever. This commit fixes above issue by putting source removed_records to merged_records, so that region id can be set correctly. Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 3 +- .../operation/command/admin/conf_change.rs | 15 +++- .../operation/command/admin/merge/commit.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 65 ++++++++++++--- .../raftstore-v2/src/operation/ready/mod.rs | 11 ++- components/raftstore/src/store/config.rs | 7 ++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + tests/integrations/raftstore/test_life.rs | 4 +- tests/integrations/raftstore/test_merge.rs | 81 +++++++++++++++++++ 11 files changed, 168 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f05b651b1ad3..0ba7b9d34990 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3253,7 +3253,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#ecdbf1f8c130089392a9bb5f86f7577deddfbed5" +source = "git+https://github.com/pingcap/kvproto.git#090f247be15c00a6000a4d23669ac3e95ea9fcd5" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 5f036c610206..73b65bc09041 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -1,7 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - cmp, ops::{Deref, DerefMut}, path::Path, sync::{ @@ -140,7 +139,7 @@ impl StoreContext { self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = self.cfg.check_long_uncommitted_interval.0; self.tick_batch[PeerTick::GcPeer as usize].wait_duration = - 60 * cmp::min(Duration::from_secs(1), self.cfg.raft_base_tick_interval.0); + self.cfg.gc_peer_check_interval.0; } // Return None means it has passed unsafe vote period. diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index c7b8481aa7cf..77ef6c823c14 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -609,10 +609,17 @@ impl Apply { ); removed_records.retain(|p| !updates.contains(&p.get_id())); merged_records.retain_mut(|r| { - let mut sources: Vec<_> = r.take_source_peers().into(); - sources.retain(|p| !updates.contains(&p.get_id())); - r.set_source_peers(sources.into()); - !r.get_source_peers().is_empty() + // Clean up source peers if they acknowledge GcPeerRequest. + let mut source_peers: Vec<_> = r.take_source_peers().into(); + source_peers.retain(|p| !updates.contains(&p.get_id())); + r.set_source_peers(source_peers.into()); + // Clean up source removed records (peers) if they acknowledge GcPeerRequest. + let mut source_removed_records: Vec<_> = r.take_source_removed_records().into(); + source_removed_records.retain(|p| !updates.contains(&p.get_id())); + r.set_source_removed_records(source_removed_records.into()); + // Clean up merged records if all source peers and source removed records are + // empty. + !r.get_source_peers().is_empty() || !r.get_source_removed_records().is_empty() }); self.region_state_mut() .set_removed_records(removed_records.into()); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 5208dcc96a86..8e55f89a7d21 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -540,9 +540,6 @@ impl Apply { state.set_state(PeerState::Normal); assert!(!state.has_merge_state()); state.set_tablet_index(index); - let mut removed_records: Vec<_> = state.take_removed_records().into(); - removed_records.append(&mut source_state.get_removed_records().into()); - state.set_removed_records(removed_records.into()); let mut merged_records: Vec<_> = state.take_merged_records().into(); merged_records.append(&mut source_state.get_merged_records().into()); state.set_merged_records(merged_records.into()); @@ -550,6 +547,7 @@ impl Apply { merged_record.set_source_region_id(source_region.get_id()); merged_record.set_source_epoch(source_region.get_region_epoch().clone()); merged_record.set_source_peers(source_region.get_peers().into()); + merged_record.set_source_removed_records(source_state.get_removed_records().into()); merged_record.set_target_region_id(region.get_id()); merged_record.set_target_epoch(region.get_region_epoch().clone()); merged_record.set_target_peers(region.get_peers().into()); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 395774e17f13..6b778ad6c4af 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -424,7 +424,13 @@ impl Store { }; if destroyed { if 
msg.get_is_tombstone() { + let msg_region_epoch = msg.get_region_epoch().clone(); if let Some(msg) = build_peer_destroyed_report(&mut msg) { + info!(self.logger(), "peer reports destroyed"; + "from_peer" => ?msg.get_from_peer(), + "from_region_epoch" => ?msg_region_epoch, + "region_id" => ?msg.get_region_id(), + "to_peer_id" => ?msg.get_to_peer().get_id()); let _ = ctx.trans.send(msg); } return false; @@ -581,7 +587,11 @@ impl Peer { .iter() .find(|p| p.id == msg.get_from_peer().get_id()) { - let tombstone_msg = self.tombstone_message_for_same_region(peer.clone()); + let tombstone_msg = self.tombstone_message( + self.region_id(), + self.region().get_region_epoch().clone(), + peer.clone(), + ); self.add_message(tombstone_msg); true } else { @@ -589,13 +599,24 @@ impl Peer { } } - fn tombstone_message_for_same_region(&self, peer: metapb::Peer) -> RaftMessage { - let region_id = self.region_id(); + fn tombstone_message( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + peer: metapb::Peer, + ) -> RaftMessage { let mut tombstone_message = RaftMessage::default(); + if self.region_id() != region_id { + // After merge, target region needs to GC peers of source region. + let extra_msg = tombstone_message.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgGcPeerRequest); + let check_peer = extra_msg.mut_check_gc_peer(); + check_peer.set_from_region_id(self.region_id()); + } tombstone_message.set_region_id(region_id); tombstone_message.set_from_peer(self.peer().clone()); tombstone_message.set_to_peer(peer); - tombstone_message.set_region_epoch(self.region().get_region_epoch().clone()); + tombstone_message.set_region_epoch(region_epoch); tombstone_message.set_is_tombstone(true); tombstone_message } @@ -604,6 +625,10 @@ impl Peer { match msg.get_to_peer().get_id().cmp(&self.peer_id()) { cmp::Ordering::Less => { if let Some(msg) = build_peer_destroyed_report(msg) { + info!(self.logger, "peer reports destroyed"; + "from_peer" => ?msg.get_from_peer(), + "from_region_epoch" => ?msg.get_region_epoch(), + "to_peer_id" => ?msg.get_to_peer().get_id()); self.add_message(msg); } } @@ -675,6 +700,7 @@ impl Peer { && state.get_merged_records().iter().all(|p| { p.get_source_peers() .iter() + .chain(p.get_source_removed_records()) .all(|p| p.get_id() != gc_peer_id) }) { @@ -699,18 +725,33 @@ impl Peer { } let mut need_gc_ids = Vec::with_capacity(5); let gc_context = self.gc_peer_context(); + let mut tombstone_removed_records = + |region_id, region_epoch: &metapb::RegionEpoch, peer: &metapb::Peer| { + need_gc_ids.push(peer.get_id()); + if gc_context.confirmed_ids.contains(&peer.get_id()) { + return; + } + + let msg = self.tombstone_message(region_id, region_epoch.clone(), peer.clone()); + // For leader, it's OK to send gc message immediately. + let _ = ctx.trans.send(msg); + }; for peer in state.get_removed_records() { - need_gc_ids.push(peer.get_id()); - if gc_context.confirmed_ids.contains(&peer.get_id()) { - continue; + tombstone_removed_records(self.region_id(), self.region().get_region_epoch(), peer); + } + // For merge, we need to + // 1. ask source removed peers to destroy. + for record in state.get_merged_records() { + for peer in record.get_source_removed_records() { + tombstone_removed_records( + record.get_source_region_id(), + record.get_source_epoch(), + peer, + ); } - - let msg = self.tombstone_message_for_same_region(peer.clone()); - // For leader, it's OK to send gc message immediately. - let _ = ctx.trans.send(msg); } + // 2. 
ask target to check whether source should be deleted. for record in state.get_merged_records() { - // For merge, we ask target to check whether source should be deleted. for (source, target) in record .get_source_peers() .iter() diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index ba7170ac8c87..17845b5d0b8c 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -418,9 +418,10 @@ impl Peer { return; } + let msg_type = msg.get_message().get_msg_type(); // This can be a message that sent when it's still a follower. Nevertheleast, // it's meaningless to continue to handle the request as callbacks are cleared. - if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + if msg_type == MessageType::MsgReadIndex && self.is_leader() && (msg.get_message().get_from() == raft::INVALID_ID || msg.get_message().get_from() == self.peer_id()) @@ -429,14 +430,18 @@ impl Peer { return; } - if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + if msg_type == MessageType::MsgReadIndex && self.is_leader() && self.on_step_read_index(ctx, msg.mut_message()) { // Read index has respond in `on_step_read_index`, // No need to step again. } else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { - error!(self.logger, "raft step error"; "err" => ?e); + error!(self.logger, "raft step error"; + "from_peer" => ?msg.get_from_peer(), + "region_epoch" => ?msg.get_region_epoch(), + "message_type" => ?msg_type, + "err" => ?e); } else { let committed_index = self.raft_group().raft.raft_log.committed; self.report_commit_log_duration(ctx, pre_committed_index, committed_index); diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index f96ed2b7a45a..95c4aed93499 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -169,6 +169,9 @@ pub struct Config { /// and try to alert monitoring systems, if there is any. pub abnormal_leader_missing_duration: ReadableDuration, pub peer_stale_state_check_interval: ReadableDuration, + /// Interval to check GC peers. 
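The GC-peer pass shown earlier runs on the `gc_peer_check_interval` tick declared just below: the leader keeps asking the peers recorded in `removed_records`, and in each merged record's `source_removed_records`, to destroy themselves, and the records are pruned once destruction is confirmed. A simplified model of the pruning side, using plain ids instead of `metapb::Peer` (`MergedRecord` and `on_gc_confirmed` are illustrative):

    // Sketch only: per-merge GC bookkeeping, reduced to peer ids.
    #[derive(Debug)]
    struct MergedRecord {
        source_region_id: u64,
        source_peers: Vec<u64>,
        source_removed_records: Vec<u64>,
    }

    // Drop confirmed peers; keep a record only while something is left to GC.
    fn on_gc_confirmed(records: &mut Vec<MergedRecord>, confirmed: &[u64]) {
        for r in records.iter_mut() {
            r.source_peers.retain(|id| !confirmed.contains(id));
            r.source_removed_records.retain(|id| !confirmed.contains(id));
        }
        records.retain(|r| !r.source_peers.is_empty() || !r.source_removed_records.is_empty());
    }

    fn main() {
        let mut records = vec![MergedRecord {
            source_region_id: 1001,
            source_peers: vec![4],
            source_removed_records: vec![5],
        }];
        on_gc_confirmed(&mut records, &[4]);
        assert_eq!(records.len(), 1); // peer 5 from the removed records is still pending
        on_gc_confirmed(&mut records, &[5]);
        assert!(records.is_empty()); // fully acknowledged, record dropped
    }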
+ #[doc(hidden)] + pub gc_peer_check_interval: ReadableDuration, #[online_config(hidden)] pub leader_transfer_max_log_lag: u64, @@ -510,6 +513,7 @@ impl Default for Config { renew_leader_lease_advance_duration: ReadableDuration::secs(0), allow_unsafe_vote_after_start: false, report_region_buckets_tick_interval: ReadableDuration::secs(10), + gc_peer_check_interval: ReadableDuration::secs(60), max_snapshot_file_raw_size: ReadableSize::mb(100), unreachable_backoff: ReadableDuration::secs(10), // TODO: make its value reasonable @@ -1060,6 +1064,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["leader_transfer_max_log_lag"]) .set(self.leader_transfer_max_log_lag as f64); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["gc_peer_check_interval"]) + .set(self.gc_peer_check_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["snap_apply_batch_size"]) diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index c6f787df9a79..1ac6e3840f1a 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -212,6 +212,7 @@ fn test_serde_custom_tikv_config() { max_leader_missing_duration: ReadableDuration::hours(12), abnormal_leader_missing_duration: ReadableDuration::hours(6), peer_stale_state_check_interval: ReadableDuration::hours(2), + gc_peer_check_interval: ReadableDuration::days(1), leader_transfer_max_log_lag: 123, snap_apply_batch_size: ReadableSize::mb(12), snap_apply_copy_symlink: true, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ece8cabae497..fe1fa066ae8d 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -183,6 +183,7 @@ max-peer-down-duration = "12m" max-leader-missing-duration = "12h" abnormal-leader-missing-duration = "6h" peer-stale-state-check-interval = "2h" +gc-peer-check-interval = "1d" leader-transfer-max-log-lag = 123 snap-apply-batch-size = "12MB" snap-apply-copy-symlink = true diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index e940ca30a7c4..f3b5704a586f 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -11,7 +11,7 @@ use test_raftstore::{ new_learner_peer, new_peer, sleep_ms, Filter, FilterFactory, Simulator as S1, }; use test_raftstore_v2::Simulator as S2; -use tikv_util::{time::Instant, HandyRwLock}; +use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; struct ForwardFactory { node_id: u64, @@ -64,6 +64,7 @@ fn test_gc_peer_tiflash_engine() { let mut cluster_v1 = test_raftstore::new_node_cluster(1, 2); let mut cluster_v2 = test_raftstore_v2::new_node_cluster(1, 2); cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + cluster_v2.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); cluster_v1.pd_client.disable_default_operator(); cluster_v2.pd_client.disable_default_operator(); let r11 = cluster_v1.run_conf_change(); @@ -144,6 +145,7 @@ fn test_gc_peer_tiflash_engine() { fn test_gc_removed_peer() { let mut cluster = test_raftstore::new_node_cluster(1, 2); cluster.cfg.raft_store.enable_v2_compatible_learner = true; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); cluster.pd_client.disable_default_operator(); let region_id = cluster.run_conf_change(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index ceb888a2b22a..0b17ff72ae72 100644 --- 
a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1731,3 +1731,84 @@ fn test_prepare_merge_with_5_nodes_snapshot() { // Now leader should replicate more logs and figure out a safe index. pd_client.must_merge(left.get_id(), right.get_id()); } + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_peer_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + let left_peer_on_store3 = find_peer(&left, 3).unwrap().clone(); + pd_client.must_remove_peer(left.get_id(), left_peer_on_store3); + must_get_none(&cluster.get_engine(3), b"k1"); + + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + cluster.add_send_filter(IsolationFilterFactory::new(3)); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3.clone()); + + // So cluster becomes + // left region: 1(leader) 2 | + // right region: 1(leader) 2 | 3 (removed but not yet destroyed) + // | means isolation. + + // Merge right to left. + pd_client.must_merge(right.get_id(), left.get_id()); + let region_state = cluster.region_local_state(left.get_id(), 1); + assert!( + !region_state.get_merged_records()[0] + .get_source_removed_records() + .is_empty(), + "{:?}", + region_state + ); + assert!( + !region_state + .get_removed_records() + .iter() + .any(|p| p.get_id() == right_peer_on_store3.get_id()), + "{:?}", + region_state + ); + + // Cluster filters and wait for gc peer ticks. + cluster.clear_send_filters(); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Right region replica on store 3 must be removed. 
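A note on the knob exercised in the test above: `gc_peer_check_interval` is a `tikv_util::config::ReadableDuration`, so a test can shrink it before the cluster starts and convert it back to raw milliseconds when waiting out a few ticks. A minimal sketch under that assumption; this is a fragment, `cluster` is the usual test-harness cluster and the 500 ms value is illustrative only:

    use test_raftstore::sleep_ms;
    use tikv_util::config::ReadableDuration;

    // Shorten the GC-peer tick so the test observes several rounds quickly.
    cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500);

    // ... run the workload, then wait roughly three ticks before asserting
    // that removed peers have been cleaned up.
    sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis());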
+ cluster.must_region_not_exist(right.get_id(), 3); + + let start = Instant::now(); + loop { + sleep_ms(cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + let region_state = cluster.region_local_state(left.get_id(), 1); + if (region_state.get_merged_records().is_empty() + || region_state.get_merged_records()[0] + .get_source_removed_records() + .is_empty()) + && region_state.get_removed_records().is_empty() + { + break; + } + if start.elapsed() > Duration::from_secs(5) { + panic!( + "source removed records and removed records must be empty, {:?}", + region_state + ); + } + } +} From bbfedd409b5965c04b9edcb34f0a0907c75d6dd2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:36:13 +0800 Subject: [PATCH 064/203] upgrade lz4-sys to 1.9.4 to tackle security issue (#15652) ref tikv/tikv#15621 upgrade lz4-sys to 1.9.4 to tackle security issue Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ba7b9d34990..e9f937e32662 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3483,9 +3483,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ "cc", "libc 0.2.146", From 384aaeb381ffc8f9ac881432a00e437933777c55 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Mon, 25 Sep 2023 13:42:15 +0800 Subject: [PATCH 065/203] copr: fix cannot get the request source for resource control (#15606) close tikv/tikv#15663 copr: fix cannot get the request source for analyze with resource control Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../resource_control/src/resource_group.rs | 4 ++ src/server/service/kv.rs | 8 ++-- tests/integrations/raftstore/test_stats.rs | 40 +++++++++++++++++-- .../resource_metering/test_cpu.rs | 6 ++- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 0e40255b3544..09e90e9dd018 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -239,6 +239,10 @@ impl ResourceGroupManager { rg: &str, request_source: &str, ) -> Option> { + fail_point!("only_check_source_task_name", |name| { + assert_eq!(name.clone().unwrap(), request_source.to_string()); + None + }); if let Some(group) = self.resource_groups.get(rg) { if !group.fallback_default { return group.get_resource_limiter(request_source); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 6f1cf0eaa1f2..4a961eedf191 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1190,7 +1190,7 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { + Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -1199,7 +1199,7 @@ fn handle_batch_commands_request( 
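For context on the `only_check_source_task_name` failpoint introduced in resource_group.rs above: with the `fail` crate (and its `failpoints` feature enabled), `fail::cfg(name, "return(value)")` makes the matching `fail_point!` closure fire with `Some("value")`, which is how the tests later in this patch assert that the request source really reaches the resource-control lookup. A minimal sketch of the pattern; the function and test names here are illustrative, not TiKV APIs:

    use fail::fail_point;

    fn lookup_limiter_stub(request_source: &str) {
        // Fires only when the failpoint is configured; `name` carries the
        // value passed via `return(...)`.
        fail_point!("only_check_source_task_name", |name| {
            assert_eq!(name.unwrap(), request_source);
        });
        // ... the normal resource-group lookup would continue here ...
    }

    #[test]
    fn request_source_reaches_lookup() {
        fail::cfg("only_check_source_task_name", "return(test_stats)").unwrap();
        lookup_limiter_stub("test_stats");
        fail::remove("only_check_source_task_name");
    }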
.with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_copr(copr, Some(peer.to_string()), req) .map_ok(|resp| { resp.map(oneof!(batch_commands_response::response::Cmd::Coprocessor)) @@ -1224,7 +1224,7 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -1233,7 +1233,7 @@ fn handle_batch_commands_request( .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = $future_fn($($arg,)* req) .map_ok(oneof!(batch_commands_response::response::Cmd::$cmd)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.$metric_name.inc()); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 073382ced179..7701fe167c8b 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -12,6 +12,7 @@ use futures::{executor::block_on, SinkExt, StreamExt}; use grpcio::*; use kvproto::{kvrpcpb::*, pdpb::QueryKind, tikvpb::*, tikvpb_grpc::TikvClient}; use pd_client::PdClient; +use test_coprocessor::{DagSelect, ProductTable}; use test_raftstore::*; use tikv_util::{config::*, store::QueryStats}; use txn_types::Key; @@ -388,9 +389,34 @@ fn test_txn_query_stats_tmpl() { } assert!(flag); }); + let batch_coprocessor: Box = + Box::new(|ctx, cluster, client, store_id, region_id, start_key| { + let mut flag = false; + for i in 0..3 { + let coprocessor: Box = Box::new(|ctx, _start_key| { + let mut req = BatchCommandsRequestRequest::new(); + let table = ProductTable::new(); + let mut cop_req = DagSelect::from(&table).build(); + cop_req.set_context(ctx.clone()); + req.set_coprocessor(cop_req); + req + }); + if i == 0 { + batch_commands(&ctx, &client, coprocessor, &start_key); + } + // here cannot read any data, so expect is 0. may need fix. here mainly used to + // verify the request source is as expect. + if check_query_num_read(cluster, store_id, region_id, QueryKind::Coprocessor, 0) { + flag = true; + break; + } + } + assert!(flag); + }); fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); + fail::cfg("only_check_source_task_name", "return(test_stats)").unwrap(); test_query_num::(get, false, true); test_query_num::(batch_get, false, true); test_query_num::(scan, false, true); @@ -398,12 +424,14 @@ fn test_txn_query_stats_tmpl() { // requests may failed caused by `EpochNotMatch` after split when auto split is // enabled, disable it. 
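The kv.rs change above is the heart of this fix: the old code called `take_request_source`, which moves the string out of the `Context` and leaves it empty, so the coprocessor path could no longer see the source that resource control needs. `get_request_source().to_owned()` copies the value and leaves the Context intact for downstream consumers. A minimal sketch of the generated accessors' behaviour (the value is illustrative):

    use kvproto::kvrpcpb::Context;

    let mut ctx = Context::default();
    ctx.set_request_source("test_stats".to_owned());

    // Copying keeps the field available for later consumers.
    let copied = ctx.get_request_source().to_owned();
    assert_eq!(copied, "test_stats");
    assert_eq!(ctx.get_request_source(), "test_stats");

    // `take_*` hands the value out but clears it in the message.
    let taken = ctx.take_request_source();
    assert_eq!(taken, "test_stats");
    assert_eq!(ctx.get_request_source(), "");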
test_query_num::(batch_get_command, false, false); + test_query_num::(batch_coprocessor, false, false); test_txn_delete_query::(); test_pessimistic_lock(); test_rollback(); fail::remove("mock_tick_interval"); fail::remove("mock_hotspot_threshold"); fail::remove("mock_collect_tick_interval"); + fail::remove("only_check_source_task_name"); } #[allow(clippy::extra_unused_type_parameters)] @@ -488,10 +516,11 @@ fn put( } fn test_pessimistic_lock() { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); + ctx.set_request_source("test_stats".to_owned()); let key = b"key2".to_vec(); let store_id = 1; put(&cluster, &client, &ctx, store_id, key.clone()); @@ -528,9 +557,10 @@ fn test_pessimistic_lock() { } pub fn test_rollback() { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); + ctx.set_request_source("test_stats".to_owned()); let key = b"key2".to_vec(); let store_id = 1; put(&cluster, &client, &ctx, store_id, key.clone()); @@ -575,6 +605,7 @@ fn test_query_num(query: Box, is_raw_kv: bool, auto_split: b cluster.cfg.server.enable_request_batch = false; }); ctx.set_api_version(F::CLIENT_TAG); + ctx.set_request_source("test_stats".to_owned()); let mut k = b"key".to_vec(); // When a peer becomes leader, it can't read before committing to current term. @@ -602,6 +633,7 @@ fn test_raw_delete_query() { cluster.cfg.storage.set_api_version(F::TAG); }); ctx.set_api_version(F::CLIENT_TAG); + ctx.set_request_source("test_stats".to_owned()); raw_put::(&cluster, &client, &ctx, store_id, k.clone()); // Raw Delete @@ -627,10 +659,10 @@ fn test_txn_delete_query() { let store_id = 1; { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); - + ctx.set_request_source("test_stats".to_owned()); put(&cluster, &client, &ctx, store_id, k.clone()); // DeleteRange let mut delete_req = DeleteRangeRequest::default(); diff --git a/tests/integrations/resource_metering/test_cpu.rs b/tests/integrations/resource_metering/test_cpu.rs index c15bf445ed3c..12d6fa4fbe08 100644 --- a/tests/integrations/resource_metering/test_cpu.rs +++ b/tests/integrations/resource_metering/test_cpu.rs @@ -12,6 +12,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use futures::{executor::block_on, StreamExt}; use kvproto::kvrpcpb::Context; +use resource_control::ResourceGroupManager; use test_coprocessor::{DagSelect, Insert, ProductTable, Store}; use tidb_query_datatype::codec::Datum; use tikv::{ @@ -95,7 +96,10 @@ pub fn test_reschedule_coprocessor() { let mut req = DagSelect::from(&table).build(); let mut ctx = Context::default(); ctx.set_resource_group_tag(tag.as_bytes().to_vec()); + ctx.set_request_source("test".to_owned()); req.set_context(ctx); + fail::cfg("only_check_source_task_name", "return(test)").unwrap(); + defer!(fail::remove("only_check_source_task_name")); assert!( !block_on(endpoint.parse_and_handle_unary_request(req, None)) .consume() @@ -229,7 +233,7 @@ fn setup_test_suite() -> 
(TestSuite, Store, Endpoint) cm, test_suite.get_tag_factory(), Arc::new(QuotaLimiter::default()), - None, + Some(Arc::new(ResourceGroupManager::default())), ); (test_suite, store, endpoint) } From e01c97891e6520f48e93a507d21c1f2ae0915dbf Mon Sep 17 00:00:00 2001 From: qupeng Date: Mon, 25 Sep 2023 16:42:16 +0800 Subject: [PATCH 066/203] resolved-ts: speed up advancing when stores get partitioned (#15567) close tikv/tikv#15679 Signed-off-by: qupeng --- components/resolved_ts/src/advance.rs | 100 ++++++++++-------- .../resolved_ts/tests/integrations/mod.rs | 31 +++++- 2 files changed, 86 insertions(+), 45 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 59478f5affba..dd6e9c2002c4 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -167,10 +167,7 @@ pub struct LeadershipResolver { // store_id -> check leader request, record the request to each stores. store_req_map: HashMap, - // region_id -> region, cache the information of regions. - region_map: HashMap>, - // region_id -> peers id, record the responses. - resp_map: HashMap>, + progresses: HashMap, checking_regions: HashSet, valid_regions: HashSet, @@ -196,8 +193,7 @@ impl LeadershipResolver { region_read_progress, store_req_map: HashMap::default(), - region_map: HashMap::default(), - resp_map: HashMap::default(), + progresses: HashMap::default(), valid_regions: HashSet::default(), checking_regions: HashSet::default(), last_gc_time: Instant::now_coarse(), @@ -209,8 +205,7 @@ impl LeadershipResolver { let now = Instant::now_coarse(); if now - self.last_gc_time > self.gc_interval { self.store_req_map = HashMap::default(); - self.region_map = HashMap::default(); - self.resp_map = HashMap::default(); + self.progresses = HashMap::default(); self.valid_regions = HashSet::default(); self.checking_regions = HashSet::default(); self.last_gc_time = now; @@ -222,10 +217,7 @@ impl LeadershipResolver { v.regions.clear(); v.ts = 0; } - for v in self.region_map.values_mut() { - v.clear(); - } - for v in self.resp_map.values_mut() { + for v in self.progresses.values_mut() { v.clear(); } self.checking_regions.clear(); @@ -252,8 +244,7 @@ impl LeadershipResolver { let store_id = self.store_id; let valid_regions = &mut self.valid_regions; - let region_map = &mut self.region_map; - let resp_map = &mut self.resp_map; + let progresses = &mut self.progresses; let store_req_map = &mut self.store_req_map; let checking_regions = &mut self.checking_regions; for region_id in ®ions { @@ -275,13 +266,13 @@ impl LeadershipResolver { } let leader_info = core.get_leader_info(); + let prog = progresses + .entry(*region_id) + .or_insert_with(|| RegionProgress::new(peer_list.len())); let mut unvotes = 0; for peer in peer_list { if peer.store_id == store_id && peer.id == leader_id { - resp_map - .entry(*region_id) - .or_insert_with(|| Vec::with_capacity(peer_list.len())) - .push(store_id); + prog.resps.push(store_id); } else { // It's still necessary to check leader on learners even if they don't vote // because performing stale read on learners require it. @@ -299,15 +290,14 @@ impl LeadershipResolver { } } } + // Check `region_has_quorum` here because `store_map` can be empty, // in which case `region_has_quorum` won't be called any more. 
- if unvotes == 0 && region_has_quorum(peer_list, &resp_map[region_id]) { + if unvotes == 0 && region_has_quorum(peer_list, &prog.resps) { + prog.resolved = true; valid_regions.insert(*region_id); } else { - region_map - .entry(*region_id) - .or_insert_with(|| Vec::with_capacity(peer_list.len())) - .extend_from_slice(peer_list); + prog.peers.extend_from_slice(peer_list); } } }); @@ -321,7 +311,6 @@ impl LeadershipResolver { .values() .find(|req| !req.regions.is_empty()) .map_or(0, |req| req.regions[0].compute_size()); - let store_count = store_req_map.len(); let mut check_leader_rpcs = Vec::with_capacity(store_req_map.len()); for (store_id, req) in store_req_map { if req.regions.is_empty() { @@ -387,6 +376,7 @@ impl LeadershipResolver { .with_label_values(&["all"]) .observe(start.saturating_elapsed_secs()); }); + let rpc_count = check_leader_rpcs.len(); for _ in 0..rpc_count { // Use `select_all` to avoid the process getting blocked when some @@ -396,10 +386,16 @@ impl LeadershipResolver { match res { Ok((to_store, resp)) => { for region_id in resp.regions { - resp_map - .entry(region_id) - .or_insert_with(|| Vec::with_capacity(store_count)) - .push(to_store); + if let Some(prog) = progresses.get_mut(®ion_id) { + if prog.resolved { + continue; + } + prog.resps.push(to_store); + if region_has_quorum(&prog.peers, &prog.resps) { + prog.resolved = true; + valid_regions.insert(region_id); + } + } } } Err((to_store, reconnect, err)) => { @@ -409,24 +405,19 @@ impl LeadershipResolver { } } } - } - for (region_id, prs) in region_map { - if prs.is_empty() { - // The peer had the leadership before, but now it's no longer - // the case. Skip checking the region. - continue; - } - if let Some(resp) = resp_map.get(region_id) { - if resp.is_empty() { - // No response, maybe the peer lost leadership. - continue; - } - if region_has_quorum(prs, resp) { - valid_regions.insert(*region_id); - } + if valid_regions.len() >= progresses.len() { + break; } } - self.valid_regions.drain().collect() + let res: Vec = self.valid_regions.drain().collect(); + if res.len() != checking_regions.len() { + warn!( + "check leader returns valid regions different from checking regions"; + "valid_regions" => res.len(), + "checking_regions" => checking_regions.len(), + ); + } + res } } @@ -552,6 +543,27 @@ async fn get_tikv_client( Ok(cli) } +struct RegionProgress { + resolved: bool, + peers: Vec, + resps: Vec, +} + +impl RegionProgress { + fn new(len: usize) -> Self { + RegionProgress { + resolved: false, + peers: Vec::with_capacity(len), + resps: Vec::with_capacity(len), + } + } + fn clear(&mut self) { + self.resolved = false; + self.peers.clear(); + self.resps.clear(); + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 634aa66c6014..881d0b299f1f 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -9,9 +9,10 @@ use kvproto::{kvrpcpb::*, metapb::RegionEpoch}; use pd_client::PdClient; use resolved_ts::Task; use tempfile::Builder; -use test_raftstore::sleep_ms; +use test_raftstore::{sleep_ms, IsolationFilterFactory}; use test_sst_importer::*; pub use testsuite::*; +use tikv_util::store::new_peer; #[test] fn test_resolved_ts_basic() { @@ -231,3 +232,31 @@ fn test_scan_log_memory_quota_exceeded() { suite.stop(); } + +// This case checks resolved ts can still be advanced quickly even if some TiKV +// stores are partitioned. 
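The reason the `RegionProgress` bookkeeping above speeds up advancing when a store is partitioned: a region is marked `resolved` as soon as the responses gathered so far (plus the local leader's own vote) form a quorum, instead of waiting for every store's check-leader RPC to return or time out. A simplified worked example; the real check is `region_has_quorum`, which also inspects peer roles:

    // Simplified stand-in for `region_has_quorum`: a strict majority of the
    // voters have answered.
    fn has_quorum(total_voters: usize, resps: &[u64]) -> bool {
        resps.len() * 2 > total_voters
    }

    // Peers on stores {1, 2, 3}; store 3 is partitioned. The local vote from
    // store 1 plus store 2's response already form a quorum, so the region
    // resolves without waiting on store 3.
    let resps = vec![1u64, 2];
    assert!(has_quorum(3, &resps));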
+#[test] +fn test_store_partitioned() { + let mut suite = TestSuite::new(3); + let r = suite.cluster.get_region(&[]); + suite.cluster.must_transfer_leader(r.id, new_peer(1, 1)); + suite.must_get_rts_ge(r.id, block_on(suite.cluster.pd_client.get_tso()).unwrap()); + + suite + .cluster + .add_send_filter(IsolationFilterFactory::new(3)); + let tso = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + for _ in 0..50 { + let rts = suite.region_resolved_ts(r.id).unwrap(); + if rts > tso { + if rts.physical() - tso.physical() < 3000 { + break; + } else { + panic!("resolved ts doesn't advance in time") + } + } + sleep_ms(100); + } + + suite.stop(); +} From b95f5cd0353506d728d0a50b7a898b503de072e1 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 25 Sep 2023 17:07:47 +0800 Subject: [PATCH 067/203] build: add missing failpoint feature for raft-engine (#15676) ref tikv/tikv#15462 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- tests/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c4c70e999be9..81be4d36906f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ snmalloc = ["tikv_alloc/snmalloc"] portable = ["engine_rocks/portable"] sse = ["engine_rocks/sse"] mem-profiling = ["tikv_alloc/mem-profiling"] -failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints"] +failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints", "raft_log_engine/failpoints"] cloud-aws = ["encryption_export/cloud-aws", "sst_importer/cloud-aws"] cloud-gcp = ["encryption_export/cloud-gcp", "sst_importer/cloud-gcp"] cloud-azure = ["encryption_export/cloud-azure", "sst_importer/cloud-azure"] diff --git a/tests/Cargo.toml b/tests/Cargo.toml index f3928e97eb8c..0081d5e95bc9 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -41,7 +41,7 @@ path = "benches/deadlock_detector/mod.rs" [features] default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints", "raft_log_engine/failpoints"] +failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] From 8fb721ef18a9e1ba354e5a91d780ed6647641ab9 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 26 Sep 2023 07:01:45 +0800 Subject: [PATCH 068/203] raftstore-v2: adjust lockcf default write buffer size and limit (#15678) close tikv/tikv#15630 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- src/config/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 8a2fa291ff19..63e36a543dc0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,7 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. 
const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; -const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(32); +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(128); /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -1403,7 +1403,7 @@ impl DbConfig { self.writecf.max_compactions.get_or_insert(1); self.lockcf .write_buffer_size - .get_or_insert(ReadableSize::mb(4)); + .get_or_insert(ReadableSize::mb(32)); self.lockcf .write_buffer_limit .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); From 312e0fb7f9f77e6002d0a336a58e84f3c4c12216 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 26 Sep 2023 13:22:46 +0800 Subject: [PATCH 069/203] *: Revert "*: update rust-toolchain (#15584)" (#15683) close tikv/tikv#15653 Signed-off-by: glorv Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi --- Cargo.lock | 627 ++---------------- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 1 - cmd/tikv-ctl/src/main.rs | 2 +- components/backup-stream/Cargo.toml | 2 +- components/backup-stream/src/errors.rs | 4 +- .../backup-stream/src/metadata/client.rs | 5 +- components/backup-stream/src/router.rs | 5 +- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 4 +- components/backup/src/endpoint.rs | 6 +- components/batch-system/src/fsm.rs | 8 +- components/case_macros/src/lib.rs | 10 +- components/cdc/src/delegate.rs | 2 +- components/cdc/src/endpoint.rs | 6 +- .../concurrency_manager/src/lock_table.rs | 4 +- components/coprocessor_plugin_api/src/util.rs | 4 - components/encryption/src/config.rs | 9 +- components/engine_rocks/src/logger.rs | 2 + components/engine_rocks/src/properties.rs | 15 +- .../engine_tirocks/src/properties/mvcc.rs | 2 +- .../engine_tirocks/src/properties/range.rs | 10 +- components/engine_traits/src/flush.rs | 2 +- components/engine_traits/src/lib.rs | 4 +- components/engine_traits/src/tablet.rs | 2 +- .../online_config_derive/src/lib.rs | 14 +- components/raftstore-v2/src/batch/store.rs | 6 +- components/raftstore-v2/src/lib.rs | 1 - .../operation/command/admin/merge/prepare.rs | 4 +- .../src/operation/command/admin/split.rs | 4 +- .../command/admin/transfer_leader.rs | 20 +- components/raftstore-v2/src/operation/life.rs | 8 +- .../raftstore-v2/src/operation/query/local.rs | 4 +- .../src/operation/ready/apply_trace.rs | 2 +- .../src/operation/ready/snapshot.rs | 14 +- .../raftstore-v2/src/operation/txn_ext.rs | 4 +- .../src/operation/unsafe_recovery/demote.rs | 5 +- .../src/worker/cleanup/compact.rs | 16 +- .../raftstore-v2/src/worker/pd/region.rs | 15 +- .../raftstore-v2/src/worker/pd/split.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 13 +- .../tests/integrations/cluster.rs | 4 +- .../raftstore/src/coprocessor/dispatcher.rs | 5 +- components/raftstore/src/errors.rs | 2 +- components/raftstore/src/lib.rs | 4 +- .../raftstore/src/store/async_io/write.rs | 6 +- .../raftstore/src/store/entry_storage.rs | 8 +- components/raftstore/src/store/fsm/apply.rs | 18 +- components/raftstore/src/store/fsm/peer.rs | 8 +- components/raftstore/src/store/msg.rs | 24 +- components/raftstore/src/store/peer.rs | 48 +- .../raftstore/src/store/peer_storage.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 6 +- .../raftstore/src/store/simple_write.rs | 24 +- components/raftstore/src/store/snap.rs | 4 +- components/raftstore/src/store/snap/io.rs | 4 +- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 3 +- 
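On the lock-CF buffer defaults adjusted in #15678 above: they are applied with `Option::get_or_insert`, so the larger write buffer size and limit only take effect when the user left those fields unset; an explicitly configured value still wins. A tiny sketch of that semantics, with plain integers standing in for `ReadableSize`:

    let mut unset: Option<u64> = None;   // user did not configure the field
    unset.get_or_insert(32);             // default is filled in
    assert_eq!(unset, Some(32));

    let mut set: Option<u64> = Some(8);  // user configured it explicitly
    set.get_or_insert(32);               // default is ignored
    assert_eq!(set, Some(8));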
components/raftstore/src/store/worker/pd.rs | 20 +- components/raftstore/src/store/worker/read.rs | 3 +- .../raftstore/src/store/worker/region.rs | 4 +- .../raftstore/src/store/worker/split_check.rs | 8 +- .../src/store/worker/split_controller.rs | 11 +- components/resolved_ts/src/cmd.rs | 6 +- components/resolved_ts/src/endpoint.rs | 20 +- components/resolved_ts/src/scanner.rs | 3 +- .../resource_control/src/resource_group.rs | 6 +- components/resource_metering/src/lib.rs | 2 +- components/resource_metering/src/model.rs | 2 +- .../src/recorder/sub_recorder/cpu.rs | 4 +- .../resource_metering/tests/recorder_test.rs | 12 +- components/server/src/common.rs | 4 +- components/snap_recovery/src/leader_keeper.rs | 4 +- components/sst_importer/src/import_mode2.rs | 2 +- components/sst_importer/src/sst_importer.rs | 17 +- components/sst_importer/src/util.rs | 3 +- components/test_coprocessor/src/store.rs | 2 +- .../example_plugin/src/lib.rs | 2 +- components/test_pd/src/server.rs | 8 +- components/test_pd_client/src/pd.rs | 2 +- components/test_raftstore-v2/src/cluster.rs | 3 +- components/test_raftstore-v2/src/lib.rs | 2 - components/test_raftstore-v2/src/node.rs | 2 +- components/test_raftstore-v2/src/server.rs | 14 +- components/test_raftstore/src/lib.rs | 2 - components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 8 +- .../tidb_query_codegen/src/rpn_function.rs | 35 +- .../src/codec/collation/mod.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 12 +- .../src/codec/data_type/mod.rs | 2 +- .../src/codec/data_type/scalar.rs | 17 +- .../tidb_query_datatype/src/codec/datum.rs | 8 +- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 4 +- .../src/codec/mysql/json/comparison.rs | 4 +- .../src/codec/mysql/json/jcodec.rs | 8 +- .../src/codec/mysql/json/json_modify.rs | 2 +- .../src/codec/mysql/time/mod.rs | 13 +- .../src/codec/mysql/time/tz.rs | 4 - .../src/codec/row/v2/row_slice.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../src/index_scan_executor.rs | 4 +- components/tidb_query_executors/src/runner.rs | 18 +- .../src/selection_executor.rs | 4 +- .../src/util/aggr_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- components/tidb_query_expr/src/impl_cast.rs | 2 +- .../tidb_query_expr/src/impl_miscellaneous.rs | 5 +- components/tidb_query_expr/src/impl_string.rs | 6 +- components/tidb_query_expr/src/lib.rs | 2 - .../tidb_query_expr/src/types/expr_eval.rs | 11 +- components/tikv_kv/src/cursor.rs | 2 +- components/tikv_kv/src/lib.rs | 1 - components/tikv_util/src/logger/formatter.rs | 6 +- components/tikv_util/src/lru.rs | 2 +- components/tikv_util/src/memory.rs | 2 +- .../src/metrics/allocator_metrics.rs | 2 +- components/tikv_util/src/mpsc/future.rs | 2 - components/tikv_util/src/sys/cpu_time.rs | 2 +- components/tikv_util/src/timer.rs | 4 +- components/txn_types/src/timestamp.rs | 10 +- components/txn_types/src/types.rs | 18 +- rust-toolchain | 2 +- src/config/mod.rs | 20 +- src/coprocessor/metrics.rs | 2 +- src/coprocessor/mod.rs | 2 - src/import/sst_service.rs | 6 +- src/lib.rs | 3 +- src/server/debug2.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 1 - src/server/gc_worker/gc_manager.rs | 8 +- src/server/gc_worker/gc_worker.rs | 14 +- src/server/lock_manager/deadlock.rs | 9 +- src/server/raftkv/mod.rs | 5 +- src/server/raftkv2/mod.rs | 4 +- src/server/raftkv2/node.rs | 4 +- src/server/service/debug.rs | 1 + src/server/service/diagnostics/log.rs | 18 +- src/server/service/diagnostics/sys.rs | 2 +- 
src/server/service/kv.rs | 1 + src/storage/lock_manager/lock_wait_context.rs | 12 +- .../lock_manager/lock_waiting_queue.rs | 7 +- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 32 +- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 21 +- src/storage/mvcc/reader/scanner/forward.rs | 4 +- src/storage/raw/raw_mvcc.rs | 2 +- src/storage/txn/actions/prewrite.rs | 2 + src/storage/txn/commands/atomic_store.rs | 4 +- src/storage/txn/commands/prewrite.rs | 26 +- src/storage/txn/latch.rs | 20 +- src/storage/txn/sched_pool.rs | 2 +- tests/Cargo.toml | 1 - .../benches/coprocessor_executors/util/mod.rs | 2 +- tests/benches/hierarchy/mvcc/mod.rs | 2 +- .../misc/coprocessor/codec/chunk/chunk.rs | 176 ----- .../misc/coprocessor/codec/chunk/mod.rs | 140 ---- tests/benches/misc/coprocessor/codec/mod.rs | 1 - tests/benches/misc/raftkv/mod.rs | 2 - tests/benches/raftstore/mod.rs | 2 +- tests/failpoints/cases/mod.rs | 3 - tests/failpoints/cases/test_disk_full.rs | 8 +- tests/failpoints/cases/test_engine.rs | 1 - tests/failpoints/cases/test_hibernate.rs | 1 - tests/failpoints/cases/test_merge.rs | 13 +- tests/failpoints/cases/test_pd_client.rs | 1 - .../failpoints/cases/test_pd_client_legacy.rs | 1 - tests/failpoints/cases/test_rawkv.rs | 2 +- .../cases/test_read_execution_tracker.rs | 11 +- tests/failpoints/cases/test_split_region.rs | 3 +- tests/failpoints/cases/test_storage.rs | 4 +- tests/failpoints/cases/test_transaction.rs | 2 +- .../failpoints/cases/test_transfer_leader.rs | 4 +- tests/integrations/backup/mod.rs | 1 - tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/mod.rs | 2 - .../integrations/raftstore/test_bootstrap.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 4 +- tests/integrations/raftstore/test_stats.rs | 1 - 180 files changed, 600 insertions(+), 1486 deletions(-) delete mode 100644 tests/benches/misc/coprocessor/codec/chunk/chunk.rs delete mode 100644 tests/benches/misc/coprocessor/codec/chunk/mod.rs diff --git a/Cargo.lock b/Cargo.lock index e9f937e32662..124a87f069eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "once_cell", "version_check 0.9.4", ] @@ -59,8 +59,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if 1.0.0", - "const-random", - "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -80,21 +78,6 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc 0.2.146", -] - [[package]] name = "ansi_term" version = "0.11.0" @@ -148,217 +131,6 @@ dependencies = [ "nodrop", ] -[[package]] -name = "arrow" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" -dependencies = [ - "ahash 0.8.3", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-array" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" -dependencies = [ - "ahash 0.8.3", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "hashbrown 0.14.0", - "num 0.4.1", -] - -[[package]] -name = "arrow-buffer" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" -dependencies = [ - "bytes", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-cast" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "chrono", - "half 2.3.1", - "lexical-core", - "num 0.4.1", -] - -[[package]] -name = "arrow-csv" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-ipc" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", -] - -[[package]] -name = "arrow-json" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "indexmap 2.0.0", - "lexical-core", - "num 0.4.1", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-ord" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-row" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" -dependencies = [ - "ahash 0.8.3", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half 2.3.1", - "hashbrown 0.14.0", -] - -[[package]] -name = "arrow-schema" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" - -[[package]] -name = "arrow-select" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num 0.4.1", -] - -[[package]] -name = "arrow-string" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "num 0.4.1", - "regex", - "regex-syntax 0.7.5", -] - [[package]] name = "async-channel" version = "1.6.1" @@ -611,7 +383,7 @@ dependencies = [ "bytes", "dyn-clone", "futures 0.3.15", - "getrandom 0.2.10", + "getrandom 0.2.3", "http-types", "log", "paste", @@ -795,7 +567,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", - "indexmap 1.9.3", + "indexmap", "kvproto", "lazy_static", "log_wrappers", @@ -1011,9 +783,9 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] name = "bytes" -version = "1.5.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" dependencies = [ "serde", ] @@ -1112,12 +884,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" dependencies = [ "jobserver", - "libc 0.2.146", ] [[package]] @@ -1189,17 +960,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", + "num-integer", "num-traits", "serde", - "wasm-bindgen", - "windows-targets", + "time 0.1.42", ] [[package]] @@ -1247,7 +1015,7 @@ dependencies = [ "atty", "bitflags", "clap_derive", - "indexmap 1.9.3", + "indexmap", "lazy_static", "os_str_bytes", "strsim 0.10.0", @@ -1346,28 +1114,6 @@ dependencies = [ "cache-padded", ] -[[package]] -name = "const-random" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" -dependencies = [ - "getrandom 0.2.10", - 
"once_cell", - "proc-macro-hack", - "tiny-keccak", -] - [[package]] name = "const_format" version = "0.2.30" @@ -1409,9 +1155,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" [[package]] name = "cpu-time" @@ -1590,12 +1336,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - [[package]] name = "crypto-common" version = "0.1.6" @@ -2017,12 +1757,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "errno" version = "0.2.8" @@ -2303,16 +2037,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags", - "rustc_version 0.4.0", -] - [[package]] name = "flate2" version = "1.0.11" @@ -2644,14 +2368,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if 1.0.0", "js-sys", "libc 0.2.146", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.10.2+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2755,7 +2479,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap", "slab", "tokio", "tokio-util", @@ -2768,22 +2492,11 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if 1.0.0", - "crunchy", - "num-traits", -] - [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "hashbrown" @@ -3003,29 +2716,6 @@ dependencies = [ "tokio-native-tls", ] -[[package]] -name = "iana-time-zone" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "ident_case" version = "1.0.1" @@ -3051,22 +2741,12 @@ checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" [[package]] name = "indexmap" -version = "1.9.3" +version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.9.1", ] [[package]] @@ -3083,7 +2763,7 @@ checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ "ahash 0.7.4", "atty", - "indexmap 1.9.3", + "indexmap", "itoa 1.0.1", "lazy_static", "log", @@ -3234,7 +2914,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" dependencies = [ - "indexmap 1.9.3", + "indexmap", ] [[package]] @@ -3274,70 +2954,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" -[[package]] -name = "lexical-core" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" -dependencies = [ - "lexical-util", - "static_assertions", -] - [[package]] name = "libc" version = "0.1.12" @@ -3370,12 +2986,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "libm" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" - 
[[package]] name = "libmimalloc-sys" version = "0.1.21" @@ -3866,35 +3476,10 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab3e176191bc4faad357e3122c4747aa098ac880e88b168f106386128736cf4a" dependencies = [ - "num-complex 0.3.0", - "num-integer", - "num-iter", - "num-rational 0.3.0", - "num-traits", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex 0.4.4", + "num-complex", "num-integer", "num-iter", - "num-rational 0.4.1", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", + "num-rational", "num-traits", ] @@ -3907,15 +3492,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-complex" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" -dependencies = [ - "num-traits", -] - [[package]] name = "num-derive" version = "0.3.0" @@ -3950,9 +3526,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" dependencies = [ "autocfg", "num-traits", @@ -3960,9 +3536,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" dependencies = [ "autocfg", "num-integer", @@ -3980,26 +3556,13 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ "autocfg", - "libm", ] [[package]] @@ -4029,7 +3592,7 @@ checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ "base64 0.13.0", "chrono", - "getrandom 0.2.10", + "getrandom 0.2.3", "http", "rand 0.8.5", "serde", @@ -4320,7 +3883,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", - "indexmap 1.9.3", + "indexmap", ] [[package]] @@ -5052,7 +4615,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", ] [[package]] @@ -5145,19 +4708,19 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "redox_syscall 0.2.11", ] [[package]] name = "regex" -version = "1.7.3" +version = "1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.6.29", + "regex-syntax", ] [[package]] @@ -5171,15 +4734,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" [[package]] name = "remove_dir_all" @@ -5722,7 +5279,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" dependencies = [ - "half 1.8.2", + "half", "serde", ] @@ -5752,7 +5309,7 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ - "indexmap 1.9.3", + "indexmap", "itoa 0.4.4", "ryu", "serde", @@ -6684,7 +6241,6 @@ name = "tests" version = "0.0.1" dependencies = [ "api_version", - "arrow", "async-trait", "batch-system", "byteorder", @@ -6883,7 +6439,7 @@ dependencies = [ "log_wrappers", "match-template", "nom 7.1.0", - "num 0.3.0", + "num", "num-derive 0.3.0", "num-traits", "ordered-float", @@ -6945,7 +6501,7 @@ dependencies = [ "hex 0.4.2", "log_wrappers", "match-template", - "num 0.3.0", + "num", "num-traits", "openssl", "panic_hook", @@ -7362,15 +6918,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinytemplate" version = "1.2.0" @@ -7572,7 +7119,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap 1.9.3", + "indexmap", "pin-project", "pin-project-lite", "rand 0.8.5", @@ -7810,7 +7357,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "serde", ] @@ -7820,7 +7367,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", ] [[package]] @@ -7896,6 +7443,12 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -8034,15 +7587,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.32.0" @@ -8062,42 +7606,21 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm 0.42.0", + "windows_aarch64_gnullvm", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm 0.42.0", + "windows_x86_64_gnullvm", "windows_x86_64_msvc 0.42.0", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -8110,12 +7633,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -8128,12 +7645,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -8146,12 +7657,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -8164,24 +7669,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -8194,12 +7687,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "winreg" version = "0.7.0" diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index d1a917f5624b..ef3ae7f80232 100644 --- a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -265,7 +265,6 @@ where .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) } -#[allow(clippy::permissions_set_readonly_false)] fn add_write_permission>(path: P) -> Result<(), String> { let path = path.as_ref(); let mut pmt = std::fs::metadata(path) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index c1ab11cc507d..6baa1fe6c39d 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(once_cell)] #![feature(let_chains)] -#![feature(lazy_cell)] #[macro_use] extern crate log; diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 4f53c39b9dba..8c1edc89a482 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -51,7 +51,7 @@ futures-io = "0.3" grpcio = { workspace = true } hex = "0.4" # Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 -indexmap = "=1.9.3" +indexmap = "=1.6.2" kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index cc720d5aecc1..c3cc91da9ff5 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -158,7 +158,7 @@ where /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. -#[macro_export] +#[macro_export(crate)] macro_rules! 
annotate { ($inner: expr, $message: expr) => { { @@ -242,7 +242,6 @@ mod test { #[bench] // 2,685 ns/iter (+/- 194) - #[allow(clippy::unnecessary_literal_unwrap)] fn contextual_add_format_strings_directly(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( @@ -306,7 +305,6 @@ mod test { #[bench] // 773 ns/iter (+/- 8) - #[allow(clippy::unnecessary_literal_unwrap)] fn baseline(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index df8f0f025b19..1fdc1b3b1e8d 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -663,10 +663,11 @@ impl MetadataClient { let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; - match global_cp { + let cp = match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, - } + }; + cp } _ => Checkpoint::from_kv(&r[0])?, }; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index ae4b98b16876..1786d513dc88 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -731,7 +731,6 @@ impl TempFileKey { } } - #[allow(deprecated)] fn format_date_time(ts: u64, t: FormatType) -> impl Display { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); @@ -956,9 +955,7 @@ impl StreamTaskInfo { .last_flush_time .swap(Box::into_raw(Box::new(Instant::now())), Ordering::SeqCst); // manual gc last instant - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; } pub fn should_flush(&self, flush_interval: &Duration) -> bool { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 5a6b2e0753b4..c70ad9c8038b 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -307,7 +307,7 @@ impl SubscriptionTracer { } }; - let subscription = sub.value_mut(); + let mut subscription = sub.value_mut(); let old_epoch = subscription.meta.get_region_epoch(); let new_epoch = new_region.get_region_epoch(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 5e798a8428c5..974b1762cf26 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -280,7 +280,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. -#[macro_export] +#[macro_export(crate)] macro_rules! try_send { ($s:expr, $task:expr) => { match $s.schedule($task) { @@ -304,7 +304,7 @@ macro_rules! try_send { /// `backup_stream_debug`. because once we enable debug log for all crates, it /// would soon get too verbose to read. using this macro now we can enable debug /// log level for the crate only (even compile time...). -#[macro_export] +#[macro_export(crate)] macro_rules! 
debug { ($($t: tt)+) => { if cfg!(feature = "backup-stream-debug") { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index d6330f499667..a4efc162092c 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -2493,8 +2493,8 @@ pub mod tests { fn test_backup_file_name() { let region = metapb::Region::default(); let store_id = 1; - let test_cases = ["s3", "local", "gcs", "azure", "hdfs"]; - let test_target = [ + let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = vec![ "1/0_0_000", "1/0_0_000", "1_0_0_000", @@ -2513,7 +2513,7 @@ pub mod tests { assert_eq!(target.to_string(), prefix_arr.join(delimiter)); } - let test_target = ["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { let key = None; let filename = backup_file_name(store_id, ®ion, key, storage_name); diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 16113dde8e2a..3fa5ad15a640 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -149,9 +149,7 @@ impl FsmState { Ok(_) => return, Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; return; } Err(s) => s, @@ -181,9 +179,7 @@ impl Drop for FsmState { fn drop(&mut self) { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; } self.state_cnt.fetch_sub(1, Ordering::Relaxed); } diff --git a/components/case_macros/src/lib.rs b/components/case_macros/src/lib.rs index b779373a59d2..057b68065d2f 100644 --- a/components/case_macros/src/lib.rs +++ b/components/case_macros/src/lib.rs @@ -5,12 +5,12 @@ use proc_macro::{Group, Literal, TokenStream, TokenTree}; macro_rules! transform_idents_in_stream_to_string { - ($stream:ident, $transform:ident) => { + ($stream:ident, $transform:expr) => { $stream .into_iter() .map(|token_tree| match token_tree { TokenTree::Ident(ref ident) => { - Literal::string(&$transform(&ident.to_string())).into() + Literal::string(&$transform(ident.to_string())).into() } // find all idents in `TokenGroup` apply and reconstruct the group TokenTree::Group(ref group) => TokenTree::Group(Group::new( @@ -20,7 +20,7 @@ macro_rules! transform_idents_in_stream_to_string { .into_iter() .map(|group_token_tree| { if let TokenTree::Ident(ref ident) = group_token_tree { - Literal::string(&$transform(&ident.to_string())).into() + Literal::string(&$transform(ident.to_string())).into() } else { group_token_tree } @@ -53,7 +53,7 @@ fn to_snake(s: &str) -> String { /// e.g. `HelloWorld` -> `hello-world` #[proc_macro] pub fn kebab_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, to_kebab) + transform_idents_in_stream_to_string!(stream, |s: String| to_kebab(&s)) } /// Expands idents in the input stream as snake-case string literal @@ -61,5 +61,5 @@ pub fn kebab_case(stream: TokenStream) -> TokenStream { /// e.g. 
`HelloWorld` -> `hello_world` #[proc_macro] pub fn snake_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, to_snake) + transform_idents_in_stream_to_string!(stream, |s: String| to_snake(&s)) } diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 18528fd08e94..c82c4cb6f13e 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1437,7 +1437,7 @@ mod tests { #[test] fn test_observed_range() { - for case in [ + for case in vec![ (b"".as_slice(), b"".as_slice(), false), (b"a", b"", false), (b"", b"b", false), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9d5601eba84d..a5f00a08028f 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1015,10 +1015,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint( - &self, + pub fn find_first<'m, T>( + &'m self, start_key: Option<&Key>, end_key: Option<&Key>, mut pred: impl FnMut(Arc) -> Option, diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 06e8847402f1..31d75610d75b 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -19,14 +19,10 @@ pub type PluginConstructorSignature = /// Type signature of the exported function with symbol /// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. -// emit this warn because to fix it need to change the data type which is a breaking change. -#[allow(improper_ctypes_definitions)] pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; /// Type signature of the exported function with symbol /// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. -// emit this warn because to fix it need to change the data type which is a breaking change. -#[allow(improper_ctypes_definitions)] pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// Automatically collected build information about the plugin that is exposed diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 4455e4ce7cc9..23e049e0df42 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -134,12 +134,11 @@ impl KmsConfig { } } -#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. Not allowed to use if // encryption is enabled. (i.e. when encryption_config.method != Plaintext). - #[default] Plaintext, // Pass master key from a file, with key encoded as a readable hex string. The file should end @@ -157,6 +156,12 @@ pub enum MasterKeyConfig { }, } +impl Default for MasterKeyConfig { + fn default() -> Self { + MasterKeyConfig::Plaintext + } +} + mod encryption_method_serde { use std::fmt; diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index 185411dcacfb..85f4de713acc 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -3,6 +3,7 @@ use rocksdb::{DBInfoLogLevel as InfoLogLevel, Logger}; use tikv_util::{crit, debug, error, info, warn}; // TODO(yiwu): abstract the Logger interface. 
+#[derive(Default)] pub struct RocksdbLogger; impl Logger for RocksdbLogger { @@ -43,6 +44,7 @@ impl Logger for TabletLogger { } } +#[derive(Default)] pub struct RaftDbLogger; impl Logger for RaftDbLogger { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 700d7621dc64..87ccab9e5ab4 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -144,7 +144,10 @@ pub struct RangeProperties { impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { - let idx = self.offsets.binary_search_by_key(&key, |(k, _)| k).unwrap(); + let idx = self + .offsets + .binary_search_by_key(&key, |&(ref k, _)| k) + .unwrap(); &self.offsets[idx].1 } @@ -202,11 +205,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -224,7 +227,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |(ref k, _)| k) + .binary_search_by_key(&start_key, |&(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -236,7 +239,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; @@ -866,7 +869,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(KeyMode::Txn); b.iter(|| { - for (k, v) in &entries { + for &(ref k, ref v) in &entries { collector.add(k, v, DBEntryType::Put, 0, 0); } }); diff --git a/components/engine_tirocks/src/properties/mvcc.rs b/components/engine_tirocks/src/properties/mvcc.rs index 66c96284ea3e..1ca170f33d58 100644 --- a/components/engine_tirocks/src/properties/mvcc.rs +++ b/components/engine_tirocks/src/properties/mvcc.rs @@ -356,7 +356,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); b.iter(|| { - for (k, v) in &entries { + for &(ref k, ref v) in &entries { collector.add(k, v, EntryType::kEntryPut, 0, 0).unwrap(); } }); diff --git a/components/engine_tirocks/src/properties/range.rs b/components/engine_tirocks/src/properties/range.rs index e8a3411b02f9..59b9e68a6bbb 100644 --- a/components/engine_tirocks/src/properties/range.rs +++ b/components/engine_tirocks/src/properties/range.rs @@ -53,7 +53,7 @@ impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { let idx = self .offsets - .binary_search_by_key(&key, |(k, _)| k) + .binary_search_by_key(&key, |&(ref k, _)| k) .unwrap(); &self.offsets[idx].1 } @@ -112,11 +112,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { + let end_offset = match 
self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -134,7 +134,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |(k, _)| k) + .binary_search_by_key(&start_key, |&(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -146,7 +146,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |(k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 6449399cef8b..8590236e1265 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -119,7 +119,7 @@ impl SstApplyState { for sst in ssts { let cf_index = data_cf_offset(sst.get_cf_name()); if let Some(metas) = sst_list.get_mut(cf_index) { - let _ = metas.extract_if(|entry| entry.sst.get_uuid() == sst.get_uuid()); + metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); } } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 0f89776e7fdb..e09b1b52733d 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,8 +253,8 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] -#![feature(str_split_remainder)] -#![feature(extract_if)] +#![feature(str_split_as_str)] +#![feature(drain_filter)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 64e6dcbd4b45..c88f1548513f 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -241,7 +241,7 @@ impl TabletRegistry { let mut parts = name.rsplit('_'); let suffix = parts.next()?.parse().ok()?; let id = parts.next()?.parse().ok()?; - let prefix = parts.remainder().unwrap_or(""); + let prefix = parts.as_str(); Some((prefix, id, suffix)) } diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index e48a540c6b80..bb37aad5924b 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -330,11 +330,15 @@ fn is_option_type(ty: &Type) -> bool { // TODO store (with lazy static) the vec of string // TODO maybe optimization, reverse the order of segments fn extract_option_segment(path: &Path) -> Option<&PathSegment> { - let idents_of_path = path.segments.iter().fold(String::new(), |mut acc, v| { - acc.push_str(&v.ident.to_string()); - acc.push('|'); - acc - }); + let idents_of_path = path + .segments + .iter() + .into_iter() + .fold(String::new(), |mut acc, v| { + acc.push_str(&v.ident.to_string()); + acc.push('|'); + acc + }); vec!["Option|", "std|option|Option|", "core|option|Option|"] .into_iter() .find(|s| idents_of_path == *s) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 73b65bc09041..cd5ae8f42f79 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -488,11 +488,7 @@ impl StorePollerBuilder { self.remove_dir(&path)?; continue; } - let Some((prefix, region_id, tablet_index)) = - self.tablet_registry.parse_tablet_name(&path) - else { - 
continue; - }; + let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 697d0525169a..5b5e132b9ceb 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -27,7 +27,6 @@ #![feature(box_into_inner)] #![feature(assert_matches)] #![feature(option_get_or_insert_default)] -#![allow(clippy::needless_pass_by_ref_mut)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 76b71a8906c2..d3d1896287c4 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -343,9 +343,7 @@ impl Peer { entry.get_data(), entry.get_index(), entry.get_term(), - ) else { - continue; - }; + ) else { continue }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 2fe2b4b57356..0f9cae7218df 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -1098,9 +1098,7 @@ mod test { } } - let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { - panic!() - }; + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { panic!() }; // update cache let mut cache = apply.tablet_registry().get(parent_id).unwrap(); cache.set(*tablet.downcast().unwrap()); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index f60b9828bbb9..4cdeba3bc411 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -50,21 +50,21 @@ impl Peer { /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader - /// to follower. - /// 2. execute_transfer_leader on follower If follower passes all necessary - /// checks, it will reply an ACK with type MsgTransferLeader and its - /// promised applied index. - /// 3. ready_to_transfer_leader on leader: Leader checks if it's appropriate - /// to transfer leadership. If it does, it calls raft transfer_leader API - /// to do the remaining work. + /// 1. pre_transfer_leader on leader: + /// Leader will send a MsgTransferLeader to follower. + /// 2. execute_transfer_leader on follower + /// If follower passes all necessary checks, it will reply an + /// ACK with type MsgTransferLeader and its promised applied index. + /// 3. ready_to_transfer_leader on leader: + /// Leader checks if it's appropriate to transfer leadership. If it + /// does, it calls raft transfer_leader API to do the remaining work. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. 
The follower applies the TransferLeader command and replies an ACK - /// with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// /// See also: tikv/rfcs#37. pub fn propose_transfer_leader( diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 6b778ad6c4af..4d1a59de0a6f 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -662,12 +662,8 @@ impl Peer { let check_peer_id = check.get_check_peer().get_id(); let records = self.storage().region_state().get_merged_records(); let Some(record) = records.iter().find(|r| { - r.get_source_peers() - .iter() - .any(|p| p.get_id() == check_peer_id) - }) else { - return; - }; + r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) + }) else { return }; let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { let source_checkpoint = super::merge_source_path( diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index fcc93636640d..ea802650f3d8 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -351,9 +351,7 @@ where match fut.await? { Some(query_res) => { if query_res.read().is_none() { - let QueryResult::Response(res) = query_res else { - unreachable!() - }; + let QueryResult::Response(res) = query_res else { unreachable!() }; // Get an error explicitly in header, // or leader reports KeyIsLocked error via read index. assert!( diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 2b6c9c666e6f..af0257e763f1 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -774,7 +774,7 @@ impl Peer { flushed = true; let flush_state = self.flush_state().clone(); - let apply_trace = self.storage_mut().apply_trace_mut(); + let mut apply_trace = self.storage_mut().apply_trace_mut(); let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 15caf5f0c847..9e0ed449cef5 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -570,9 +570,10 @@ impl Storage { pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { let mut states = self.snap_states.borrow_mut(); states.retain(|id, state| { - let SnapState::Generating { ref index, .. } = *state else { - return true; - }; + let SnapState::Generating { + ref index, + .. + } = *state else { return true; }; let snap_index = index.load(Ordering::SeqCst); if snap_index == 0 || compact_to <= snap_index + 1 { return true; @@ -599,9 +600,10 @@ impl Storage { } let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { - let SnapState::Generating { ref index, .. } = *state else { - return false; - }; + let SnapState::Generating { + ref index, + .. 
+ } = *state else { return false }; if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( self.logger(), diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 6c3a9269a7f8..272b2526b392 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -266,9 +266,7 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else { - unreachable!() - }; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; self.on_simple_write(ctx, write.header, write.data, write.ch); true } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index e7b3c8e62b8c..37962a454527 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -100,10 +100,7 @@ impl Peer { failed_voters, target_index, demote_after_exit, - }) = self.unsafe_recovery_state() - else { - return; - }; + }) = self.unsafe_recovery_state() else { return }; if self.raft_group().raft.raft_log.applied < *target_index { return; diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index feb519a04add..7acdb943b917 100644 --- a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -97,12 +97,8 @@ where ) { Ok(mut region_ids) => { for region_id in region_ids.drain(..) { - let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else { - continue; - }; - let Some(tablet) = tablet_cache.latest() else { - continue; - }; + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; for cf in &cf_names { if let Err(e) = tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) @@ -147,12 +143,8 @@ fn collect_regions_to_compact( ); let mut regions_to_compact = vec![]; for id in region_ids { - let Some(mut tablet_cache) = reg.get(id) else { - continue; - }; - let Some(tablet) = tablet_cache.latest() else { - continue; - }; + let Some(mut tablet_cache) = reg.get(id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; if tablet.auto_compactions_is_disabled().expect("cf") { info!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 999eccb49629..763e12fff072 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -113,7 +113,10 @@ where let approximate_keys = task.approximate_keys.unwrap_or_default(); let region_id = task.region.get_id(); - let peer_stat = self.region_peers.entry(region_id).or_default(); + let peer_stat = self + .region_peers + .entry(region_id) + .or_insert_with(PeerStat::default); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; @@ -370,7 +373,10 @@ where pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + 
.or_insert_with(PeerStat::default); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -392,7 +398,10 @@ where pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 7bafb6c442ad..7fec5a31bb60 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -142,10 +142,8 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await - else { - continue; - }; + let Ok(Some(region)) = + pd_client.get_region_by_id(split_info.region_id).await else { continue }; // Try to split the region with the given split key. if let Some(split_key) = split_info.split_key { Self::ask_batch_split_imp( diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index ef9739226e75..206e87b3a8e7 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -467,8 +467,7 @@ impl Runner { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) - else { + .map(|mut cache| cache.latest().cloned()) else { warn!( self.logger, "flush memtable failed to acquire tablet"; @@ -556,15 +555,7 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { - region_id, - tablet, - cf, - start_key, - end_key, - cb, - } = delete_range - else { + let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index a949725090d2..5b3cc5feb930 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -137,9 +137,7 @@ impl TestRouter { match res { Ok(_) => return block_on(sub.result()).is_some(), Err(TrySendError::Disconnected(m)) => { - let PeerMsg::WaitFlush(ch) = m else { - unreachable!() - }; + let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; match self .store_router() .send_control(StoreMsg::WaitFlush { region_id, ch }) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 756b7dc399e9..d082013cd2c5 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -471,7 +471,10 @@ impl CoprocessorHost { BoxSplitCheckObserver::new(KeysCheckObserver::new(ch)), ); registry.register_split_check_observer(100, BoxSplitCheckObserver::new(HalfCheckObserver)); - registry.register_split_check_observer(400, BoxSplitCheckObserver::new(TableCheckObserver)); + registry.register_split_check_observer( + 400, + BoxSplitCheckObserver::new(TableCheckObserver::default()), + ); registry.register_admin_observer(100, 
BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 6cf83a6cf846..d1597a77121e 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -223,7 +223,7 @@ impl From for errorpb::Error { .mut_proposal_in_merging_mode() .set_region_id(region_id); } - Error::Transport(DiscardReason::Full) => { + Error::Transport(reason) if reason == DiscardReason::Full => { let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(RAFTSTORE_IS_BUSY.to_owned()); errorpb.set_server_is_busy(server_is_busy_err); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 197eaefeac78..1db5f79d2268 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -5,13 +5,11 @@ #![feature(div_duration)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(hash_extract_if)] +#![feature(hash_drain_filter)] #![feature(let_chains)] #![feature(assert_matches)] #![feature(type_alias_impl_trait)] -#![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] -#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(test)] extern crate test; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 12617bc28a24..eedd5052bbbb 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -419,11 +419,7 @@ where } self.state_size = 0; if let ExtraBatchWrite::V2(_) = self.extra_batch_write { - let ExtraBatchWrite::V2(lb) = - mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) - else { - unreachable!() - }; + let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) else { unreachable!() }; wb.merge(lb).unwrap(); } } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 95f099f77a77..c91c68538dd6 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1338,14 +1338,14 @@ pub mod tests { // Test the initial data structure size. let (tx, rx) = mpsc::sync_channel(8); let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 0); + assert_eq!(rx.try_recv().unwrap(), 896); cache.append( 0, 0, &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], ); - assert_eq!(rx.try_recv().unwrap(), 419); + assert_eq!(rx.try_recv().unwrap(), 3); cache.prepend(vec![new_padded_entry(100, 1, 1)]); assert_eq!(rx.try_recv().unwrap(), 1); @@ -1371,7 +1371,7 @@ pub mod tests { // Test trace a dangle entry. let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 97); + assert_eq!(rx.try_recv().unwrap(), 1); // Test trace an entry which is still in cache. 
let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); @@ -1398,7 +1398,7 @@ pub mod tests { assert_eq!(rx.try_recv().unwrap(), -7); drop(cache); - assert_eq!(rx.try_recv().unwrap(), -512); + assert_eq!(rx.try_recv().unwrap(), -896); } #[test] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 406c8d79d18c..c170e5a35f98 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1262,9 +1262,9 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); // 1. When a peer become leader, it will send an empty entry. - // 2. When a leader tries to read index during transferring leader, it will also - // propose an empty entry. But that entry will not contain any associated - // callback. So no need to clear callback. + // 2. When a leader tries to read index during transferring leader, + // it will also propose an empty entry. But that entry will not contain + // any associated callback. So no need to clear callback. while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { if let Some(cb) = cmd.cb.take() { apply_ctx @@ -4787,12 +4787,12 @@ where // command may not read the writes of previous commands and break ACID. If // it's still leader, there are two possibility that mailbox is closed: // 1. The process is shutting down. - // 2. The leader is destroyed. A leader won't propose to destroy itself, so it - // should either destroyed by older leaders or newer leaders. Leader won't - // respond to read until it has applied to current term, so no command will - // be proposed until command from older leaders have applied, which will then - // stop it from accepting proposals. If the command is proposed by new - // leader, then it won't be able to propose new proposals. + // 2. The leader is destroyed. A leader won't propose to destroy itself, so + // it should either destroyed by older leaders or newer leaders. Leader + // won't respond to read until it has applied to current term, so no + // command will be proposed until command from older leaders have applied, + // which will then stop it from accepting proposals. If the command is + // proposed by new leader, then it won't be able to propose new proposals. // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 371e8cd8eb51..30ba0c3059d4 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1015,10 +1015,10 @@ where // in snapshot recovery after we stopped all conf changes from PD. // if the follower slow than leader and has the pending conf change. // that's means - // 1. if the follower didn't finished the conf change => it cannot be chosen to - // be leader during recovery. - // 2. if the follower has been chosen to be leader => it already apply the - // pending conf change already. + // 1. if the follower didn't finished the conf change + // => it cannot be chosen to be leader during recovery. + // 2. if the follower has been chosen to be leader + // => it already apply the pending conf change already. 
return; } debug!( diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a858b5afddde..64c5be6d7e15 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -169,25 +169,19 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { - return false; - }; + let Callback::Write { proposed_cb, .. } = self else { return false; }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { - return; - }; + let Callback::Write { proposed_cb, .. } = self else { return; }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { - return; - }; + let Callback::Write { committed_cb, .. } = self else { return; }; if let Some(cb) = committed_cb.take() { cb(); } @@ -201,16 +195,12 @@ where } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { - return None; - }; + let Callback::Write { proposed_cb, .. } = self else { return None; }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { - return None; - }; + let Callback::Write { committed_cb, .. } = self else { return None; }; committed_cb.take() } } @@ -268,9 +258,7 @@ impl ReadCallback for Callback { } fn read_tracker(&self) -> Option { - let Callback::Read { tracker, .. } = self else { - return None; - }; + let Callback::Read { tracker, .. } = self else { return None; }; Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index aafd2f9695b9..8ef857bfa129 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2314,14 +2314,14 @@ where CheckApplyingSnapStatus::Applying => { // If this peer is applying snapshot, we should not get a new ready. // There are two reasons in my opinion: - // 1. If we handle a new ready and persist the data(e.g. entries), we can not - // tell raft-rs that this ready has been persisted because the ready need - // to be persisted one by one from raft-rs's view. - // 2. When this peer is applying snapshot, the response msg should not be sent - // to leader, thus the leader will not send new entries to this peer. - // Although it's possible a new leader may send a AppendEntries msg to this - // peer, this possibility is very low. In most cases, there is no msg need - // to be handled. + // 1. If we handle a new ready and persist the data(e.g. entries), + // we can not tell raft-rs that this ready has been persisted because + // the ready need to be persisted one by one from raft-rs's view. + // 2. When this peer is applying snapshot, the response msg should not + // be sent to leader, thus the leader will not send new entries to + // this peer. Although it's possible a new leader may send a AppendEntries + // msg to this peer, this possibility is very low. In most cases, there + // is no msg need to be handled. // So we choose to not get a new ready which makes the logic more clear. debug!( "still applying snapshot, skip further handling"; @@ -4467,25 +4467,27 @@ where /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader - /// to follower. - /// 2. 
pre_ack_transfer_leader_msg on follower: If follower passes all - /// necessary checks, it will try to warmup the entry cache. - /// 3. ack_transfer_leader_msg on follower: When the entry cache has been - /// warmed up or the operator is timeout, the follower reply an ACK with - /// type MsgTransferLeader and its promised persistent index. + /// 1. pre_transfer_leader on leader: + /// Leader will send a MsgTransferLeader to follower. + /// 2. pre_ack_transfer_leader_msg on follower: + /// If follower passes all necessary checks, it will try to warmup + /// the entry cache. + /// 3. ack_transfer_leader_msg on follower: + /// When the entry cache has been warmed up or the operator is timeout, + /// the follower reply an ACK with type MsgTransferLeader and + /// its promised persistent index. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. ack_transfer_leader_msg on follower again: The follower applies - /// the TransferLeader command and replies an ACK with special context - /// TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. ack_transfer_leader_msg on follower again: + /// The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// - /// 4. ready_to_transfer_leader on leader: Leader checks if it's appropriate - /// to transfer leadership. If it does, it calls raft transfer_leader API - /// to do the remaining work. + /// 4. ready_to_transfer_leader on leader: + /// Leader checks if it's appropriate to transfer leadership. If it + /// does, it calls raft transfer_leader API to do the remaining work. /// /// See also: tikv/rfcs#37. fn propose_transfer_leader( @@ -5818,7 +5820,7 @@ mod tests { admin_req.clear_transfer_leader(); req.clear_admin_request(); - for (op, policy) in [ + for (op, policy) in vec![ (CmdType::Get, RequestPolicy::ReadLocal), (CmdType::Snap, RequestPolicy::ReadLocal), (CmdType::Put, RequestPolicy::ProposeNormal), @@ -5971,7 +5973,7 @@ mod tests { // (1, 4) and (1, 5) is not committed let entries = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)]; - let committed = [(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; + let committed = vec![(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; for (index, term) in entries.clone() { if term != 1 { continue; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 1556338e9c0c..a888929ca985 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -96,7 +96,7 @@ impl PartialEq for SnapState { (&SnapState::Relax, &SnapState::Relax) | (&SnapState::ApplyAborted, &SnapState::ApplyAborted) | (&SnapState::Generating { .. }, &SnapState::Generating { .. 
}) => true, - (SnapState::Applying(b1), SnapState::Applying(b2)) => { + (&SnapState::Applying(ref b1), &SnapState::Applying(ref b2)) => { b1.load(Ordering::Relaxed) == b2.load(Ordering::Relaxed) } _ => false, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 40168707f6ab..bc22dfbf5866 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -438,7 +438,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for (k, v) in &base_data { + for &(ref k, ref v) in &base_data { engines.kv.put(&data_key(k), v).unwrap(); } let store = new_peer_storage(engines, &r); @@ -482,11 +482,11 @@ mod tests { let mut data = vec![]; { let db = &engines.kv; - for (k, level) in &levels { + for &(ref k, level) in &levels { db.put(&data_key(k), k).unwrap(); db.flush_cfs(&[], true).unwrap(); data.push((k.to_vec(), k.to_vec())); - db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(*level)) + db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) .unwrap(); } } diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index 1d8341c1c0b2..a303a5869356 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -579,17 +579,13 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); - let SimpleWrite::Put(put) = write else { - panic!("should be put") - }; + let SimpleWrite::Put(put) = write else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); let write = decoder.next().unwrap(); - let SimpleWrite::Delete(delete) = write else { - panic!("should be delete") - }; + let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); assert_matches!(decoder.next(), None); @@ -597,18 +593,14 @@ mod tests { let (bytes, _) = req_encoder2.encode(); decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { - panic!("should be delete range") - }; + let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, CF_LOCK); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); assert!(dr.notify_only); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { - panic!("should be delete range") - }; + let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, "cf"); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); @@ -634,9 +626,7 @@ mod tests { let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::Ingest(ssts) = write else { - panic!("should be ingest") - }; + let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; assert_eq!(exp, ssts); assert_matches!(decoder.next(), None); } @@ -725,9 +715,7 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); - let SimpleWrite::Put(put) = req else { - panic!("should be put") - }; 
+ let SimpleWrite::Put(put) = req else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index dcb98dd9cb2e..6fe21fe97502 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1323,7 +1323,7 @@ impl Write for Snapshot { } assert!(cf_file.size[self.cf_file_index] != 0); - let file_for_recving = cf_file + let mut file_for_recving = cf_file .file_for_recving .get_mut(self.cf_file_index) .unwrap(); @@ -2162,7 +2162,7 @@ impl TabletSnapManager { .stats .lock() .unwrap() - .extract_if(|_, (_, stat)| stat.get_region_id() > 0) + .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) .map(|(_, (_, stat))| stat) .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 8fcaf826c6ab..3cdee1e40f1c 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -327,7 +327,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_plain_files_enc"); - for db_opt in [None, Some(enc_opts)] { + for db_opt in vec![None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db: KvTestEngine = db_creater(dir.path(), db_opt.clone(), None).unwrap(); // Collect keys via the key_callback into a collection. @@ -408,7 +408,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_sst_files_enc"); - for db_opt in [None, Some(enc_opts)] { + for db_opt in vec![None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db = db_creater(dir.path(), db_opt.clone(), None).unwrap(); let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 9c73be2b9eba..0091fd4e7bb8 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -244,7 +244,7 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. // Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.extract_if(|key, _| { + let removed_locks = self.map.drain_filter(|key, _| { let key = &**key.as_encoded(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); key < start_key || (!end_key.is_empty() && key >= end_key) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index ed2c70822c99..3f34fe691ee0 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -124,7 +124,8 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { /// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet -// 2. Target peer is added newly due to member change or region split, but it's not created yet +// 2. 
Target peer is added newly due to member change or region split, but it's not +// created yet // For both cases the region start key and end key are attached in RequestVote and // Heartbeat message for the store of that peer to check whether to create a new peer // when receiving these messages, or just to wait for a pending region split to perform diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 6aa192bd28e2..606576b22e49 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1704,7 +1704,10 @@ where fn handle_read_stats(&mut self, mut read_stats: ReadStats) { for (region_id, region_info) in read_stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -1726,7 +1729,10 @@ where fn handle_write_stats(&mut self, mut write_stats: WriteStats) { for (region_id, region_info) in write_stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num @@ -2084,10 +2090,7 @@ where let f = async move { for split_info in split_infos { let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await - else { - continue; - }; + pd_client.get_region_by_id(split_info.region_id).await else { continue }; // Try to split the region with the given split key. if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( @@ -2152,7 +2155,10 @@ where cpu_usage, ) = { let region_id = hb_task.region.get_id(); - let peer_stat = self.region_peers.entry(region_id).or_default(); + let peer_stat = self + .region_peers + .entry(region_id) + .or_insert_with(PeerStat::default); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5a6e641f5dcd..5d6ede9c1936 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -2155,12 +2155,11 @@ mod tests { let (notify_tx, notify_rx) = channel(); let (wait_spawn_tx, wait_spawn_rx) = channel(); let runtime = tokio::runtime::Runtime::new().unwrap(); - let handler = runtime.spawn(async move { + let _ = runtime.spawn(async move { wait_spawn_tx.send(()).unwrap(); notify.notified().await; notify_tx.send(()).unwrap(); }); - drop(handler); wait_spawn_rx.recv().unwrap(); thread::sleep(std::time::Duration::from_millis(500)); // Prevent lost notify. must_not_redirect(&mut reader, &rx, task); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 7a675646f5cd..068904b2a677 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -179,7 +179,7 @@ impl PendingDeleteRanges { ) -> Vec<(u64, Vec, Vec, u64)> { let ranges = self.find_overlap_ranges(start_key, end_key); - for (_, s_key, ..) in &ranges { + for &(_, ref s_key, ..) 
in &ranges { self.ranges.remove(s_key).unwrap(); } ranges @@ -1293,7 +1293,7 @@ pub(crate) mod tests { } }; - #[cfg(feature = "failpoints")] + #[allow(dead_code)] let must_not_finish = |ids: &[u64]| { for id in ids { let region_key = keys::region_state_key(*id); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 468c06febd46..4ff853f70a05 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -64,14 +64,14 @@ impl KeyEntry { impl PartialOrd for KeyEntry { fn partial_cmp(&self, rhs: &KeyEntry) -> Option { - Some(self.cmp(rhs)) + // BinaryHeap is max heap, so we have to reverse order to get a min heap. + Some(self.key.cmp(&rhs.key).reverse()) } } impl Ord for KeyEntry { fn cmp(&self, rhs: &KeyEntry) -> Ordering { - // BinaryHeap is max heap, so we have to reverse order to get a min heap. - self.key.cmp(&rhs.key).reverse() + self.partial_cmp(rhs).unwrap() } } @@ -287,7 +287,7 @@ impl Runner { region: &Region, bucket_ranges: &Vec, ) { - for (bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { + for (mut bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { let mut bucket_region = region.clone(); bucket_region.set_start_key(bucket_range.0.clone()); bucket_region.set_end_key(bucket_range.1.clone()); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 9cf534c62b0e..4bbcc7737638 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -178,7 +178,7 @@ impl Samples { // evaluate the samples according to the given key range, it will update the // sample's left, right and contained counter. 
fn evaluate(&mut self, key_range: &KeyRange) { - for sample in self.0.iter_mut() { + for mut sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { Ordering::Greater } else { @@ -496,7 +496,10 @@ pub struct WriteStats { impl WriteStats { pub fn add_query_num(&mut self, region_id: u64, kind: QueryKind) { - let query_stats = self.region_infos.entry(region_id).or_default(); + let query_stats = self + .region_infos + .entry(region_id) + .or_insert_with(QueryStats::default); query_stats.add_query_num(kind, 1); } @@ -985,8 +988,8 @@ mod tests { #[test] fn test_prefix_sum() { - let v = [1, 2, 3, 4, 5, 6, 7, 8, 9]; - let expect = [1, 3, 6, 10, 15, 21, 28, 36, 45]; + let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expect = vec![1, 3, 6, 10, 15, 21, 28, 36, 45]; let pre = prefix_sum(v.iter(), |x| *x); for i in 0..v.len() { assert_eq!(expect[i], pre[i]); diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 328f725edaaa..47d14304112f 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -213,13 +213,13 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) CF_WRITE => { if let Ok(ts) = key.decode_ts() { let key = key.truncate_ts().unwrap(); - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); assert!(row.write.is_none()); row.write = Some(KeyOp::Put(Some(ts), value)); } } CF_LOCK => { - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); assert!(row.lock.is_none()); row.lock = Some(KeyOp::Put(None, value)); } @@ -239,7 +239,7 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) match delete.cf.as_str() { CF_LOCK => { let key = Key::from_encoded(delete.take_key()); - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); row.lock = Some(KeyOp::Delete); } "" | CF_WRITE | CF_DEFAULT => {} diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 406d931ed7f5..9de21b27d9e1 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -65,8 +65,7 @@ impl Drop for ResolverStatus { locks, memory_quota, .. - } = self - else { + } = self else { return; }; if locks.is_empty() { @@ -97,8 +96,7 @@ impl ResolverStatus { locks, memory_quota, .. - } = self - else { + } = self else { panic!("region {:?} resolver has ready", region_id) }; // Check if adding a new lock or unlock will exceed the memory @@ -112,7 +110,10 @@ impl ResolverStatus { } fn update_tracked_index(&mut self, index: u64, region_id: u64) { - let ResolverStatus::Pending { tracked_index, .. } = self else { + let ResolverStatus::Pending { + tracked_index, + .. + } = self else { panic!("region {:?} resolver has ready", region_id) }; assert!( @@ -134,8 +135,7 @@ impl ResolverStatus { memory_quota, tracked_index, .. - } = self - else { + } = self else { panic!("region {:?} resolver has ready", region_id) }; // Must take locks, otherwise it may double free memory quota on drop. @@ -687,7 +687,7 @@ where scanner_pool, scan_concurrency_semaphore, regions: HashMap::default(), - _phantom: PhantomData, + _phantom: PhantomData::default(), }; ep.handle_advance_resolved_ts(leader_resolver); ep @@ -870,6 +870,7 @@ where // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. 
+ #[allow(clippy::drop_ref)] fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); @@ -883,6 +884,7 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { + drop(observe_region); let backoff = match e { Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, @@ -928,7 +930,7 @@ where } fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { - let regions = self.regions.keys().copied().collect(); + let regions = self.regions.keys().into_iter().copied().collect(); self.advance_worker.advance_ts_for_regions( regions, leader_resolver, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index ad052338fa2a..6c8c90dc38f0 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -100,7 +100,7 @@ impl, E: KvEngine> ScannerPool { Self { workers, cdc_handle, - _phantom: PhantomData, + _phantom: PhantomData::default(), } } @@ -168,7 +168,6 @@ impl, E: KvEngine> ScannerPool { self.workers.spawn(fut); } - #[allow(clippy::needless_pass_by_ref_mut)] async fn get_snapshot( task: &mut ScanTask, cdc_handle: T, diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 09e90e9dd018..a4b30e3d4adf 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -240,7 +240,7 @@ impl ResourceGroupManager { request_source: &str, ) -> Option> { fail_point!("only_check_source_task_name", |name| { - assert_eq!(name.clone().unwrap(), request_source.to_string()); + assert_eq!(&name.unwrap(), request_source); None }); if let Some(group) = self.resource_groups.get(rg) { @@ -311,8 +311,8 @@ pub struct ResourceController { // 1. the priority factor is calculate based on read/write RU settings. // 2. for read request, we increase a constant virtual time delta at each `get_priority` call // because the cost can't be calculated at start, so we only increase a constant delta and - // increase the real cost after task is executed; but don't increase it at write because the - // cost is known so we just pre-consume it. + // increase the real cost after task is executed; but don't increase it at write because + // the cost is known so we just pre-consume it. is_read: bool, // Track the maximum ru quota used to calculate the factor of each resource group. // factor = max_ru_quota / group_ru_quota * 10.0 diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index 7b437ea43037..ba8e2174e198 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -2,7 +2,7 @@ // TODO(mornyx): crate doc. 
-#![feature(hash_extract_if)] +#![feature(hash_drain_filter)] #![feature(core_intrinsics)] use std::{ diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 03cd500eb2e9..6f7118ef9e1c 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -87,7 +87,7 @@ impl RawRecords { pdqselect::select_by(&mut buf, k, |a, b| b.cmp(a)); let kth = buf[k]; // Evict records with cpu time less or equal than `kth` - let evicted_records = self.records.extract_if(|_, r| r.cpu_time <= kth); + let evicted_records = self.records.drain_filter(|_, r| r.cpu_time <= kth); // Record evicted into others for (_, record) in evicted_records { others.merge(&record); diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index 08675bb6153f..8c4053a80ab2 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -9,7 +9,7 @@ use crate::{ localstorage::{LocalStorage, SharedTagInfos}, SubRecorder, }, - RawRecords, + RawRecord, RawRecords, }; /// An implementation of [SubRecorder] for collecting cpu statistics. @@ -37,7 +37,7 @@ impl SubRecorder for CpuRecorder { if *last_stat != cur_stat { let delta_ms = (cur_stat.total_cpu_time() - last_stat.total_cpu_time()) * 1_000.; - let record = records.entry(cur_tag).or_default(); + let record = records.entry(cur_tag).or_insert_with(RawRecord::default); record.cpu_time += delta_ms as u32; } thread_stat.stat = cur_stat; diff --git a/components/resource_metering/tests/recorder_test.rs b/components/resource_metering/tests/recorder_test.rs index 6e164b8e5e81..daa371e74771 100644 --- a/components/resource_metering/tests/recorder_test.rs +++ b/components/resource_metering/tests/recorder_test.rs @@ -55,7 +55,7 @@ mod tests { if let Some(tag) = self.current_ctx { self.records .entry(tag.as_bytes().to_vec()) - .or_default() + .or_insert_with(RawRecord::default) .cpu_time += ms; } self.ops.push(op); @@ -140,7 +140,7 @@ mod tests { if let Ok(mut r) = self.records.lock() { for (tag, record) in records.records.iter() { r.entry(tag.extra_attachment.to_vec()) - .or_default() + .or_insert_with(RawRecord::default) .merge(record); } } @@ -156,10 +156,10 @@ mod tests { let mut records = self.records.lock().unwrap(); for k in expected.keys() { - records.entry(k.clone()).or_default(); + records.entry(k.clone()).or_insert_with(RawRecord::default); } for k in records.keys() { - expected.entry(k.clone()).or_default(); + expected.entry(k.clone()).or_insert_with(RawRecord::default); } for (k, expected_value) in expected { let value = records.get(&k).unwrap(); @@ -324,10 +324,10 @@ mod tests { fn merge( maps: impl IntoIterator, RawRecord>>, ) -> HashMap, RawRecord> { - let mut map: HashMap, RawRecord> = HashMap::default(); + let mut map = HashMap::default(); for m in maps { for (k, v) in m { - map.entry(k).or_default().merge(&v); + map.entry(k).or_insert_with(RawRecord::default).merge(&v); } } map diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 43b0314cbbe2..c8cf879d9052 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -558,9 +558,7 @@ impl EnginesResourceInfo { }); for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { - continue; - }; + let Some(tablet) = cache.latest() else { continue }; for cf in DATA_CFS { fetch_engine_cf(tablet, 
cf); } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 48344fe50128..417d5becca31 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -206,7 +206,7 @@ mod test { #[test] fn test_basic() { - let leaders = [1, 2, 3]; + let leaders = vec![1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, leaders); @@ -217,7 +217,7 @@ mod test { #[test] fn test_failure() { - let leaders = [1, 2, 3]; + let leaders = vec![1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 4db29c47a6f7..70b7d7fac5e1 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -139,7 +139,7 @@ impl ImportModeSwitcherV2 { pub fn ranges_in_import(&self) -> HashSet { let inner = self.inner.lock().unwrap(); - HashSet::from_iter(inner.import_mode_ranges.keys().cloned()) + HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 910cfa602dd7..5530862e6a39 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -384,8 +384,8 @@ impl SstImporter { // This method is blocking. It performs the following transformations before // writing to disk: // - // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. (set - // the range to `["", ""]` to import everything). + // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. + // (set the range to `["", ""]` to import everything). // 2. keys are rewritten according to the given rewrite rule. // // Both the range and rewrite keys are specified using origin keys. 
However, @@ -1558,7 +1558,7 @@ mod tests { let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); - let cases = [(0, 10), (5, 15), (10, 20), (0, 100)]; + let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; let mut ingested = Vec::new(); @@ -2072,10 +2072,13 @@ mod tests { false, ) .unwrap(); - let ext_storage = importer.wrap_kms( - importer.external_storage_or_cache(&backend, "").unwrap(), - false, - ); + let ext_storage = { + let inner = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); + inner + }; // test do_read_kv_file() let output = block_on_external_io(importer.do_read_kv_file( diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 654971b0d411..ff7526172d51 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -97,8 +97,7 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { - use std::os::unix::fs::PermissionsExt; - pmts.set_mode(0o644); + pmts.set_readonly(false); file_system::set_permissions(clone, pmts)?; } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 6763ea7bb1a1..96f405d8f39e 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -203,7 +203,7 @@ impl Store { } pub fn put(&mut self, ctx: Context, mut kv: Vec<(Vec, Vec)>) { - self.handles.extend(kv.iter().map(|(k, _)| k.clone())); + self.handles.extend(kv.iter().map(|&(ref k, _)| k.clone())); let pk = kv[0].0.clone(); let kv = kv .drain(..) diff --git a/components/test_coprocessor_plugin/example_plugin/src/lib.rs b/components/test_coprocessor_plugin/example_plugin/src/lib.rs index d383797c0692..afcaa4962b94 100644 --- a/components/test_coprocessor_plugin/example_plugin/src/lib.rs +++ b/components/test_coprocessor_plugin/example_plugin/src/lib.rs @@ -18,4 +18,4 @@ impl CoprocessorPlugin for ExamplePlugin { } } -declare_plugin!(ExamplePlugin); +declare_plugin!(ExamplePlugin::default()); diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 02833e030eb8..90a420fbba08 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -128,8 +128,12 @@ impl Server { } #[allow(unused_mut)] -fn hijack_unary(mock: &PdMock, ctx: RpcContext<'_>, sink: UnarySink, f: F) -where +fn hijack_unary( + mock: &mut PdMock, + ctx: RpcContext<'_>, + sink: UnarySink, + f: F, +) where R: Send + 'static, F: Fn(&dyn PdMocker) -> Option>, { diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 58df59987583..c81230f6a163 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1438,7 +1438,7 @@ impl TestPdClient { pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); - let dr = status.mut_dr_auto_sync(); + let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state); dr.available_stores = available_stores; diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 346813e7d1fc..8ede32901671 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -220,7 +220,7 @@ pub trait 
Simulator { None => { error!("call_query_on_node receives none response"; "request" => ?request); // Do not unwrap here, sometimes raftstore v2 may return none. - Err(box_err!("receives none response {:?}", request)) + return Err(box_err!("receives none response {:?}", request)); } } } @@ -1612,7 +1612,6 @@ impl, EK: KvEngine> Cluster { ) } - #[allow(clippy::let_underscore_future)] pub fn merge_region(&mut self, source: u64, target: u64, _cb: Callback) { // FIXME: callback is ignored. let mut req = self.new_prepare_merge(source, target); diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 45642df1e7fa..685affe45d0f 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -3,8 +3,6 @@ #![feature(type_alias_impl_trait)] #![feature(return_position_impl_trait_in_trait)] #![feature(let_chains)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::arc_with_non_send_sync)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 70b6ccb14077..d63ca0aa2f28 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -258,7 +258,7 @@ impl Simulator for NodeCluster { ) } else { let trans = self.trans.core.lock().unwrap(); - let (snap_mgr, _) = &trans.snap_paths[&node_id]; + let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; self.snap_mgrs.insert(node_id, snap_mgr.clone()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index a7d64591fe1d..7b5d501a59f4 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1006,18 +1006,7 @@ pub fn must_new_cluster_and_kv_client_mul( TikvClient, Context, ) { - must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) -} - -pub fn must_new_cluster_with_cfg_and_kv_client_mul( - count: usize, - configure: impl FnMut(&mut Cluster, RocksEngine>), -) -> ( - Cluster, RocksEngine>, - TikvClient, - Context, -) { - let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); + let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); let channel = @@ -1026,7 +1015,6 @@ pub fn must_new_cluster_with_cfg_and_kv_client_mul( (cluster, client, ctx) } - pub fn must_new_cluster_mul( count: usize, ) -> ( diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 6f48c17190af..04dfbd24de17 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,8 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
#![feature(let_chains)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::arc_with_non_send_sync)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 8a9969c19137..f429f27ff8b6 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -281,7 +281,7 @@ impl Simulator for NodeCluster { (snap_mgr, Some(tmp)) } else { let trans = self.trans.core.lock().unwrap(); - let (snap_mgr, _) = &trans.snap_paths[&node_id]; + let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0df44b4e7843..8d26bae968d8 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -918,14 +918,8 @@ pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, pub fn must_new_cluster_and_kv_client_mul( count: usize, ) -> (Cluster, TikvClient, Context) { - must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) -} + let (cluster, leader, ctx) = must_new_cluster_mul(count); -pub fn must_new_cluster_with_cfg_and_kv_client_mul( - count: usize, - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { - let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index ea3017d5d027..33976939c830 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -1739,24 +1739,27 @@ mod tests_normal { /// Compare TokenStream with all white chars trimmed. 
fn assert_token_stream_equal(l: TokenStream, r: TokenStream) { - let result = l.clone().into_iter().eq_by(r.clone(), |x, y| match x { - TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), - TokenTree::Literal(x) => { - matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) - } - TokenTree::Punct(x) => { - matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) - } - TokenTree::Group(x) => { - if let TokenTree::Group(y) = y { - assert_token_stream_equal(x.stream(), y.stream()); + let result = l + .clone() + .into_iter() + .eq_by(r.clone().into_iter(), |x, y| match x { + TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), + TokenTree::Literal(x) => { + matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) + } + TokenTree::Punct(x) => { + matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) + } + TokenTree::Group(x) => { + if let TokenTree::Group(y) = y { + assert_token_stream_equal(x.stream(), y.stream()); - true - } else { - false + true + } else { + false + } } - } - }); + }); assert!(result, "expect: {:#?}, actual: {:#?}", &l, &r); } diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 738e0020de7f..22127e62f49f 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -251,7 +251,7 @@ where { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + C::sort_compare(self.inner.as_ref(), other.inner.as_ref()).ok() } } diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index d2bbee78078b..418841547cac 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -574,13 +574,13 @@ pub fn bytes_to_int_without_context(bytes: &[u8]) -> Result { if let Some(&c) = trimed.next() { if c == b'-' { negative = true; - } else if c.is_ascii_digit() { + } else if (b'0'..=b'9').contains(&c) { r = Some(i64::from(c) - i64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| c.is_ascii_digit()) { + for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { let cur = i64::from(*c - b'0'); r = r.and_then(|r| r.checked_mul(10)).and_then(|r| { if negative { @@ -605,13 +605,13 @@ pub fn bytes_to_uint_without_context(bytes: &[u8]) -> Result { let mut trimed = bytes.iter().skip_while(|&&b| b == b' ' || b == b'\t'); let mut r = Some(0u64); if let Some(&c) = trimed.next() { - if c.is_ascii_digit() { + if (b'0'..=b'9').contains(&c) { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| c.is_ascii_digit()) { + for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -856,7 +856,7 @@ pub fn get_valid_int_prefix_helper<'a>( if (c == '+' || c == '-') && i == 0 { continue; } - if c.is_ascii_digit() { + if ('0'..='9').contains(&c) { valid_len = i + 1; continue; } @@ -917,7 +917,7 @@ pub fn get_valid_float_prefix_helper<'a>( break; } e_idx = i - } else if !c.is_ascii_digit() { + } else if !('0'..='9').contains(&c) { break; } else { saw_digit = true; diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index b464b1119c81..8ca367908247 100644 --- 
a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -248,7 +248,7 @@ macro_rules! impl_evaluable_type { } #[inline] - fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self> { + fn borrow_scalar_value_ref<'a>(v: ScalarValueRef<'a>) -> Option<&'a Self> { match v { ScalarValueRef::$ty(x) => x, other => panic!( diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index ff66ddc42eeb..c74423107e4f 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -467,23 +467,24 @@ impl<'a> ScalarValueRef<'a> { impl<'a> Ord for ScalarValueRef<'a> { fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other) + .expect("Cannot compare two ScalarValueRef in different type") + } +} + +impl<'a> PartialOrd for ScalarValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { match_template_evaltype! { TT, match (self, other) { // v1 and v2 are `Option`. However, in MySQL NULL values are considered lower // than any non-NULL value, so using `Option::PartialOrd` directly is fine. - (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => v1.cmp(v2), - _ => panic!("Cannot compare two ScalarValueRef in different type"), + (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => Some(v1.cmp(v2)), + _ => None, } } } } -impl<'a> PartialOrd for ScalarValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - impl<'a> PartialEq for ScalarValueRef<'a> { fn eq(&self, other: &ScalarValue) -> bool { self == &other.as_scalar_value_ref() diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index f91d204b3b07..dde98003475c 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -668,7 +668,7 @@ impl Datum { Datum::F64(res) } } - (Datum::Dec(l), Datum::Dec(r)) => { + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { let dec: Result = (l + r).into(); return dec.map(Datum::Dec); } @@ -700,7 +700,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_sub(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l - r)), - (Datum::Dec(l), Datum::Dec(r)) => { + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { let dec: Result = (l - r).into(); return dec.map(Datum::Dec); } @@ -724,7 +724,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_mul(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l * r)), - (Datum::Dec(l), Datum::Dec(r)) => return Ok(Datum::Dec((l * r).unwrap())), + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => return Ok(Datum::Dec((l * r).unwrap())), (l, r) => return Err(invalid_type!("{} can't multiply {}", l, r)), }; @@ -1179,7 +1179,7 @@ mod tests { | (&Datum::Null, &Datum::Null) | (&Datum::Time(_), &Datum::Time(_)) | (&Datum::Json(_), &Datum::Json(_)) => true, - (Datum::Dec(d1), Datum::Dec(d2)) => d1.prec_and_frac() == d2.prec_and_frac(), + (&Datum::Dec(ref d1), &Datum::Dec(ref d2)) => d1.prec_and_frac() == d2.prec_and_frac(), _ => false, } } diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 8853a1d6a164..143ec6c77608 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1872,7 +1872,7 
@@ impl<'a> ConvertTo for JsonRef<'a> { fn first_non_digit(bs: &[u8], start_idx: usize) -> usize { bs.iter() .skip(start_idx) - .position(|c| !c.is_ascii_digit()) + .position(|c| !(b'0'..=b'9').contains(c)) .map_or_else(|| bs.len(), |s| s + start_idx) } diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 4b7359777120..7279f7881462 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -629,14 +629,14 @@ impl Eq for Duration {} impl PartialOrd for Duration { #[inline] fn partial_cmp(&self, rhs: &Duration) -> Option { - Some(self.cmp(rhs)) + self.nanos.partial_cmp(&rhs.nanos) } } impl Ord for Duration { #[inline] fn cmp(&self, rhs: &Duration) -> Ordering { - self.nanos.partial_cmp(&rhs.nanos).unwrap() + self.partial_cmp(rhs).unwrap() } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index 73e048858901..d9104385bc6c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -77,8 +77,6 @@ impl<'a> PartialEq for JsonRef<'a> { .map_or(false, |r| r == Ordering::Equal) } } - -#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)] impl<'a> PartialOrd for JsonRef<'a> { // See `CompareBinary` in TiDB `types/json/binary_functions.go` fn partial_cmp(&self, right: &JsonRef<'_>) -> Option { @@ -199,7 +197,7 @@ impl PartialEq for Json { impl PartialOrd for Json { fn partial_cmp(&self, right: &Json) -> Option { - Some(self.cmp(right)) + self.as_ref().partial_cmp(&right.as_ref()) } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index f76b29790f9f..867d8ec2c202 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -28,9 +28,9 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryObject` in TiDB `types/json/binary.go` - fn write_json_obj_from_keys_values( + fn write_json_obj_from_keys_values<'a>( &mut self, - mut entries: Vec<(&[u8], JsonRef<'_>)>, + mut entries: Vec<(&[u8], JsonRef<'a>)>, ) -> Result<()> { entries.sort_by(|a, b| a.0.cmp(b.0)); // object: element-count size key-entry* value-entry* key* value* @@ -122,7 +122,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryArray` in TiDB `types/json/binary.go` - fn write_json_ref_array(&mut self, data: &[JsonRef<'_>]) -> Result<()> { + fn write_json_ref_array<'a>(&mut self, data: &[JsonRef<'a>]) -> Result<()> { let element_count = data.len(); let value_entries_len = VALUE_ENTRY_LEN * element_count; let values_len = data.iter().fold(0, |acc, v| acc + v.encoded_len()); @@ -167,7 +167,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryValElem` in TiDB `types/json/binary.go` - fn write_value_entry(&mut self, value_offset: &mut u32, v: &JsonRef<'_>) -> Result<()> { + fn write_value_entry<'a>(&mut self, value_offset: &mut u32, v: &JsonRef<'a>) -> Result<()> { let tp = v.get_type(); self.write_u8(tp as u8)?; match tp { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs index 3cc78270d60c..b359158d06b8 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs +++ 
b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs @@ -41,7 +41,7 @@ impl<'a> JsonRef<'a> { } } let mut res = self.to_owned(); - for (expr, value) in path_expr_list.iter().zip(values) { + for (expr, value) in path_expr_list.iter().zip(values.into_iter()) { let modifier = BinaryModifier::new(res.as_ref()); res = match mt { ModifyType::Insert => modifier.insert(expr, value)?, diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 621d4384bcc0..4c6c2f676d7a 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1094,7 +1094,7 @@ impl Time { ) } - fn try_into_chrono_datetime(self, ctx: &EvalContext) -> Result> { + fn try_into_chrono_datetime(self, ctx: &mut EvalContext) -> Result> { chrono_datetime( &ctx.cfg.tz, self.year(), @@ -1342,7 +1342,6 @@ impl Time { Ok((((ymd << 17) | hms) << 24) | u64::from(self.micro())) } - #[allow(deprecated)] pub fn from_duration( ctx: &mut EvalContext, duration: Duration, @@ -1416,7 +1415,6 @@ impl Time { .ok_or_else(|| Error::incorrect_datetime_value(self)) } - #[allow(deprecated)] pub fn normalized(self, ctx: &mut EvalContext) -> Result { if self.get_time_type() == TimeType::Timestamp { return Ok(self); @@ -1502,7 +1500,6 @@ impl Time { + self.day()) as i32 } - #[allow(deprecated)] pub fn weekday(self) -> Weekday { let date = if self.month() == 0 { NaiveDate::from_ymd(self.year() as i32 - 1, 12, 1) @@ -2673,9 +2670,9 @@ mod tests { #[test] fn test_no_zero_in_date() -> Result<()> { - let cases = ["2019-01-00", "2019-00-01"]; + let cases = vec!["2019-01-00", "2019-00-01"]; - for case in cases { + for &case in cases.iter() { // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is // produced. let mut ctx = EvalContext::from(TimeEnv { @@ -2820,7 +2817,7 @@ mod tests { let actual = Time::from_duration(&mut ctx, duration, TimeType::DateTime)?; let today = actual - .try_into_chrono_datetime(&ctx)? + .try_into_chrono_datetime(&mut ctx)? 
.checked_sub_signed(chrono::Duration::nanoseconds(duration.to_nanos())) .unwrap(); @@ -2840,7 +2837,7 @@ mod tests { let mut ctx = EvalContext::default(); for i in 2..10 { let actual = Time::from_local_time(&mut ctx, TimeType::DateTime, i % MAX_FSP)?; - let c_datetime = actual.try_into_chrono_datetime(&ctx)?; + let c_datetime = actual.try_into_chrono_datetime(&mut ctx)?; let now0 = c_datetime.timestamp_millis() as u64; let now1 = Utc::now().timestamp_millis() as u64; diff --git a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 9dfc3ebf2886..25b35a90fc0e 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -120,7 +120,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_local_date(&self, local: &NaiveDate) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -135,7 +134,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -150,7 +148,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_utc_date(&self, utc: &NaiveDate) -> Date { match *self { Tz::Local(ref offset) => { @@ -168,7 +165,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_utc_datetime(&self, utc: &NaiveDateTime) -> DateTime { match *self { Tz::Local(ref offset) => { diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index aa5eb3fc56f2..da117c96e2c0 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -298,7 +298,7 @@ impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, - _marker: PhantomData, + _marker: PhantomData::default(), } } diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 81ef4b072c62..37becbfb801b 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -528,7 +528,7 @@ pub fn generate_index_data_for_test( let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() - .map(|(cid, value)| { + .map(|&(ref cid, ref value)| { expect_row.insert( *cid, datum::encode_key(&mut EvalContext::default(), &[value.clone()]).unwrap(), diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 5ebf8a031d3e..3a5c53a4d095 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -611,8 +611,8 @@ impl IndexScanExecutorImpl { } #[inline] - fn build_operations<'a>( - &self, + fn build_operations<'a, 'b>( + &'b self, mut key_payload: &'a [u8], index_value: &'a [u8], ) -> Result<(DecodeHandleOp<'a>, DecodePartitionIdOp<'a>, RestoreData<'a>)> { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 27e52dde2885..7c410befb257 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -137,31 +137,31 @@ impl BatchExecutorsRunner<()> { .map_err(|e| other_err!("BatchProjectionExecutor: {}", e))?; } ExecType::TypeJoin => { - return Err(other_err!("Join executor not implemented")); + other_err!("Join 
executor not implemented"); } ExecType::TypeKill => { - return Err(other_err!("Kill executor not implemented")); + other_err!("Kill executor not implemented"); } ExecType::TypeExchangeSender => { - return Err(other_err!("ExchangeSender executor not implemented")); + other_err!("ExchangeSender executor not implemented"); } ExecType::TypeExchangeReceiver => { - return Err(other_err!("ExchangeReceiver executor not implemented")); + other_err!("ExchangeReceiver executor not implemented"); } ExecType::TypePartitionTableScan => { - return Err(other_err!("PartitionTableScan executor not implemented")); + other_err!("PartitionTableScan executor not implemented"); } ExecType::TypeSort => { - return Err(other_err!("Sort executor not implemented")); + other_err!("Sort executor not implemented"); } ExecType::TypeWindow => { - return Err(other_err!("Window executor not implemented")); + other_err!("Window executor not implemented"); } ExecType::TypeExpand => { - return Err(other_err!("Expand executor not implemented")); + other_err!("Expand executor not implemented"); } ExecType::TypeExpand2 => { - return Err(other_err!("Expand2 executor not implemented")); + other_err!("Expand2 executor not implemented"); } } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index ffcb22671da6..bd65547109d3 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -537,7 +537,7 @@ mod tests { }) .collect(); - for predicates in [ + for predicates in vec![ // Swap predicates should produce same results. vec![predicate[0](), predicate[1]()], vec![predicate[1](), predicate[0]()], @@ -572,7 +572,7 @@ mod tests { }) .collect(); - for predicates in [ + for predicates in vec![ // Swap predicates should produce same results. vec![predicate[0](), predicate[1](), predicate[2]()], vec![predicate[1](), predicate[2](), predicate[0]()], diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index a5d760dc80d5..0535e8dbd83d 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -641,8 +641,8 @@ pub mod tests { )) as Box> }; - let test_paging_size = [2, 5, 7]; - let expect_call_num = [1, 3, 4]; + let test_paging_size = vec![2, 5, 7]; + let expect_call_num = vec![1, 3, 4]; let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index db456a848832..ca05e49fcd3d 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -28,13 +28,13 @@ pub fn ensure_columns_decoded( /// Evaluates expressions and outputs the result into the given Vec. Lifetime of /// the expressions are erased. 
-pub unsafe fn eval_exprs_decoded_no_lifetime( +pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( ctx: &mut EvalContext, exprs: &[RpnExpression], schema: &[FieldType], input_physical_columns: &LazyBatchColumnVec, input_logical_rows: &[usize], - output: &mut Vec>, + output: &mut Vec>, ) -> Result<()> { unsafe fn erase_lifetime<'a, T: ?Sized>(v: &T) -> &'a T { &*(v as *const T) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index b6619f9d8ccb..76e90f79c5bf 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -6528,7 +6528,7 @@ mod tests { "cast_decimal_as_duration", ); - let values = [ + let values = vec![ Decimal::from_bytes(b"9995959").unwrap().unwrap(), Decimal::from_bytes(b"-9995959").unwrap().unwrap(), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 663571804ae2..5d2daed7f9ae 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -58,7 +58,7 @@ pub fn inet_aton(addr: BytesRef) -> Result> { } let (mut byte_result, mut result, mut dot_count): (u64, u64, usize) = (0, 0, 0); for c in addr.chars() { - if c.is_ascii_digit() { + if ('0'..='9').contains(&c) { let digit = c as u64 - '0' as u64; byte_result = byte_result * 10 + digit; if byte_result > 255 { @@ -501,9 +501,8 @@ mod tests { (Some(hex("00000000")), Some(b"0.0.0.0".to_vec())), (Some(hex("0A000509")), Some(b"10.0.5.9".to_vec())), ( - // the output format has changed, see: https://github.com/rust-lang/rust/pull/112606 Some(hex("00000000000000000000000001020304")), - Some(b"::102:304".to_vec()), + Some(b"::1.2.3.4".to_vec()), ), ( Some(hex("00000000000000000000FFFF01020304")), diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index 45754d0a101e..f3b9b03c287d 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -63,13 +63,13 @@ pub fn oct_string(s: BytesRef, writer: BytesWriter) -> Result { if let Some(&c) = trimmed.next() { if c == b'-' { negative = true; - } else if c.is_ascii_digit() { + } else if (b'0'..=b'9').contains(&c) { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(writer.write(Some(b"0".to_vec()))); } - for c in trimmed.take_while(|&c| c.is_ascii_digit()) { + for c in trimmed.take_while(|&c| (b'0'..=b'9').contains(c)) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -879,7 +879,7 @@ impl TrimDirection { } #[inline] -fn trim<'a>(string: &'a [u8], pattern: &[u8], direction: TrimDirection) -> &'a [u8] { +fn trim<'a, 'b>(string: &'a [u8], pattern: &'b [u8], direction: TrimDirection) -> &'a [u8] { if pattern.is_empty() { return string; } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 40c1f485e544..c2ef67221486 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -10,8 +10,6 @@ #![allow(elided_lifetimes_in_paths)] // Necessary until rpn_fn accepts functions annotated with lifetimes. 
#![allow(incomplete_features)] -#![allow(clippy::needless_raw_string_hashes)] -#![allow(clippy::needless_return_with_question_mark)] #![feature(proc_macro_hygiene)] #![feature(specialization)] #![feature(test)] diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index e3ab7d352971..b892333b0ef3 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1091,13 +1091,16 @@ mod tests { use tipb::{Expr, ScalarFuncSig}; #[allow(clippy::trivially_copy_pass_by_ref)] - #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a)] - fn fn_a_nonnull(metadata: &i64, v: &Int) -> Result> { + #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a::)] + fn fn_a_nonnull( + metadata: &i64, + v: &Int, + ) -> Result> { assert_eq!(*metadata, 42); Ok(Some(v + *metadata)) } - fn prepare_a(_expr: &mut Expr) -> Result { + fn prepare_a(_expr: &mut Expr) -> Result { Ok(42) } @@ -1133,7 +1136,7 @@ mod tests { // fn_b: CastIntAsReal // fn_c: CastIntAsString Ok(match expr.get_sig() { - ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta(), + ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta::(), ScalarFuncSig::CastIntAsReal => fn_b_fn_meta::(), ScalarFuncSig::CastIntAsString => fn_c_fn_meta::(), _ => unreachable!(), diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 858edfffec26..576aa5cfa768 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -605,7 +605,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for (k, v) in &base_data { + for &(ref k, ref v) in &base_data { engine.put(&data_key(k), v).unwrap(); } (r, base_data) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 43e5f1bea054..25f58352750f 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -9,7 +9,6 @@ #![feature(min_specialization)] #![feature(type_alias_impl_trait)] #![feature(associated_type_defaults)] -#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index b786d2aa6813..c53c58965192 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -11,9 +11,9 @@ where let mut start = 0; let bytes = file_name.as_bytes(); for (index, &b) in bytes.iter().enumerate() { - if b.is_ascii_uppercase() - || b.is_ascii_lowercase() - || b.is_ascii_digit() + if (b'A'..=b'Z').contains(&b) + || (b'a'..=b'z').contains(&b) + || (b'0'..=b'9').contains(&b) || b == b'.' 
|| b == b'-' || b == b'_' diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index a2d0943df90e..76fad6e8a34c 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -247,7 +247,7 @@ where HashMapEntry::Occupied(mut e) => { self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); - let entry = e.get_mut(); + let mut entry = e.get_mut(); self.trace.promote(entry.record); entry.value = value; } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index a28978096837..291254c5227b 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -33,7 +33,7 @@ pub trait HeapSize { impl HeapSize for [u8] { fn heap_size(&self) -> usize { - mem::size_of_val(self) + self.len() * mem::size_of::() } } diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index af22e411767c..260aa88ac8e0 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -64,7 +64,7 @@ impl Collector for AllocStatsCollector { .set(dealloc as _); }); let mut g = self.memory_stats.collect(); - g.extend(self.allocation.collect()); + g.extend(self.allocation.collect().into_iter()); g } } diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 354ef74adb0f..4492e33a9335 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -302,8 +302,6 @@ mod tests { use super::*; - // the JoinHandler is useless here, so just ignore this warning. - #[allow(clippy::let_underscore_future)] fn spawn_and_wait( rx_builder: impl FnOnce() -> S, ) -> (Runtime, Arc) { diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 61608d1518fe..6ec1621c629d 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -333,7 +333,7 @@ mod tests { for _ in 0..num * 10 { std::thread::spawn(move || { loop { - let _ = (0..10_000_000).sum::(); + let _ = (0..10_000_000).into_iter().sum::(); } }); } diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index a7a2b421ab04..bb555e11794f 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -81,14 +81,14 @@ impl Eq for TimeoutTask {} impl PartialOrd for TimeoutTask { fn partial_cmp(&self, other: &TimeoutTask) -> Option { - Some(self.cmp(other)) + self.next_tick.partial_cmp(&other.next_tick) } } impl Ord for TimeoutTask { fn cmp(&self, other: &TimeoutTask) -> Ordering { // TimeoutTask.next_tick must have same type of instants. - self.next_tick.partial_cmp(&other.next_tick).unwrap() + self.partial_cmp(other).unwrap() } } diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index 79727575d604..fb0cd9001232 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -118,10 +118,9 @@ impl slog::Value for TimeStamp { const TS_SET_USE_VEC_LIMIT: usize = 8; /// A hybrid immutable set for timestamps. -#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum TsSet { /// When the set is empty, avoid the useless cloning of Arc. - #[default] Empty, /// `Vec` is suitable when the set is small or the set is barely used, and /// it doesn't worth converting a `Vec` into a `HashSet`. 
@@ -131,6 +130,13 @@ pub enum TsSet { Set(Arc>), } +impl Default for TsSet { + #[inline] + fn default() -> TsSet { + TsSet::Empty + } +} + impl TsSet { /// Create a `TsSet` from the given vec of timestamps. It will select the /// proper internal collection type according to the size. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 5305e3ec69ac..624ac81212d8 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -451,7 +451,7 @@ impl From for Mutation { /// `OldValue` is used by cdc to read the previous value associated with some /// key during the prewrite process. -#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum OldValue { /// A real `OldValue`. Value { value: Value }, @@ -460,13 +460,18 @@ pub enum OldValue { /// `None` means we don't found a previous value. None, /// The user doesn't care about the previous value. - #[default] Unspecified, /// Not sure whether the old value exists or not. users can seek CF_WRITE to /// the give position to take a look. SeekWrite(Key), } +impl Default for OldValue { + fn default() -> Self { + OldValue::Unspecified + } +} + impl OldValue { pub fn value(value: Value) -> Self { OldValue::Value { value } @@ -585,9 +590,8 @@ impl WriteBatchFlags { /// The position info of the last actual write (PUT or DELETE) of a LOCK record. /// Note that if the last change is a DELETE, its LastChange can be either /// Exist(which points to it) or NotExist. -#[derive(Clone, Default, Eq, PartialEq, Debug)] +#[derive(Clone, Eq, PartialEq, Debug)] pub enum LastChange { - #[default] Unknown, /// The pointer may point to a PUT or a DELETE record. Exist { @@ -643,6 +647,12 @@ impl LastChange { } } +impl Default for LastChange { + fn default() -> Self { + LastChange::Unknown + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust-toolchain b/rust-toolchain index c1eb62e26cb8..4e5f9a4d82b8 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2023-08-15 +nightly-2022-11-15 diff --git a/src/config/mod.rs b/src/config/mod.rs index 63e36a543dc0..8318556483ed 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1482,7 +1482,7 @@ impl DbConfig { opts.set_paranoid_checks(b); } if for_engine == EngineType::RaftKv { - opts.set_info_log(RocksdbLogger); + opts.set_info_log(RocksdbLogger::default()); } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1858,7 +1858,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDbLogger); + opts.set_info_log(RaftDbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -2015,7 +2015,7 @@ impl ConfigManager for DbConfigManger { self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); - let cf_config = change.extract_if(|(name, _)| name.ends_with("cf")); + let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); for (cf_name, cf_change) in cf_config { if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default @@ -2049,7 +2049,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - 
.extract_if(|(name, _)| name == "rate_bytes_per_sec") + .drain_filter(|(name, _)| name == "rate_bytes_per_sec") .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); @@ -2058,7 +2058,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - .extract_if(|(name, _)| name == "rate_limiter_auto_tuned") + .drain_filter(|(name, _)| name == "rate_limiter_auto_tuned") .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); @@ -2067,7 +2067,7 @@ impl ConfigManager for DbConfigManger { } if let Some(size) = change - .extract_if(|(name, _)| name == "write_buffer_limit") + .drain_filter(|(name, _)| name == "write_buffer_limit") .next() { let size: ReadableSize = size.1.into(); @@ -2075,14 +2075,14 @@ impl ConfigManager for DbConfigManger { } if let Some(f) = change - .extract_if(|(name, _)| name == "write_buffer_flush_oldest_first") + .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") .next() { self.db.set_flush_oldest_first(f.1.into())?; } if let Some(background_jobs_config) = change - .extract_if(|(name, _)| name == "max_background_jobs") + .drain_filter(|(name, _)| name == "max_background_jobs") .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); @@ -2090,7 +2090,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_subcompactions_config) = change - .extract_if(|(name, _)| name == "max_sub_compactions") + .drain_filter(|(name, _)| name == "max_sub_compactions") .next() { let max_subcompactions: u32 = background_subcompactions_config.1.into(); @@ -2099,7 +2099,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_flushes_config) = change - .extract_if(|(name, _)| name == "max_background_flushes") + .drain_filter(|(name, _)| name == "max_background_flushes") .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index 7d2d7e9e9477..02f45d353115 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -285,7 +285,7 @@ pub fn tls_collect_scan_details(cmd: ReqTag, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 874917130e48..fcd16f9b9478 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -64,13 +64,11 @@ type HandlerStreamStepResult = Result<(Option, bool)>; #[async_trait] pub trait RequestHandler: Send { /// Processes current request and produces a response. - #[allow(clippy::diverging_sub_expression)] async fn handle_request(&mut self) -> Result> { panic!("unary request is not supported for this handler"); } /// Processes current request and produces streaming responses. - #[allow(clippy::diverging_sub_expression)] async fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { panic!("streaming request is not supported for this handler"); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 1a670c917ca9..6d40ffe959c8 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -66,9 +66,9 @@ const REQUEST_WRITE_CONCURRENCY: usize = 16; /// bytes. In detail, they are: /// - 2 bytes for the request type (Tag+Value). /// - 2 bytes for every string or bytes field (Tag+Length), they are: -/// . + the key field -/// . + the value field -/// . + the CF field (None for CF_DEFAULT) +/// . + the key field +/// . + the value field +/// . 
+ the CF field (None for CF_DEFAULT) /// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). /// - 2 bytes for the request itself (which would be embedded into a /// [`RaftCmdRequest`].) diff --git a/src/lib.rs b/src/lib.rs index aafb099c6cc8..b3e9ebaf8e84 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,14 +23,13 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(extract_if)] +#![feature(drain_filter)] #![feature(deadline_api)] #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] #![allow(incomplete_features)] #![feature(return_position_impl_trait_in_trait)] -#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 7060b20bdb2b..4230828dff1d 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1113,7 +1113,7 @@ fn get_tablet_cache( "tablet load failed, region_state {:?}", region_state.get_state() ); - Err(box_err!(e)) + return Err(box_err!(e)); } } } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index fe5a252b8dbc..665824a1baca 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -826,7 +826,6 @@ pub mod test_utils { use crate::storage::kv::RocksEngine as StorageRocksEngine; /// Do a global GC with the given safe point. - #[allow(clippy::needless_pass_by_ref_mut)] pub fn gc_by_compact(engine: &mut StorageRocksEngine, _: &[u8], safe_point: u64) { let engine = engine.get_rocksdb(); // Put a new key-value pair to ensure compaction can be triggered correctly. diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index d2dc6532200a..be18f8216d5b 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -546,9 +546,7 @@ impl GcMan ) -> GcManagerResult> { // Get the information of the next region to do GC. let (region, next_key) = self.get_next_gc_context(from_key); - let Some(region) = region else { - return Ok(None); - }; + let Some(region) = region else { return Ok(None) }; let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); @@ -809,7 +807,7 @@ mod tests { // Following code asserts gc_tasks == expected_gc_tasks. assert_eq!(gc_tasks.len(), expected_gc_tasks.len()); - let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks).all( + let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks.into_iter()).all( |((region, safe_point), (expect_region, expect_safe_point))| { region == expect_region && safe_point == expect_safe_point.into() }, @@ -886,7 +884,7 @@ mod tests { #[test] fn test_auto_gc_rewinding() { - for regions in [ + for regions in vec![ // First region starts with empty and last region ends with empty. 
vec![ (b"".to_vec(), b"1".to_vec(), 1), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index de40975632f2..c608470ba87a 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -254,7 +254,7 @@ fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Ve let mut keys_in_region = Vec::new(); loop { - let Some(key) = keys.peek() else { break }; + let Some(key) = keys.peek() else {break}; let key = key.as_encoded().as_slice(); if key < region.get_start_key() { @@ -552,7 +552,7 @@ impl GcRunner { let mut keys = keys.into_iter().peekable(); for region in regions { let mut raw_modifies = MvccRaw::new(); - let snapshot = self.get_snapshot(self.store_id, ®ion)?; + let mut snapshot = self.get_snapshot(self.store_id, ®ion)?; let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); let mut next_gc_key = keys_in_region.next(); @@ -563,7 +563,7 @@ impl GcRunner { &range_start_key, &range_end_key, &mut raw_modifies, - &snapshot, + &mut snapshot, &mut gc_info, ) { GC_KEY_FAILURES.inc(); @@ -615,7 +615,7 @@ impl GcRunner { range_start_key: &Key, range_end_key: &Key, raw_modifies: &mut MvccRaw, - kv_snapshot: &::Snap, + kv_snapshot: &mut ::Snap, gc_info: &mut GcInfo, ) -> Result<()> { let start_key = key.clone().append_ts(safe_point.prev()); @@ -669,7 +669,10 @@ impl GcRunner { } pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { - let stats = self.stats_map.entry(key_mode).or_default(); + let stats = self + .stats_map + .entry(key_mode) + .or_insert_with(Default::default); stats } @@ -2266,6 +2269,7 @@ mod tests { fn generate_keys(start: u64, end: u64) -> Vec { (start..end) + .into_iter() .map(|i| { let key = format!("k{:02}", i); Key::from_raw(key.as_bytes()) diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 938dfaff8a66..9583df80dd67 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -361,15 +361,20 @@ impl DetectTable { } /// The role of the detector. -#[derive(Debug, Default, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum Role { /// The node is the leader of the detector. Leader, /// The node is a follower of the leader. 
- #[default] Follower, } +impl Default for Role { + fn default() -> Role { + Role::Follower + } +} + impl From for Role { fn from(role: StateRole) -> Role { match role { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 58287c2bb834..82563666f048 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -306,7 +306,6 @@ struct WriteResFeed { unsafe impl Send for WriteResFeed {} impl WriteResFeed { - #[allow(clippy::arc_with_non_send_sync)] fn pair() -> (Self, WriteResSub) { let core = Arc::new(WriteResCore { ev: AtomicU8::new(0), @@ -582,9 +581,7 @@ where tx.notify(res); } rx.inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { - return; - }; + let WriteEvent::Finished(res) = ev else { return }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 9785e821312f..5183ecd6567f 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -315,9 +315,7 @@ impl tikv_kv::Engine for RaftKv2 { early_err: res.err(), }) .inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { - return; - }; + let WriteEvent::Finished(res) = ev else { return }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 73a15983bd08..d9b17c5d35c0 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -269,9 +269,7 @@ where /// Stops the Node. pub fn stop(&mut self) { let store_id = self.store.get_id(); - let Some((_, mut system)) = self.system.take() else { - return; - }; + let Some((_, mut system)) = self.system.take() else { return }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); system.shutdown(); } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 497d8240684d..d0b715542d57 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -300,6 +300,7 @@ where let debugger = self.debugger.clone(); let res = self.pool.spawn(async move { + let req = req; debugger .compact( req.get_db(), diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 413e36a6645d..8e77d65233ea 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -612,7 +612,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -639,7 +639,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -662,7 +662,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = ["2019/08/23 18:09:53.387 +08:00"] + let expected = vec!["2019/08/23 18:09:53.387 +08:00"] .iter() .map(|s| timestamp(s)) .collect::>(); @@ -671,7 +671,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# expected ); - for time in [0, i64::MAX].into_iter() { + for time in vec![0, i64::MAX].into_iter() { let log_iter = LogIterator::new( &log_file, timestamp("2019/08/23 18:09:53.387 +08:00"), @@ -680,7 +680,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 
18:09:59.387 +08:00", "2019/08/23 18:10:06.387 +08:00", @@ -704,7 +704,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![regex::Regex::new(".*test-filter.*").unwrap()], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:10:06.387 +08:00", // for invalid line ] @@ -783,7 +783,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# req.set_end_time(i64::MAX); req.set_levels(vec![LogLevel::Warn as _]); req.set_patterns(vec![".*test-filter.*".to_string()].into()); - let expected = [ + let expected = vec![ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:11:58.387 +08:00", "2019/08/23 18:11:59.387 +08:00", // for invalid line @@ -796,7 +796,9 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# s.collect::>() .await .into_iter() - .flat_map(|mut resp| resp.take_messages().into_iter()) + .map(|mut resp| resp.take_messages().into_iter()) + .into_iter() + .flatten() .map(|msg| msg.get_time()) .collect::>() }); diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 12494e9e7c4e..8a84eaf6293c 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -601,7 +601,7 @@ mod tests { ] ); // memory - for name in ["virtual", "swap"].into_iter() { + for name in vec!["virtual", "swap"].into_iter() { let item = collector .iter() .find(|x| x.get_tp() == "memory" && x.get_name() == name); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 4a961eedf191..77f92d33d955 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -889,6 +889,7 @@ impl Tikv for Service { forward_duplex!(self.proxy, batch_commands, ctx, stream, sink); let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); + let ctx = Arc::new(ctx); let peer = ctx.peer(); let storage = self.storage.clone(); let copr = self.copr.clone(); diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 1eba8cd81b73..32c99867a3fd 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -387,9 +387,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( - box MvccErrorInner::WriteConflict { .. }, - ))))) + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) + ))) )); // The tx should be dropped. 
rx.recv().unwrap_err(); @@ -422,9 +422,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( - box MvccErrorInner::KeyIsLocked(_), - ))))) + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) + ))) )); // Since the cancellation callback can fully execute only when it's successfully // removed from the lock waiting queues, it's impossible that `finish_request` diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 68e0118610aa..a81248fe9e25 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -110,7 +110,12 @@ impl Eq for LockWaitEntry {} impl PartialOrd for LockWaitEntry { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + // Reverse it since the priority queue is a max heap and we want to pop the + // minimal. + other + .parameters + .start_ts + .partial_cmp(&self.parameters.start_ts) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index d3b3e89a3f85..e9477b56b0ff 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -63,7 +63,7 @@ pub fn tls_collect_scan_details(cmd: CommandKind, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index b8224df696bb..cb4057bfd7e2 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1946,7 +1946,7 @@ impl Storage { key_ranges.push(build_key_range(k.as_encoded(), k.as_encoded(), false)); (k, v) }) - .filter(|(_, v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) + .filter(|&(_, ref v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) .map(|(k, v)| match v { Ok(v) => { let (user_key, _) = F::decode_raw_key_owned(k, false).unwrap(); @@ -3892,9 +3892,9 @@ mod tests { let result = block_on(storage.get(Context::default(), Key::from_raw(b"x"), 100.into())); assert!(matches!( result, - Err(Error(box ErrorInner::Txn(txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( - box mvcc::ErrorInner::KeyIsLocked { .. }, - )))))) + Err(Error(box ErrorInner::Txn(txn::Error( + box txn::ErrorInner::Mvcc(mvcc::Error(box mvcc::ErrorInner::KeyIsLocked { .. 
})) + )))) )); } @@ -5744,7 +5744,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -5803,7 +5803,7 @@ mod tests { let mut total_bytes: u64 = 0; let mut is_first = true; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6116,7 +6116,7 @@ mod tests { #[test] fn test_raw_batch_put() { - for for_cas in [false, true].into_iter() { + for for_cas in vec![false, true].into_iter() { test_kv_format_impl!(test_raw_batch_put_impl(for_cas)); } } @@ -6245,7 +6245,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6260,7 +6260,7 @@ mod tests { } // Verify pairs in a batch - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, @@ -6292,7 +6292,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6310,7 +6310,7 @@ mod tests { let mut ids = vec![]; let cmds = test_data .iter() - .map(|(k, _)| { + .map(|&(ref k, _)| { let mut req = RawGetRequest::default(); req.set_context(ctx.clone()); req.set_key(k.clone()); @@ -6331,7 +6331,7 @@ mod tests { #[test] fn test_raw_batch_delete() { - for for_cas in [false, true].into_iter() { + for for_cas in vec![false, true].into_iter() { test_kv_format_impl!(test_raw_batch_delete_impl(for_cas)); } } @@ -6381,10 +6381,10 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = test_data .iter() - .map(|(k, v)| Some((k.clone(), v.clone()))) + .map(|&(ref k, ref v)| Some((k.clone(), v.clone()))) .collect(); expect_multi_values( results, @@ -6512,7 +6512,7 @@ mod tests { // Scan pairs with key only let mut results: Vec> = test_data .iter() - .map(|(k, _)| Some((k.clone(), vec![]))) + .map(|&(ref k, _)| Some((k.clone(), vec![]))) .collect(); expect_multi_values( results.clone(), @@ -6909,7 +6909,7 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 474c789a31dc..cc4403229c13 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -1287,7 +1287,7 @@ mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (1..30).step_by(2) { + for start_ts in (1..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, k, k, start_ts); must_commit(&mut engine, k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 61a366c12eea..48158eda946a 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -418,10 +418,11 @@ impl MvccReader { estimated_versions_to_last_change, } if estimated_versions_to_last_change >= SEEK_BOUND => { 
let key_with_ts = key.clone().append_ts(commit_ts); - let Some(value) = self.snapshot.get_cf(CF_WRITE, &key_with_ts)? - else { - return Ok(None); - }; + let Some(value) = self + .snapshot + .get_cf(CF_WRITE, &key_with_ts)? else { + return Ok(None); + }; self.statistics.write.get += 1; let write = WriteRef::parse(&value)?.to_owned(); assert!( @@ -2420,7 +2421,7 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough LOCK recrods - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2429,7 +2430,7 @@ pub mod tests { engine.commit(k, 45, 46); // Write enough LOCK recrods - for start_ts in (50..80).step_by(2) { + for start_ts in (50..80).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2484,7 +2485,7 @@ pub mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2521,7 +2522,7 @@ pub mod tests { engine.put(k, 1, 2); // 10 locks were put - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2548,7 +2549,7 @@ pub mod tests { feature_gate.set_version("6.1.0").unwrap(); set_tls_feature_gate(feature_gate); engine.delete(k, 51, 52); - for start_ts in (56..80).step_by(2) { + for start_ts in (56..80).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } let feature_gate = FeatureGate::default(); @@ -2580,7 +2581,7 @@ pub mod tests { let k = b"k"; engine.put(k, 1, 2); - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } engine.rollback(k, 30); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 2b0a8e13582a..3437a1e5432d 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -633,7 +633,7 @@ impl ScanPolicy for LatestEntryPolicy { fn scan_latest_handle_lock( current_user_key: Key, - cfg: &ScannerConfig, + cfg: &mut ScannerConfig, cursors: &mut Cursors, statistics: &mut Statistics, ) -> Result> { @@ -1636,7 +1636,7 @@ mod latest_kv_tests { must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); must_commit(&mut engine, b"k4", 3, 7); - for start_ts in (10..30).step_by(2) { + for start_ts in (10..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index aa635827961d..8c4ad5da08b0 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -290,7 +290,7 @@ mod tests { RawEncodeSnapshot::from_snapshot(raw_mvcc_snapshot); // get_cf - for (key, value, _) in &test_data[6..12] { + for &(ref key, ref value, _) in &test_data[6..12] { let res = encode_snapshot.get_cf(CF_DEFAULT, &ApiV2::encode_raw_key(key, None)); assert_eq!(res.unwrap(), Some(value.to_owned())); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 713155f91608..64e22a13585a 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -766,6 +766,7 @@ fn async_commit_timestamps( #[cfg(not(feature = "failpoints"))] let injected_fallback = false; + let max_commit_ts = max_commit_ts; if (!max_commit_ts.is_zero() && min_commit_ts 
> max_commit_ts) || injected_fallback { warn!("commit_ts is too large, fallback to normal 2PC"; "key" => log_wrappers::Value::key(key.as_encoded()), @@ -1874,6 +1875,7 @@ pub mod tests { // At most 12 ops per-case. let ops_count = rg.gen::() % 12; let ops = (0..ops_count) + .into_iter() .enumerate() .map(|(i, _)| { if i == 0 { diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 61dbdac65652..9a54895e7e20 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -88,8 +88,8 @@ mod tests { fn test_atomic_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let raw_keys = [b"ra", b"rz"]; - let raw_values = [b"valuea", b"valuez"]; + let raw_keys = vec![b"ra", b"rz"]; + let raw_values = vec![b"valuea", b"valuez"]; let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let mut modifies = vec![]; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 2f39b29bc64c..10446db6292b 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1853,7 +1853,9 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::AlreadyExist { .. }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::AlreadyExist { .. } + ))) )); assert_eq!(cm.max_ts().into_inner(), 15); @@ -1876,7 +1878,9 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { .. } + ))) )); } @@ -2282,9 +2286,9 @@ mod tests { .unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. } + ))) )); must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request @@ -2465,9 +2469,9 @@ mod tests { let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. } + ))) )); // Passing keys in different order gets the same result: let cmd = PrewritePessimistic::with_defaults( @@ -2488,9 +2492,9 @@ mod tests { let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. 
} + ))) )); // If the two keys are sent in different requests, it would be the client's duty diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index 549d1d226361..a662d9bab79c 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -224,7 +224,7 @@ impl Latches { keep_latches_for_next_cmd: Option<(u64, &Lock)>, ) -> Vec { // Used to - let dummy_vec = []; + let dummy_vec = vec![]; let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), None => (None, dummy_vec.iter().peekable()), @@ -282,9 +282,9 @@ mod tests { fn test_wakeup() { let latches = Latches::new(256); - let keys_a = ["k1", "k3", "k5"]; + let keys_a = vec!["k1", "k3", "k5"]; let mut lock_a = Lock::new(keys_a.iter()); - let keys_b = ["k4", "k5", "k6"]; + let keys_b = vec!["k4", "k5", "k6"]; let mut lock_b = Lock::new(keys_b.iter()); let cid_a: u64 = 1; let cid_b: u64 = 2; @@ -310,9 +310,9 @@ mod tests { fn test_wakeup_by_multi_cmds() { let latches = Latches::new(256); - let keys_a = ["k1", "k2", "k3"]; - let keys_b = ["k4", "k5", "k6"]; - let keys_c = ["k3", "k4"]; + let keys_a = vec!["k1", "k2", "k3"]; + let keys_b = vec!["k4", "k5", "k6"]; + let keys_c = vec!["k3", "k4"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); @@ -353,10 +353,10 @@ mod tests { fn test_wakeup_by_small_latch_slot() { let latches = Latches::new(5); - let keys_a = ["k1", "k2", "k3"]; - let keys_b = ["k6", "k7", "k8"]; - let keys_c = ["k3", "k4"]; - let keys_d = ["k7", "k10"]; + let keys_a = vec!["k1", "k2", "k3"]; + let keys_b = vec!["k6", "k7", "k8"]; + let keys_c = vec!["k3", "k4"]; + let keys_d = vec!["k7", "k10"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 2ca3ef145c87..197363043730 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -267,7 +267,7 @@ pub fn tls_collect_scan_details(cmd: &'static str, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 0081d5e95bc9..aa0c2c29decc 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -120,7 +120,6 @@ uuid = { version = "0.8.1", features = ["serde", "v4"] } procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] -arrow = "46.0" byteorder = "1.2" # See https://bheisler.github.io/criterion.rs/book/user_guide/known_limitations.html for the usage # of `real_blackbox` feature. 
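The hunks above repeatedly rewrite `.or_default()` as `.or_insert_with(Default::default)` and `.extract_if(..)` as `.drain_filter(..)`. Both pairs are equivalent: `or_default` is shorthand for `or_insert_with(Default::default)`, and `drain_filter` is the earlier name of the nightly Vec API now called `extract_if`, matching the `#![feature(drain_filter)]` gate restored in src/lib.rs above. A minimal stand-alone sketch of the entry-API equivalence, independent of any TiKV types:

use std::collections::HashMap;

fn main() {
    // Newer spelling used before this patch.
    let mut a: HashMap<&str, Vec<u32>> = HashMap::new();
    a.entry("scan").or_default().push(1);

    // Older spelling the patch switches back to; behaves identically,
    // since the entry's default value is created lazily in both cases.
    let mut b: HashMap<&str, Vec<u32>> = HashMap::new();
    b.entry("scan").or_insert_with(Default::default).push(1);

    assert_eq!(a, b);
}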
diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 3698860b4ea3..0a5708c74ce0 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -147,7 +147,7 @@ where I: 'static, { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + self.get_name().partial_cmp(other.get_name()) } } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 99f2c9ee1f42..92dacfe6dc9f 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -61,7 +61,7 @@ where .unwrap(); } let write_data = WriteData::from_modifies(txn.into_modifies()); - let _ = futures::executor::block_on(tikv_kv::write(engine, &ctx, write_data, None)); + let _ = tikv_kv::write(engine, &ctx, write_data, None); let keys: Vec = kvs.iter().map(|(k, _)| Key::from_raw(k)).collect(); let snapshot = engine.snapshot(Default::default()).unwrap(); (snapshot, keys) diff --git a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs deleted file mode 100644 index 241284a72282..000000000000 --- a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. - -use std::sync::Arc; - -use arrow::{ - array, - datatypes::{self, DataType, Field}, - record_batch::RecordBatch, -}; -use tidb_query_datatype::{codec::Datum, prelude::*, FieldTypeFlag, FieldTypeTp}; -use tipb::FieldType; - -pub struct Chunk { - pub data: RecordBatch, -} - -impl Chunk { - pub fn get_datum(&self, col_id: usize, row_id: usize, field_type: &FieldType) -> Datum { - if self.data.column(col_id).is_null(row_id) { - return Datum::Null; - } - - match field_type.as_accessor().tp() { - FieldTypeTp::Tiny - | FieldTypeTp::Short - | FieldTypeTp::Int24 - | FieldTypeTp::Long - | FieldTypeTp::LongLong - | FieldTypeTp::Year => { - if field_type - .as_accessor() - .flag() - .contains(FieldTypeFlag::UNSIGNED) - { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - - Datum::U64(data.value(row_id)) - } else { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - - Datum::I64(data.value(row_id)) - } - } - FieldTypeTp::Float | FieldTypeTp::Double => { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - Datum::F64(data.value(row_id)) - } - _ => unreachable!(), - } - } -} - -pub struct ChunkBuilder { - columns: Vec, -} - -impl ChunkBuilder { - pub fn new(cols: usize, rows: usize) -> ChunkBuilder { - ChunkBuilder { - columns: vec![ColumnsBuilder::new(rows); cols], - } - } - - pub fn build(self, tps: &[FieldType]) -> Chunk { - let mut fields = Vec::with_capacity(tps.len()); - let mut arrays: Vec> = Vec::with_capacity(tps.len()); - for (field_type, column) in tps.iter().zip(self.columns) { - match field_type.as_accessor().tp() { - FieldTypeTp::Tiny - | FieldTypeTp::Short - | FieldTypeTp::Int24 - | FieldTypeTp::Long - | FieldTypeTp::LongLong - | FieldTypeTp::Year => { - if field_type - .as_accessor() - .flag() - .contains(FieldTypeFlag::UNSIGNED) - { - let (f, d) = column.into_u64_array(); - fields.push(f); - arrays.push(d); - } else { - let (f, d) = column.into_i64_array(); - fields.push(f); - arrays.push(d); - } - } - FieldTypeTp::Float | FieldTypeTp::Double => { - let (f, d) = column.into_f64_array(); - fields.push(f); - arrays.push(d); - } - _ => 
unreachable!(), - }; - } - let schema = datatypes::Schema::new(fields); - let batch = RecordBatch::try_new(Arc::new(schema), arrays).unwrap(); - Chunk { data: batch } - } - - pub fn append_datum(&mut self, col_id: usize, data: Datum) { - self.columns[col_id].append_datum(data) - } -} - -#[derive(Clone)] -pub struct ColumnsBuilder { - data: Vec, -} - -impl ColumnsBuilder { - fn new(rows: usize) -> ColumnsBuilder { - ColumnsBuilder { - data: Vec::with_capacity(rows), - } - } - - fn append_datum(&mut self, data: Datum) { - self.data.push(data) - } - - fn into_i64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::Int64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::I64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } - - fn into_u64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::UInt64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::U64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } - - fn into_f64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::Float64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::F64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } -} diff --git a/tests/benches/misc/coprocessor/codec/chunk/mod.rs b/tests/benches/misc/coprocessor/codec/chunk/mod.rs deleted file mode 100644 index f956e2cb14e9..000000000000 --- a/tests/benches/misc/coprocessor/codec/chunk/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
- -mod chunk; - -use test::Bencher; -use tidb_query_datatype::{ - codec::{ - chunk::{Chunk, ChunkEncoder}, - datum::Datum, - mysql::*, - }, - FieldTypeTp, -}; -use tipb::FieldType; - -#[bench] -fn bench_encode_chunk(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![ - FieldTypeTp::LongLong.into(), - FieldTypeTp::LongLong.into(), - FieldTypeTp::VarChar.into(), - FieldTypeTp::VarChar.into(), - FieldTypeTp::NewDecimal.into(), - FieldTypeTp::Json.into(), - ]; - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - let s = format!("{}.123435", row_id); - let bs = Datum::Bytes(s.as_bytes().to_vec()); - let dec = Datum::Dec(s.parse().unwrap()); - let json = Datum::Json(Json::from_string(s).unwrap()); - chunk.append_datum(0, &Datum::Null).unwrap(); - chunk.append_datum(1, &Datum::I64(row_id as i64)).unwrap(); - chunk.append_datum(2, &bs).unwrap(); - chunk.append_datum(3, &bs).unwrap(); - chunk.append_datum(4, &dec).unwrap(); - chunk.append_datum(5, &json).unwrap(); - } - - b.iter(|| { - let mut buf = vec![]; - buf.write_chunk(&chunk).unwrap(); - }); -} - -#[bench] -fn bench_chunk_build_tidb(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::LongLong.into()]; - - b.iter(|| { - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - chunk.append_datum(0, &Datum::Null).unwrap(); - chunk.append_datum(1, &Datum::I64(row_id as i64)).unwrap(); - } - }); -} - -#[bench] -fn bench_chunk_build_official(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::LongLong.into()]; - - b.iter(|| { - let mut chunk = chunk::ChunkBuilder::new(fields.len(), rows); - for row_id in 0..rows { - chunk.append_datum(0, Datum::Null); - chunk.append_datum(1, Datum::I64(row_id as i64)); - } - chunk.build(&fields); - }); -} - -#[bench] -fn bench_chunk_iter_tidb(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::Double.into()]; - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - if row_id & 1 == 0 { - chunk.append_datum(0, &Datum::Null).unwrap(); - } else { - chunk.append_datum(0, &Datum::I64(row_id as i64)).unwrap(); - } - chunk.append_datum(1, &Datum::F64(row_id as f64)).unwrap(); - } - - b.iter(|| { - let mut col1 = 0; - let mut col2 = 0.0; - for row in chunk.iter() { - col1 += match row.get_datum(0, &fields[0]).unwrap() { - Datum::I64(v) => v, - Datum::Null => 0, - _ => unreachable!(), - }; - col2 += match row.get_datum(1, &fields[1]).unwrap() { - Datum::F64(v) => v, - _ => unreachable!(), - }; - } - assert_eq!(col1, 262_144); - assert!(!(523_776.0 - col2).is_normal()); - }); -} - -#[bench] -fn bench_chunk_iter_official(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::Double.into()]; - let mut chunk = chunk::ChunkBuilder::new(fields.len(), rows); - for row_id in 0..rows { - if row_id & 1 == 0 { - chunk.append_datum(0, Datum::Null); - } else { - chunk.append_datum(0, Datum::I64(row_id as i64)); - } - - chunk.append_datum(1, Datum::F64(row_id as f64)); - } - let chunk = chunk.build(&fields); - b.iter(|| { - let (mut col1, mut col2) = (0, 0.0); - for row_id in 0..chunk.data.num_rows() { - col1 += match chunk.get_datum(0, row_id, &fields[0]) { - Datum::I64(v) => v, - Datum::Null => 0, - _ => unreachable!(), - }; - col2 += match chunk.get_datum(1, row_id, &fields[1]) { - Datum::F64(v) => v, - _ => unreachable!(), - }; - } - assert_eq!(col1, 262_144); - 
assert!(!(523_776.0 - col2).is_normal()); - }); -} diff --git a/tests/benches/misc/coprocessor/codec/mod.rs b/tests/benches/misc/coprocessor/codec/mod.rs index 274ec3623774..082f1c558941 100644 --- a/tests/benches/misc/coprocessor/codec/mod.rs +++ b/tests/benches/misc/coprocessor/codec/mod.rs @@ -1,6 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -mod chunk; mod mysql; use byteorder::{BigEndian, ByteOrder, LittleEndian}; diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index a545d9935e64..d567edd5add9 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -171,7 +171,6 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { } #[bench] -#[allow(clippy::let_underscore_future)] fn bench_async_snapshot(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); @@ -206,7 +205,6 @@ fn bench_async_snapshot(b: &mut test::Bencher) { } #[bench] -#[allow(clippy::let_underscore_future)] fn bench_async_write(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index e164d59f82a8..05c602824c20 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -12,7 +12,7 @@ const DEFAULT_DATA_SIZE: usize = 100_000; fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { let mut wb = db.write_batch(); - for (k, v) in kvs { + for &(ref k, ref v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } wb.write().unwrap(); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index f40f40e6af11..a9dbd36a81a6 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,8 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -#![allow(clippy::arc_with_non_send_sync)] -#![allow(clippy::unnecessary_mut_passed)] -#[allow(clippy::let_underscore_future)] mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 55c06d87b070..217269bb5b85 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -35,7 +35,7 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { // check the region new leader is elected. fn assert_region_leader_changed( - cluster: &Cluster, + cluster: &mut Cluster, region_id: u64, original_leader: u64, ) { @@ -91,7 +91,7 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); - assert_region_leader_changed(&cluster, 1, 1); + assert_region_leader_changed(&mut cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); fail::cfg(get_fp(usage, 1), "return").unwrap(); @@ -199,7 +199,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&cluster, 1, 1); + assert_region_leader_changed(&mut cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -393,7 +393,7 @@ fn test_disk_full_followers_with_hibernate_regions() { // check the region new leader is elected. 
fn assert_region_merged( - cluster: &Cluster, + cluster: &mut Cluster, left_region_key: &[u8], right_region_key: &[u8], ) { diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 2dd5b6ac04b4..073f72764194 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -57,7 +57,6 @@ fn test_write_buffer_manager() { } } -#[rustfmt::skip] // The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: // note: before rocksdb/pull/347, lock is called before on_memtable_sealed. // Case: diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index d8f73f312b66..d2eb9aa10dde 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -93,7 +93,6 @@ fn test_break_leadership_on_restart() { // received, and become `GroupState::Ordered` after the proposal is received. // But they should keep wakeful for a while. #[test] -#[allow(clippy::let_underscore_future)] fn test_store_disconnect_with_hibernate() { let mut cluster = new_server_cluster(0, 3); let base_tick_ms = 50; diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 08b7474bb8e6..0c16819082bd 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1710,7 +1710,8 @@ fn test_destroy_source_peer_while_merging() { } struct MsgTimeoutFilter { - tx: Sender, + // wrap with mutex to make tx Sync. + tx: Mutex>, } impl Filter for MsgTimeoutFilter { @@ -1718,7 +1719,7 @@ impl Filter for MsgTimeoutFilter { let mut res = Vec::with_capacity(msgs.len()); for m in msgs.drain(..) { if m.get_message().msg_type == MessageType::MsgTimeoutNow { - self.tx.send(m).unwrap(); + self.tx.lock().unwrap().send(m).unwrap(); } else { res.push(m); } @@ -1787,7 +1788,7 @@ fn test_concurrent_between_transfer_leader_and_merge() { // msg by using Filter. So we make node-1-1000 be in leader_transferring status // for some time. let (tx, rx_msg) = channel(); - let filter = MsgTimeoutFilter { tx }; + let filter = MsgTimeoutFilter { tx: Mutex::new(tx) }; cluster.add_send_filter_on_node(1, Box::new(filter)); pd_client.transfer_leader( @@ -1811,13 +1812,15 @@ fn test_concurrent_between_transfer_leader_and_merge() { let router = cluster.get_router(2).unwrap(); let (tx, rx) = channel(); + let tx = Mutex::new(tx); let _ = fail::cfg_callback("propose_commit_merge_1", move || { - tx.send(()).unwrap(); + tx.lock().unwrap().send(()).unwrap(); }); let (tx2, rx2) = channel(); + let tx2 = Mutex::new(tx2); let _ = fail::cfg_callback("on_propose_commit_merge_success", move || { - tx2.send(()).unwrap(); + tx2.lock().unwrap().send(()).unwrap(); }); cluster.merge_region(left.get_id(), right.get_id(), Callback::None); diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 201aafce6fb5..0115d6d7ba53 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -43,7 +43,6 @@ macro_rules! 
request { } #[test] -#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let pd_client_reconnect_fp = "pd_client_reconnect"; diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index 583dad2ff34d..ac427c29e69d 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -43,7 +43,6 @@ macro_rules! request { } #[test] -#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let client = Arc::new(client); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index 5ab7edb503f3..a795422c120c 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -208,7 +208,7 @@ fn test_leader_transfer() { #[test] fn test_region_merge() { let mut suite = TestSuite::new(3, ApiVersion::V2); - let keys = [b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; + let keys = vec![b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; suite.must_raw_put(keys[1], b"v1"); suite.must_raw_put(keys[3], b"v3"); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index dc6906b668ae..7351044b2979 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -4,16 +4,11 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; use test_raftstore_macro::test_case; -use tikv_util::config::ReadableDuration; -#[test_case(test_raftstore::must_new_cluster_with_cfg_and_kv_client_mul)] -#[test_case(test_raftstore_v2::must_new_cluster_with_cfg_and_kv_client_mul)] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = new_cluster(1, |c| { - // set a small renew duration to avoid trigger pre-renew that can affact the - // metrics. - c.cfg.tikv.raft_store.renew_leader_lease_advance_duration = ReadableDuration::millis(1); - }); + let (_cluster, client, ctx) = new_cluster(); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 10a65271462b..65c50793d7a6 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1426,7 +1426,8 @@ impl Filter for TeeFilter { // 2. the splitted region set has_dirty_data be true in `apply_snapshot` // 3. the splitted region schedule tablet trim task in `on_applied_snapshot` // with tablet index 5 -// 4. the splitted region received a snapshot sent from its leader +// 4. the splitted region received a snapshot sent from its +// leader // 5. after finishing applying this snapshot, the tablet index in storage // changed to 6 // 6. 
tablet trim complete and callbacked to raftstore diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 4668c24ad661..57047bef9d41 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1620,7 +1620,9 @@ fn test_before_propose_deadline() { assert!( matches!( res, - Err(StorageError(box StorageErrorInner::Kv(KvError(box KvErrorInner::Request(_))))) + Err(StorageError(box StorageErrorInner::Kv(KvError( + box KvErrorInner::Request(_), + )))) ), "actual: {:?}", res diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 4154a764d99e..14f4161c7ae1 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -751,7 +751,7 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { let handle = std::thread::spawn(move || { let mut mutations = vec![]; - for key in [b"key3".to_vec(), b"key4".to_vec()] { + for key in vec![b"key3".to_vec(), b"key4".to_vec()] { let mut mutation = kvproto::kvrpcpb::Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(key); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 02fb8c046c84..75eb62bab990 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -361,8 +361,8 @@ fn test_read_lock_after_become_follower() { /// 1. Inserted 5 entries and make all stores commit and apply them. /// 2. Prevent the store 3 from append following logs. /// 3. Insert another 20 entries. -/// 4. Wait for some time so that part of the entry cache are compacted on the -/// leader(store 1). +/// 4. Wait for some time so that part of the entry cache are compacted +/// on the leader(store 1). macro_rules! run_cluster_for_test_warmup_entry_cache { ($cluster:expr) => { // Let the leader compact the entry cache. diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index bd5461e6134f..4cfd4be07be9 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -492,7 +492,6 @@ fn test_backup_raw_meta() { } #[test] -#[allow(clippy::permissions_set_readonly_false)] fn test_invalid_external_storage() { let mut suite = TestSuite::new(1, 144 * 1024 * 1024, ApiVersion::V1); // Put some data. diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index f821ffea2e7e..3d8cf85b02cf 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -67,6 +67,6 @@ fn test_apply_twice() { &tikv, &ctx, CF_DEFAULT, - default_fst.into_iter().chain(default_snd), + default_fst.into_iter().chain(default_snd.into_iter()), ); } diff --git a/tests/integrations/mod.rs b/tests/integrations/mod.rs index 86ceb5369e7a..2b68c0a8ba94 100644 --- a/tests/integrations/mod.rs +++ b/tests/integrations/mod.rs @@ -4,8 +4,6 @@ #![feature(box_patterns)] #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::extra_unused_type_parameters)] extern crate test; diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 30ea12a424b3..056641e1e3f8 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -287,8 +287,8 @@ fn test_flush_before_stop2() { // 1. 
lock `k` with index 6 // 2. on_applied_res => lockcf's last_modified = 6 // 3. flush lock cf => lockcf's flushed_index = 6 -// 4. batch {unlock `k`, write `k`} with index 7 (last_modified is updated in -// store but RocksDB is modified in apply. So, +// 4. batch {unlock `k`, write `k`} with index 7 +// (last_modified is updated in store but RocksDB is modified in apply. So, // before on_apply_res, the last_modified is not updated.) // // flush-before-close: diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index 56cb65cce87e..fbc7629c73fe 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -5,13 +5,13 @@ use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &Cluster) { +fn flush(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &Cluster, interval: u64, written: bool) { +fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { flush(cluster); // Wait for compaction. sleep_ms(interval * 2); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 7701fe167c8b..60f10936f2d0 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -434,7 +434,6 @@ fn test_txn_query_stats_tmpl() { fail::remove("only_check_source_task_name"); } -#[allow(clippy::extra_unused_type_parameters)] fn raw_put( _cluster: &Cluster, client: &TikvClient, From 74f82f651654dba267438782af8756ccb65e7fda Mon Sep 17 00:00:00 2001 From: SeaRise Date: Tue, 26 Sep 2023 16:03:16 +0800 Subject: [PATCH 070/203] expr: fix wrong result of 0 / decimal and 0 % decimal (#15675) close tikv/tikv#15631 Signed-off-by: SeaRise --- .../src/codec/mysql/decimal.rs | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 143ec6c77608..bc18d7192f9e 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -590,17 +590,24 @@ fn do_div_mod_impl( rhs: &Decimal, mut frac_incr: u8, do_mod: bool, + result_frac_cnt: Option, ) -> Option> { let r_frac_cnt = word_cnt!(rhs.frac_cnt) * DIGITS_PER_WORD; let (r_idx, r_prec) = rhs.remove_leading_zeroes(rhs.int_cnt + r_frac_cnt); if r_prec == 0 { + // short-circuit everything: rhs == 0 return None; } let l_frac_cnt = word_cnt!(lhs.frac_cnt) * DIGITS_PER_WORD; let (l_idx, l_prec) = lhs.remove_leading_zeroes(lhs.int_cnt + l_frac_cnt); if l_prec == 0 { - return Some(Res::Ok(Decimal::zero())); + // short-circuit everything: lhs == 0 + if let Some(result_frac) = result_frac_cnt { + return Some(Res::Ok(Decimal::new(0, result_frac, false))); + } else { + return Some(Res::Ok(Decimal::zero())); + } } frac_incr = frac_incr.saturating_sub(l_frac_cnt - lhs.frac_cnt + r_frac_cnt - rhs.frac_cnt); @@ -784,8 +791,9 @@ fn do_div_mod_impl( Some(res) } +#[allow(dead_code)] fn do_div_mod(lhs: &Decimal, rhs: &Decimal, frac_incr: u8, do_mod: bool) -> Option> { - do_div_mod_impl(lhs, rhs, frac_incr, do_mod) + do_div_mod_impl(lhs, rhs, frac_incr, do_mod, None) } /// `do_mul` multiplies two decimals. 
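For context, a minimal sketch of the behaviour this fix targets, mirroring the new test cases added further down in this patch. It is not standalone: it assumes the same `Decimal` and `Res` types and operator impls as the surrounding decimal.rs, so it would sit inside that file's `mod tests` (where `use super::*;` brings both into scope); the test name is illustrative only.

#[test]
fn zero_dividend_keeps_result_frac() {
    // Sketch only: with the fix, a zero dividend keeps the result fraction
    // count instead of short-circuiting to a bare `Decimal::zero()`.
    let lhs: Decimal = "0".parse().unwrap();
    let rhs: Decimal = "0.5".parse().unwrap();

    // Division: with the `/` operator's default fraction increment the
    // result is rendered as "0.0000" rather than "0".
    let div = (&lhs / &rhs).unwrap().map(|d| d.to_string());
    assert_eq!(div, Res::Ok("0.0000".to_owned()));

    // Modulo: the result keeps the larger fraction count of the two
    // operands, hence "0.0".
    let rem = (lhs % rhs).unwrap().map(|d| d.to_string());
    assert_eq!(rem, Res::Ok("0.0".to_owned()));
}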
@@ -1704,7 +1712,7 @@ impl Decimal { fn div(&self, rhs: &Decimal, frac_incr: u8) -> Option> { let result_frac_cnt = cmp::min(self.result_frac_cnt.saturating_add(frac_incr), MAX_FRACTION); - let mut res = do_div_mod(self, rhs, frac_incr, false); + let mut res = do_div_mod_impl(self, rhs, frac_incr, false, Some(result_frac_cnt)); if let Some(ref mut dec) = res { dec.result_frac_cnt = result_frac_cnt; } @@ -2362,7 +2370,7 @@ impl<'a, 'b> Rem<&'a Decimal> for &'b Decimal { type Output = Option>; fn rem(self, rhs: &'a Decimal) -> Self::Output { let result_frac_cnt = cmp::max(self.result_frac_cnt, rhs.result_frac_cnt); - let mut res = do_div_mod_impl(self, rhs, 0, true); + let mut res = do_div_mod_impl(self, rhs, 0, true, Some(result_frac_cnt)); if let Some(ref mut dec) = res { dec.result_frac_cnt = result_frac_cnt; } @@ -3545,17 +3553,28 @@ mod tests { assert_eq!(res, rem_exp.map(|s| s.to_owned())); } - let div_cases = vec![( - "-43791957044243810000000000000000000000000000000000000000000000000000000000000", - "-0.0000000000000000000000000000000000000000000000000012867433602814482", - Res::Overflow( - "34033171179267041433424155279291553259014210153022524070386565694757521640", + let div_cases = vec![ + ( + "-43791957044243810000000000000000000000000000000000000000000000000000000000000", + "-0.0000000000000000000000000000000000000000000000000012867433602814482", + Res::Overflow( + "34033171179267041433424155279291553259014210153022524070386565694757521640", + ), ), - )]; - for (lhs_str, rhs_str, rem_exp) in div_cases { + ("0", "0.5", Res::Ok("0.0000")), + ]; + for (lhs_str, rhs_str, div_exp) in div_cases { let lhs: Decimal = lhs_str.parse().unwrap(); let rhs: Decimal = rhs_str.parse().unwrap(); let res = (&lhs / &rhs).unwrap().map(|d| d.to_string()); + assert_eq!(res, div_exp.map(|s| s.to_owned())) + } + + let rem_cases = vec![("0", "0.5", Res::Ok("0.0"))]; + for (lhs_str, rhs_str, rem_exp) in rem_cases { + let lhs: Decimal = lhs_str.parse().unwrap(); + let rhs: Decimal = rhs_str.parse().unwrap(); + let res = (lhs % rhs).unwrap().map(|d| d.to_string()); assert_eq!(res, rem_exp.map(|s| s.to_owned())) } } From 977888de9b218abd56928ab51e0f78a5b13c9063 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 26 Sep 2023 16:30:16 +0800 Subject: [PATCH 071/203] raftstore-v2: fix "failed to get merge entries" panic (#15649) close tikv/tikv#15633 fix "failed to get merge entries" panic Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/fsm/peer.rs | 1 + .../operation/command/admin/compact_log.rs | 2 + .../operation/command/admin/merge/commit.rs | 10 +- .../operation/command/admin/merge/prepare.rs | 2 + .../operation/command/admin/merge/rollback.rs | 18 +- .../raftstore-v2/src/operation/query/mod.rs | 1 + .../src/operation/ready/apply_trace.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 11 +- components/raftstore/src/store/fsm/peer.rs | 1 + components/test_raftstore-v2/src/util.rs | 38 +++- components/test_raftstore/src/cluster.rs | 5 +- tests/failpoints/cases/test_merge.rs | 179 +++++++++++++++--- .../integrations/raftstore/test_bootstrap.rs | 6 +- 13 files changed, 224 insertions(+), 52 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index d51d8eedb2a6..872b2c4e7e63 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -196,6 +196,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, 
self.schedule_tick(PeerTick::SplitRegionCheck); self.schedule_tick(PeerTick::PdHeartbeat); self.schedule_tick(PeerTick::CompactLog); + self.fsm.peer.on_check_merge(self.store_ctx); if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index d054234b46fd..1c4538ab51ea 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -145,6 +145,8 @@ impl Peer { store_ctx: &mut StoreContext, force: bool, ) { + fail::fail_point!("maybe_propose_compact_log", |_| {}); + // As leader, we would not keep caches for the peers that didn't response // heartbeat in the last few seconds. That happens probably because // another TiKV is down. In this case if we do not clean up the cache, diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 8e55f89a7d21..bec0265ffc3b 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -172,6 +172,7 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, ) { + fail::fail_point!("on_schedule_merge", |_| {}); fail::fail_point!( "ask_target_peer_to_commit_merge_2", self.region_id() == 2, @@ -198,7 +199,7 @@ impl Peer { Ok(ents) => ents, Err(e) => slog_panic!( self.logger, - "failed to get merge entires"; + "failed to get merge entries"; "err" => ?e, "low" => low, "commit" => state.get_commit() @@ -261,6 +262,7 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) { + fail::fail_point!("on_ask_commit_merge", |_| {}); let expected_epoch = req.get_header().get_region_epoch(); let merge = req.get_admin_request().get_commit_merge(); assert!(merge.has_source_state() && merge.get_source_state().has_merge_state()); @@ -736,6 +738,12 @@ impl Peer { store_ctx: &mut StoreContext, mut res: CommitMergeResult, ) { + fail::fail_point!( + "on_apply_res_commit_merge_2", + self.peer().store_id == 2, + |_| {} + ); + let region = res.region_state.get_region(); assert!( res.source.get_end_key() == region.get_end_key() diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index d3d1896287c4..6ff982eea8cb 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -812,6 +812,8 @@ impl Peer { store_ctx: &mut StoreContext, res: PrepareMergeResult, ) { + fail::fail_point!("on_apply_res_prepare_merge"); + let region = res.region_state.get_region().clone(); { let mut meta = store_ctx.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index d931a295f4d6..adc49a928b36 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -4,9 +4,8 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ - metapb, raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse}, - raft_serverpb::PeerState, + raft_serverpb::{PeerState, RegionLocalState}, }; use raftstore::{ 
coprocessor::RegionChangeReason, @@ -28,7 +27,7 @@ use crate::{ #[derive(Debug)] pub struct RollbackMergeResult { commit: u64, - region: metapb::Region, + region_state: RegionLocalState, } impl Peer { @@ -118,7 +117,7 @@ impl Apply { AdminResponse::default(), AdminCmdResult::RollbackMerge(RollbackMergeResult { commit: rollback.get_commit(), - region, + region_state: self.region_state().clone(), }), )) } @@ -131,6 +130,7 @@ impl Peer { store_ctx: &mut StoreContext, res: RollbackMergeResult, ) { + let region = res.region_state.get_region(); assert_ne!(res.commit, 0); let current = self.merge_context().and_then(|c| c.prepare_merge_index()); if current != Some(res.commit) { @@ -143,21 +143,21 @@ impl Peer { } { let mut meta = store_ctx.store_meta.lock().unwrap(); - meta.set_region(&res.region, true, &self.logger); - let (reader, _) = meta.readers.get_mut(&res.region.get_id()).unwrap(); + meta.set_region(region, true, &self.logger); + let (reader, _) = meta.readers.get_mut(®ion.get_id()).unwrap(); self.set_region( &store_ctx.coprocessor_host, reader, - res.region.clone(), + region.clone(), RegionChangeReason::RollbackMerge, self.storage().region_state().get_tablet_index(), ); } - let region_state = self.storage().region_state().clone(); let region_id = self.region_id(); self.state_changes_mut() - .put_region_state(region_id, res.commit, ®ion_state) + .put_region_state(region_id, res.commit, &res.region_state) .unwrap(); + self.storage_mut().set_region_state(res.region_state); self.set_has_extra_write(); self.rollback_merge(store_ctx); diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 2f1b1cd01389..10f6e3279c3e 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -471,6 +471,7 @@ impl Peer { // Only leaders need to update applied_term. if progress_to_be_updated && self.is_leader() { if applied_term == self.term() { + fail::fail_point!("on_applied_current_term"); ctx.coprocessor_host .on_applied_current_term(StateRole::Leader, self.region()); } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index af0257e763f1..e839089837d8 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -718,7 +718,7 @@ impl Peer { ); let region_id = self.region_id(); let flush_threshold: u64 = (|| { - fail_point!("flush_before_cluse_threshold", |t| { + fail_point!("flush_before_close_threshold", |t| { t.unwrap().parse::().unwrap() }); 50 diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 87d41de776c1..4ff47c4b4bbc 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -231,6 +231,14 @@ impl Peer { unsafe_recovery_state: None, }; + // If merge_context is not None, it means the PrepareMerge is applied before + // restart. So we have to neter prepare merge again to prevent all proposals + // except for RollbackMerge. + if let Some(ref state) = peer.merge_context { + peer.proposal_control + .enter_prepare_merge(state.prepare_merge_index().unwrap()); + } + // If this region has only one peer and I am the one, campaign directly. let region = peer.region(); if region.get_peers().len() == 1 @@ -265,9 +273,6 @@ impl Peer { } /// Set the region of a peer. 
- /// - /// This will update the region of the peer, caller must ensure the region - /// has been preserved in a durable device. pub fn set_region( &mut self, host: &CoprocessorHost, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 30ba0c3059d4..513e9c0636a3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4602,6 +4602,7 @@ where } fn on_ready_prepare_merge(&mut self, region: metapb::Region, state: MergeState) { + fail_point!("on_apply_res_prepare_merge"); { let mut meta = self.ctx.store_meta.lock().unwrap(); meta.set_region( diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 805394b1ea0f..d83dff12e9a4 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -1,6 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fmt::Write, path::Path, sync::Arc, thread, time::Duration}; +use std::{ + fmt::Write, + path::Path, + sync::Arc, + thread, + time::{Duration, Instant}, +}; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{RocksEngine, RocksStatistics}; @@ -18,7 +24,7 @@ use raftstore::{store::ReadResponse, Result}; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; -use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, Config}; +use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, sleep_ms, Config}; use tikv::{ server::KvEngineFactoryBuilder, storage::{ @@ -27,7 +33,8 @@ use tikv::{ }, }; use tikv_util::{ - config::ReadableDuration, escape, future::block_on_timeout, worker::LazyWorker, HandyRwLock, + config::ReadableDuration, escape, future::block_on_timeout, time::InstantExt, + worker::LazyWorker, HandyRwLock, }; use txn_types::Key; @@ -447,3 +454,28 @@ pub fn wait_down_peers, EK: KvEngine>( peers, count, peer ); } + +pub fn wait_region_epoch_change, EK: KvEngine>( + cluster: &Cluster, + waited_region: &metapb::Region, + timeout: Duration, +) { + let timer = Instant::now(); + loop { + if waited_region.get_region_epoch().get_version() + == cluster + .get_region_epoch(waited_region.get_id()) + .get_version() + { + if timer.saturating_elapsed() > timeout { + panic!( + "region {:?}, region epoch is still not changed.", + waited_region + ); + } + } else { + break; + } + sleep_ms(10); + } +} diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 26fa2a47d5f5..2a4082893e77 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -4,7 +4,10 @@ use std::{ collections::hash_map::Entry as MapEntry, error::Error as StdError, result, - sync::{mpsc, Arc, Mutex, RwLock}, + sync::{ + mpsc::{self}, + Arc, Mutex, RwLock, + }, thread, time::Duration, }; diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 0c16819082bd..861e4a658cea 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -3,7 +3,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, - mpsc::{channel, Sender}, + mpsc::{channel, sync_channel, Sender}, *, }, thread, @@ -22,14 +22,16 @@ use raft::eraftpb::MessageType; use raftstore::store::*; use raftstore_v2::router::PeerMsg; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use 
tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is rollback as expected. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_rollback() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -53,8 +55,16 @@ fn test_node_merge_rollback() { let schedule_merge_fp = "on_schedule_merge"; fail::cfg(schedule_merge_fp, "return()").unwrap(); - // The call is finished when prepare_merge is applied. - cluster.must_try_merge(region.get_id(), target_region.get_id()); + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_prepare_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + cluster.merge_region(region.get_id(), target_region.get_id(), Callback::None); + // PrepareMerge is applied. + rx.recv().unwrap(); // Add a peer to trigger rollback. pd_client.must_add_peer(right.get_id(), new_peer(3, 5)); @@ -74,12 +84,7 @@ fn test_node_merge_rollback() { region.mut_region_epoch().set_version(4); for i in 1..3 { must_get_equal(&cluster.get_engine(i), b"k11", b"v11"); - let state_key = keys::region_state_key(region.get_id()); - let state: RegionLocalState = cluster - .get_engine(i) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state = cluster.region_local_state(region.get_id(), i); assert_eq!(state.get_state(), PeerState::Normal); assert_eq!(*state.get_region(), region); } @@ -88,7 +93,10 @@ fn test_node_merge_rollback() { fail::cfg(schedule_merge_fp, "return()").unwrap(); let target_region = pd_client.get_region(b"k3").unwrap(); - cluster.must_try_merge(region.get_id(), target_region.get_id()); + cluster.merge_region(region.get_id(), target_region.get_id(), Callback::None); + // PrepareMerge is applied. + rx.recv().unwrap(); + let mut region = pd_client.get_region(b"k1").unwrap(); // Split to trigger rollback. 
@@ -103,12 +111,7 @@ fn test_node_merge_rollback() { region.mut_region_epoch().set_version(6); for i in 1..3 { must_get_equal(&cluster.get_engine(i), b"k12", b"v12"); - let state_key = keys::region_state_key(region.get_id()); - let state: RegionLocalState = cluster - .get_engine(i) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state = cluster.region_local_state(region.get_id(), i); assert_eq!(state.get_state(), PeerState::Normal); assert_eq!(*state.get_region(), region); } @@ -1835,19 +1838,7 @@ fn test_concurrent_between_transfer_leader_and_merge() { rx2.recv().unwrap(); fail::remove("on_reject_commit_merge_1"); - let timer = Instant::now(); - loop { - if right.get_region_epoch().get_version() - == cluster.get_region_epoch(right.get_id()).get_version() - { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("region {:?} is still not merged.", right); - } - } else { - break; - } - sleep_ms(10); - } + wait_region_epoch_change(&cluster, &right, Duration::from_secs(5)); let region = pd_client.get_region(b"k1").unwrap(); assert_eq!(region.get_id(), right.get_id()); @@ -1856,3 +1847,129 @@ fn test_concurrent_between_transfer_leader_and_merge() { cluster.must_put(b"k4", b"v4"); } + +struct MsgVoteFilter {} + +impl Filter for MsgVoteFilter { + fn before(&self, msgs: &mut Vec) -> raftstore::Result<()> { + msgs.retain(|m| { + let msg_type = m.get_message().msg_type; + msg_type != MessageType::MsgRequestPreVote && msg_type != MessageType::MsgRequestVote + }); + check_messages(msgs) + } +} + +// Before the fix of this PR (#15649), after prepare merge, raft cmd can still +// be proposed if restart is involved. If the proposed raft cmd is CompactLog, +// panic can occur during fetch entries: see issue https://github.com/tikv/tikv/issues/15633. +// Consider the case: +// 1. node-1 apply PrepareMerge (assume log index 30), so it's in is_merging +// status which reject all proposals except for Rollback Merge +// 2. node-1 advance persisted_apply to 30 +// 3. node-1 restart and became leader. Now, it's not in is_merging status, so +// proposals can be proposed +// 4. node-1 propose CompactLog, replicate it to other nodes, and commit +// 5. node-0 apply PrepareMerge +// 6. node-0 apply CompactLog +// 6. 
node-0 fetches raft log entries which is required by +// AdminCmdType::CommitMerge and panic (due to compacted) +#[test] +fn test_restart_may_lose_merging_state() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 2); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(10); + + cluster.run(); + fail::cfg("maybe_propose_compact_log", "return").unwrap(); + fail::cfg("on_ask_commit_merge", "return").unwrap(); + fail::cfg("flush_before_close_threshold", "return(0)").unwrap(); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_prepare_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k20"); + + let source = cluster.get_region(b"k05"); + let target = cluster.get_region(b"k25"); + + cluster.add_send_filter_on_node(2, Box::new(MsgVoteFilter {})); + + cluster.must_transfer_leader( + source.id, + source + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + cluster.must_transfer_leader( + target.id, + target + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + for i in 0..20 { + let k = format!("k{:02}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + cluster.merge_region(source.id, target.id, Callback::None); + + rx.recv().unwrap(); + let router = cluster.get_router(1).unwrap(); + let (tx, rx) = sync_channel(1); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(source.id, msg).unwrap(); + rx.recv().unwrap(); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_commit_merge_2", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + cluster.stop_node(1); + // Need to avoid propose commit merge, before node 1 becomes leader. Otherwise, + // the commit merge will be rejected. + let (tx2, rx2) = channel(); + let tx2 = Mutex::new(tx2); + fail::cfg_callback("on_applied_current_term", move || { + tx2.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + fail::remove("maybe_propose_compact_log"); + cluster.run_node(1).unwrap(); + + // we have two regions. 
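+    // Each of them re-elects its leader on node 1 after the restart, and each
+    // leader fires the on_applied_current_term callback once it applies an
+    // entry in its new term, hence the two receives below.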
+ rx2.recv().unwrap(); + rx2.recv().unwrap(); + fail::remove("on_ask_commit_merge"); + // wait node 2 to apply commit merge + rx.recv_timeout(Duration::from_secs(10)).unwrap(); + + wait_region_epoch_change(&cluster, &target, Duration::from_secs(5)); + + let region = cluster.get_region(b"k1"); + assert_eq!(region.get_id(), target.get_id()); + assert_eq!(region.get_start_key(), source.get_start_key()); + assert_eq!(region.get_end_key(), target.get_end_key()); + + cluster.must_put(b"k400", b"v400"); +} diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 056641e1e3f8..bca389b26e67 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -216,7 +216,7 @@ fn test_flush_before_stop() { let region = cluster.get_region(b"k60"); cluster.must_split(®ion, b"k070"); - fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(10)").unwrap(); for i in 0..100 { let key = format!("k{:03}", i); @@ -260,7 +260,7 @@ fn test_flush_before_stop2() { let mut cluster = new_server_cluster(0, 3); cluster.run(); - fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(10)").unwrap(); fail::cfg("on_flush_completed", "return").unwrap(); for i in 0..20 { @@ -331,7 +331,7 @@ fn test_flush_index_exceed_last_modified() { ) .unwrap(); - fail::cfg("flush_before_cluse_threshold", "return(1)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(1)").unwrap(); let router = cluster.get_router(1).unwrap(); let (tx, rx) = sync_channel(1); let msg = PeerMsg::FlushBeforeClose { tx }; From 9307f7ccfdf11c1047f833f888cbd77487b1c707 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 26 Sep 2023 18:06:17 +0800 Subject: [PATCH 072/203] raftstore-v2: fix MergedRecords not being cleaned up (#15650) close tikv/tikv#15644 MergedRecords were not being properly cleaned up, causing unnecessary bloating of RegionLocalState and continuous sending of GcPeerRequest by raftstore. This commit addresses the issue by enhancing the handling of GcPeerRequests, ensuring that target region followers forward GcPeerRequests to the source peer. The source peer or store then reports GcPeerResponse accordingly. 
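
An illustrative sketch of the dispatch described above (stand-in types only; `GcPeerRoute` and `route_gc_peer_request` are names made up for this example, not the raftstore-v2 API):

    // A target-region follower that receives a GcPeerRequest for a merged-away
    // source region either hands it to the still-existing source peer, or, if
    // that peer is already destroyed, lets the store reply so the leader still
    // gets a GcPeerResponse and can clean up its merged/removed records.
    #[derive(Debug, PartialEq)]
    enum GcPeerRoute {
        ToSourcePeer { region_id: u64 },
        ToStore,
    }

    fn route_gc_peer_request(local_region_ids: &[u64], source_region_id: u64) -> GcPeerRoute {
        if local_region_ids.contains(&source_region_id) {
            GcPeerRoute::ToSourcePeer { region_id: source_region_id }
        } else {
            GcPeerRoute::ToStore
        }
    }

    fn main() {
        // Source region 5 still has a peer on this store: deliver to it.
        assert_eq!(
            route_gc_peer_request(&[2, 5], 5),
            GcPeerRoute::ToSourcePeer { region_id: 5 }
        );
        // Source region 9 is already gone: let the store report the response.
        assert_eq!(route_gc_peer_request(&[2, 5], 9), GcPeerRoute::ToStore);
    }
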
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- .../operation/command/admin/conf_change.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 22 ++- components/test_raftstore-v2/src/cluster.rs | 44 +++++ tests/integrations/raftstore/test_life.rs | 19 +- tests/integrations/raftstore/test_merge.rs | 170 +++++++++++++++--- 5 files changed, 214 insertions(+), 45 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 77ef6c823c14..55cee490e525 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -604,8 +604,8 @@ impl Apply { "update gc peer"; "index" => log_index, "updates" => ?updates, - "gc_peers" => ?removed_records, - "merged_peers" => ?merged_records + "removed_records" => ?removed_records, + "merged_records" => ?merged_records ); removed_records.retain(|p| !updates.contains(&p.get_id())); merged_records.retain_mut(|r| { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 4d1a59de0a6f..8591d5daf23c 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -681,6 +681,10 @@ impl Peer { let _ = router.send_raft_message(m.into()); }, ); + } else { + // Source peer is already destroyed. Forward to store, and let + // it report GcPeer response. + let _ = ctx.router.send_raft_message(m.into()); } }); } @@ -748,15 +752,23 @@ impl Peer { } // 2. ask target to check whether source should be deleted. 
for record in state.get_merged_records() { - for (source, target) in record - .get_source_peers() - .iter() - .zip(record.get_target_peers()) - { + for source in record.get_source_peers() { need_gc_ids.push(source.get_id()); if gc_context.confirmed_ids.contains(&source.get_id()) { continue; } + let Some(target) = record + .get_target_peers() + .iter() + .find(|p| p.get_store_id() == source.get_store_id()) + else { + panic!( + "[region {}] {} target peer not found, {:?}", + self.region_id(), + self.peer_id(), + state + ); + }; let mut msg = RaftMessage::default(); msg.set_region_id(record.get_target_region_id()); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 8ede32901671..9d61918bd1f0 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1689,6 +1689,50 @@ impl, EK: KvEngine> Cluster { } } + pub fn must_empty_region_removed_records(&mut self, region_id: u64) { + let timer = Instant::now(); + loop { + thread::sleep(Duration::from_millis(100)); + + let leader = match self.leader_of_region(region_id) { + None => continue, + Some(l) => l, + }; + let region_state = self.region_local_state(region_id, leader.get_store_id()); + if region_state.get_removed_records().is_empty() { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "merged records and removed records must be empty, {:?}", + region_state + ); + } + } + } + + pub fn must_empty_region_merged_records(&mut self, region_id: u64) { + let timer = Instant::now(); + loop { + thread::sleep(Duration::from_millis(100)); + + let leader = match self.leader_of_region(region_id) { + None => continue, + Some(l) => l, + }; + let region_state = self.region_local_state(region_id, leader.get_store_id()); + if region_state.get_merged_records().is_empty() { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "merged records and removed records must be empty, {:?}", + region_state + ); + } + } + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index f3b5704a586f..809904c7f468 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -7,9 +7,7 @@ use std::{ use kvproto::raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}; use raftstore::errors::Result; -use test_raftstore::{ - new_learner_peer, new_peer, sleep_ms, Filter, FilterFactory, Simulator as S1, -}; +use test_raftstore::{new_learner_peer, new_peer, Filter, FilterFactory, Simulator as S1}; use test_raftstore_v2::Simulator as S2; use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; @@ -125,20 +123,7 @@ fn test_gc_peer_tiflash_engine() { .must_remove_peer(r21, new_learner_peer(2, 10)); // Make sure leader cleans up removed_records. 
- let start = Instant::now(); - loop { - sleep_ms(500); - if cluster_v2 - .region_local_state(r21, 1) - .get_removed_records() - .is_empty() - { - break; - } - if start.saturating_elapsed() > Duration::from_secs(5) { - panic!("timeout"); - } - } + cluster_v2.must_empty_region_removed_records(r21); } #[test] diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 0b17ff72ae72..080724b15a72 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -6,7 +6,7 @@ use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::{CF_LOCK, CF_WRITE}; use kvproto::{ raft_cmdpb::CmdType, - raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, + raft_serverpb::{ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, }; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; @@ -1733,7 +1733,7 @@ fn test_prepare_merge_with_5_nodes_snapshot() { } #[test_case(test_raftstore_v2::new_node_cluster)] -fn test_gc_peer_after_merge() { +fn test_gc_source_removed_records_after_merge() { let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); @@ -1792,23 +1792,151 @@ fn test_gc_peer_after_merge() { // Right region replica on store 3 must be removed. cluster.must_region_not_exist(right.get_id(), 3); - let start = Instant::now(); - loop { - sleep_ms(cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); - let region_state = cluster.region_local_state(left.get_id(), 1); - if (region_state.get_merged_records().is_empty() - || region_state.get_merged_records()[0] - .get_source_removed_records() - .is_empty()) - && region_state.get_removed_records().is_empty() - { - break; - } - if start.elapsed() > Duration::from_secs(5) { - panic!( - "source removed records and removed records must be empty, {:?}", - region_state - ); - } - } + // Right region must clean up removed and merged records. + cluster.must_empty_region_merged_records(left.get_id()); + cluster.must_empty_region_removed_records(left.get_id()); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_source_peers_forward_by_target_peer_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.raft_log_gc_threshold = 40; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(40); + cluster.cfg.raft_store.merge_max_log_gap = 15; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Use DropMessageFilter to drop messages to store 3 without reporting error. 
+ cluster.add_recv_filter_on_node( + 3, + Box::new(DropMessageFilter::new(Arc::new(|m| { + // Do not drop MsgAvailabilityRequest and MsgAvailabilityResponse + // messages, otherwise merge is blocked. + matches!( + m.get_extra_msg().get_type(), + ExtraMessageType::MsgAvailabilityRequest + | ExtraMessageType::MsgAvailabilityResponse + ) + }))), + ); + + // So cluster becomes + // left region: 1(leader) 2 | 3 + // right region: 1(leader) 2 | 3 + // | means isolation. + + // Merge left to right and remove left peer on store 3. + pd_client.must_merge(left.get_id(), right.get_id()); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3); + let region_state = cluster.region_local_state(right.get_id(), 1); + assert!( + !region_state.get_merged_records().is_empty(), + "{:?}", + region_state + ); + + // So cluster becomes + // left region: merged + // right region: 1(leader) 2 | 3 (removed but not yet destroyed) + // | means isolation. + + let state1 = cluster.truncated_state(right.get_id(), 1); + (0..50).for_each(|i| cluster.must_put(b"k2", format!("v{}", i).as_bytes())); + // Wait to trigger compact raft log + cluster.wait_log_truncated(right.get_id(), 1, state1.get_index() + 1); + + // Cluster filters and wait for gc peer ticks. + cluster.clear_recv_filter_on_node(3); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Left region replica on store 3 must be removed. + cluster.must_region_not_exist(left.get_id(), 3); + // Right region must clean up removed and merged records. + cluster.must_empty_region_merged_records(right.get_id()); + cluster.must_empty_region_removed_records(right.get_id()); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_source_peers_forward_by_store_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Drop GcPeerResponse. + cluster.add_recv_filter_on_node( + 1, + Box::new(DropMessageFilter::new(Arc::new(|m| { + m.get_extra_msg().get_type() != ExtraMessageType::MsgGcPeerResponse + }))), + ); + + // So cluster becomes + // left region: 1(leader) 2 | 3 + // right region: 1(leader) 2 | 3 + // | means isolation. + + // Merge left to right and remove left peer on store 3. + pd_client.must_merge(left.get_id(), right.get_id()); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3); + // Right region replica on store 3 must be removed. 
+ cluster.must_region_not_exist(right.get_id(), 3); + let region_state = cluster.region_local_state(right.get_id(), 1); + assert!( + !region_state.get_merged_records().is_empty(), + "{:?}", + region_state + ); + assert!( + !region_state.get_removed_records().is_empty(), + "{:?}", + region_state + ); + + // So cluster becomes + // left region: merged + // right region: 1(leader) 2 | 3 (destroyed but not yet cleaned in removed + // records) + // | means isolation. + + // Cluster filters and wait for gc peer ticks. + cluster.clear_recv_filter_on_node(1); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Right region must clean up removed and merged records. + cluster.must_empty_region_merged_records(right.get_id()); + cluster.must_empty_region_removed_records(right.get_id()); } From df263d287dbdc8397030a3437ee97c918c43abb4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 27 Sep 2023 15:34:47 +0800 Subject: [PATCH 073/203] raftstore-v2: check gc peer after commit merge (#15693) close tikv/tikv#15672 This commit addresses the issue of orphan peers remaining in TiKV due to the absence of GcPeer tick registration after commit merge. The lack of regular checks on removed_records and merged_records can lead to delays in detecting and resolving these issues. To improve this, we have implemented a solution that ensures TiKV registers the GcPeer tick after commit merge. This change enables regular checks on the removed_records and merged_records, preventing them from being overlooked for an extended period. Signed-off-by: Neil Shen --- .../operation/command/admin/merge/commit.rs | 1 + tests/integrations/raftstore/test_merge.rs | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index bec0265ffc3b..e95a13600fbc 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -829,6 +829,7 @@ impl Peer { "target_region" => ?self.region(), ); self.add_pending_tick(PeerTick::SplitRegionCheck); + self.maybe_schedule_gc_peer_tick(); } } diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 080724b15a72..8d93d2c5a5c2 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1940,3 +1940,34 @@ fn test_gc_source_peers_forward_by_store_after_merge() { cluster.must_empty_region_merged_records(right.get_id()); cluster.must_empty_region_removed_records(right.get_id()); } + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_merged_record_in_time() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(100); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + + // Wait enough time to trigger gc peer, and if there is nothing to gc, + // 
leader skips registering gc peer tick. + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Merge left to right. + pd_client.must_merge(left.get_id(), right.get_id()); + + // Once merge complete, gc peer tick should be registered and merged record + // will be cleaned up in time. + cluster.must_empty_region_merged_records(right.get_id()); +} From 73bc4012f0ea5c49870639ccf353d1de5382025f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:45:16 +0800 Subject: [PATCH 074/203] sst_importer: impl SuspendImport interface (#15612) close tikv/tikv#15611 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/error_code/src/sst_importer.rs | 5 +- components/raftstore/src/store/util.rs | 2 +- components/sst_importer/src/errors.rs | 16 +++ src/import/sst_service.rs | 99 ++++++++++++++++++- tests/integrations/import/test_sst_service.rs | 94 ++++++++++++++++++ 6 files changed, 212 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 124a87f069eb..b3842f92752c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2933,7 +2933,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#090f247be15c00a6000a4d23669ac3e95ea9fcd5" +source = "git+https://github.com/pingcap/kvproto.git#87bebcc0d071a18cbbd94a4fc02de9c4988af815" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 001f4f146f6e..117400e8aff4 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -22,5 +22,8 @@ define_error_codes!( TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), - RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", "") + RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), + SUSPENDED => ("Suspended", + "this request has been suspended.", + "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them.") ); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 3f34fe691ee0..519d486102c4 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -306,7 +306,7 @@ pub fn compare_region_epoch( // tells TiDB with a epoch not match error contains the latest target Region // info, TiDB updates its region cache and sends requests to TiKV B, // and TiKV B has not applied commit merge yet, since the region epoch in - // request is higher than TiKV B, the request must be denied due to epoch + // request is higher than TiKV B, the request must be suspended due to epoch // not match, so it does not read on a stale snapshot, thus avoid the // KeyNotInRegion error. 
let current_epoch = region.get_region_epoch(); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index 7ff940fff12a..acca7523427d 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -2,6 +2,7 @@ use std::{ error::Error as StdError, io::Error as IoError, num::ParseIntError, path::PathBuf, result, + time::Duration, }; use encryption::Error as EncryptionError; @@ -31,6 +32,7 @@ pub fn error_inc(type_: &str, err: &Error) { Error::BadFormat(..) => "bad_format", Error::Encryption(..) => "encryption", Error::CodecError(..) => "codec", + Error::Suspended { .. } => "suspended", _ => return, }; IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); @@ -125,6 +127,9 @@ pub enum Error { #[error("resource is not enough {0}")] ResourceNotEnough(String), + + #[error("imports are suspended for {time_to_lease_expire:?}")] + Suspended { time_to_lease_expire: Duration }, } impl Error { @@ -160,6 +165,16 @@ impl From for import_sstpb::Error { err.set_store_error(import_err); err.set_message(format!("{}", e)); } + Error::Suspended { + time_to_lease_expire, + } => { + let mut store_err = errorpb::Error::default(); + let mut server_is_busy = errorpb::ServerIsBusy::default(); + server_is_busy.set_backoff_ms(time_to_lease_expire.as_millis() as _); + store_err.set_server_is_busy(server_is_busy); + err.set_store_error(store_err); + err.set_message(format!("{}", e)); + } _ => { err.set_message(format!("{}", e)); } @@ -197,6 +212,7 @@ impl ErrorCodeExt for Error { Error::IncompatibleApiVersion => error_code::sst_importer::INCOMPATIBLE_API_VERSION, Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, + Error::Suspended { .. } => error_code::sst_importer::SUSPENDED, } } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6d40ffe959c8..68403e226f8b 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -5,7 +5,10 @@ use std::{ convert::identity, future::Future, path::PathBuf, - sync::{Arc, Mutex}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, time::Duration, }; @@ -20,7 +23,8 @@ use kvproto::{ errorpb, import_sstpb::{ Error as ImportPbError, ImportSst, Range, RawWriteRequest_oneof_chunk as RawChunk, SstMeta, - SwitchMode, WriteRequest_oneof_chunk as Chunk, *, + SuspendImportRpcRequest, SuspendImportRpcResponse, SwitchMode, + WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, }; @@ -41,7 +45,7 @@ use tikv_util::{ HandyRwLock, }; use tokio::{runtime::Runtime, time::sleep}; -use txn_types::{Key, WriteRef, WriteType}; +use txn_types::{Key, TimeStamp, WriteRef, WriteType}; use super::{ make_rpc_error, @@ -49,6 +53,7 @@ use super::{ }; use crate::{ import::duplicate_detect::DuplicateDetector, + send_rpc_response, server::CONFIG_ROCKSDB_GAUGE, storage::{self, errors::extract_region_error_from_error}, }; @@ -80,6 +85,10 @@ const WIRE_EXTRA_BYTES: usize = 12; /// [`raft_writer::ThrottledTlsEngineWriter`]. There aren't too many items held /// in the writer. So we can run the GC less frequently. const WRITER_GC_INTERVAL: Duration = Duration::from_secs(300); +/// The max time of suspending requests. +/// This may save us from some client sending insane value to the server. 
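+/// Asking to suspend for longer than this is rejected up front with an
+/// InvalidInput error.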
+const SUSPEND_REQUEST_MAX_SECS: u64 = // 6h + 6 * 60 * 60; fn transfer_error(err: storage::Error) -> ImportPbError { let mut e = ImportPbError::default(); @@ -121,6 +130,9 @@ pub struct ImportSstService { // it's some iff multi-rocksdb is enabled store_meta: Option>>>, resource_manager: Option>, + + // When less than now, don't accept any requests. + suspend_req_until: Arc, } struct RequestCollector { @@ -356,6 +368,7 @@ impl ImportSstService { writer, store_meta, resource_manager, + suspend_req_until: Arc::new(AtomicU64::new(0)), } } @@ -619,6 +632,47 @@ impl ImportSstService { Ok(range) } + + /// Check whether we should suspend the current request. + fn check_suspend(&self) -> Result<()> { + let now = TimeStamp::physical_now(); + let suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + if now < suspend_until { + Err(Error::Suspended { + time_to_lease_expire: Duration::from_millis(suspend_until - now), + }) + } else { + Ok(()) + } + } + + /// suspend requests for a period. + /// + /// # returns + /// + /// whether for now, the requests has already been suspended. + pub fn suspend_requests(&self, for_time: Duration) -> bool { + let now = TimeStamp::physical_now(); + let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + let suspended = now < last_suspend_until; + let suspend_until = TimeStamp::physical_now() + for_time.as_millis() as u64; + self.suspend_req_until + .store(suspend_until, Ordering::SeqCst); + suspended + } + + /// allow all requests to enter. + /// + /// # returns + /// + /// whether requests has already been previously suspended. + pub fn allow_requests(&self) -> bool { + let now = TimeStamp::physical_now(); + let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + let suspended = now < last_suspend_until; + self.suspend_req_until.store(0, Ordering::SeqCst); + suspended + } } #[macro_export] @@ -993,6 +1047,10 @@ impl ImportSst for ImportSstService { ) { let label = "ingest"; let timer = Instant::now_coarse(); + if let Err(err) = self.check_suspend() { + ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + return; + } let mut resp = IngestResponse::default(); let region_id = req.get_context().get_region_id(); @@ -1036,6 +1094,10 @@ impl ImportSst for ImportSstService { ) { let label = "multi-ingest"; let timer = Instant::now_coarse(); + if let Err(err) = self.check_suspend() { + ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + return; + } let mut resp = IngestResponse::default(); if let Some(errorpb) = self.check_write_stall(req.get_context().get_region_id()) { @@ -1240,6 +1302,37 @@ impl ImportSst for ImportSstService { RawChunk, new_raw_writer ); + + fn suspend_import_rpc( + &mut self, + ctx: RpcContext<'_>, + req: SuspendImportRpcRequest, + sink: UnarySink, + ) { + let label = "suspend_import_rpc"; + let timer = Instant::now_coarse(); + + if req.should_suspend_imports && req.get_duration_in_secs() > SUSPEND_REQUEST_MAX_SECS { + ctx.spawn(async move { + send_rpc_response!(Err(Error::Io( + std::io::Error::new(std::io::ErrorKind::InvalidInput, + format!("you are going to suspend the import RPCs too long. 
(for {} seconds, max acceptable duration is {} seconds)", + req.get_duration_in_secs(), SUSPEND_REQUEST_MAX_SECS)))), sink, label, timer); + }); + return; + } + + let suspended = if req.should_suspend_imports { + info!("suspend incoming import RPCs."; "for_second" => req.get_duration_in_secs(), "caller" => req.get_caller()); + self.suspend_requests(Duration::from_secs(req.get_duration_in_secs())) + } else { + info!("allow incoming import RPCs."; "caller" => req.get_caller()); + self.allow_requests() + }; + let mut resp = SuspendImportRpcResponse::default(); + resp.set_already_suspended(suspended); + ctx.spawn(async move { send_rpc_response!(Ok(resp), sink, label, timer) }); + } } // add error statistics from pb error response diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 22ab9c7d7fe8..6c56ab0018b5 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -555,3 +555,97 @@ fn test_duplicate_and_close() { req.set_mode(SwitchMode::Normal); import.switch_mode(&req).unwrap(); } + +#[test] +fn test_suspend_import() { + let (_cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client(); + let sst_range = (0, 10); + let write = |sst_range: (u8, u8)| { + let mut meta = new_sst_meta(0, 0); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + + let mut keys = vec![]; + let mut values = vec![]; + for i in sst_range.0..sst_range.1 { + keys.push(vec![i]); + values.push(vec![i]); + } + send_write_sst(&import, &meta, keys, values, 1) + }; + let ingest = |sst_meta: &SstMeta| { + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(sst_meta.clone()); + import.ingest(&ingest) + }; + let multi_ingest = |sst_metas: &[SstMeta]| { + let mut multi_ingest = MultiIngestRequest::default(); + multi_ingest.set_context(ctx.clone()); + multi_ingest.set_ssts(sst_metas.to_vec().into()); + import.multi_ingest(&multi_ingest) + }; + let suspendctl = |for_time| { + let mut req = SuspendImportRpcRequest::default(); + req.set_caller("test_suspend_import".to_owned()); + if for_time == 0 { + req.set_should_suspend_imports(false); + } else { + req.set_should_suspend_imports(true); + req.set_duration_in_secs(for_time); + } + req + }; + + let write_res = write(sst_range).unwrap(); + assert_eq!(write_res.metas.len(), 1); + let sst = write_res.metas[0].clone(); + + assert!( + !import + .suspend_import_rpc(&suspendctl(6000)) + .unwrap() + .already_suspended + ); + let write_res = write(sst_range); + write_res.unwrap(); + let ingest_res = ingest(&sst); + assert_to_string_contains!(ingest_res.unwrap_err(), "Suspended"); + let multi_ingest_res = multi_ingest(&[sst.clone()]); + assert_to_string_contains!(multi_ingest_res.unwrap_err(), "Suspended"); + + assert!( + import + .suspend_import_rpc(&suspendctl(0)) + .unwrap() + .already_suspended + ); + + let ingest_res = ingest(&sst); + assert!(ingest_res.is_ok(), "{:?} => {:?}", sst, ingest_res); + + check_ingested_txn_kvs(&tikv, &ctx, sst_range, 2); + + // test timeout. 
+ assert!( + !import + .suspend_import_rpc(&suspendctl(1)) + .unwrap() + .already_suspended + ); + let sst_range = (10, 20); + let write_res = write(sst_range); + let sst = write_res.unwrap().metas; + let res = multi_ingest(&sst); + assert_to_string_contains!(res.unwrap_err(), "Suspended"); + std::thread::sleep(Duration::from_secs(1)); + multi_ingest(&sst).unwrap(); + + // check an insane value should be rejected. + import + .suspend_import_rpc(&suspendctl(u64::MAX - 42)) + .unwrap_err(); + let sst_range = (20, 30); + let ssts = write(sst_range).unwrap(); + multi_ingest(ssts.get_metas()).unwrap(); +} From 4814a6129b8a4ae122bb6152c140a064787456bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 28 Sep 2023 11:34:48 +0800 Subject: [PATCH 075/203] compaction_guard: split SST when detected possible huge compaction (#15379) close tikv/tikv#15058 This PR make the compaction guard splits SSTs when it find that there are possible huge compactions. It works by iterating the next of the output level (Let is be level L+1), when a SST crosses such a huge key range that making L+1 contains size greater than the `max-compaction-size`. Signed-off-by: hillium Co-authored-by: tonyxuqqi --- .../engine_rocks/src/sst_partitioner.rs | 2 + .../engine_traits/src/sst_partitioner.rs | 2 + .../raftstore/src/store/compaction_guard.rs | 314 ++++++++++++++++-- src/config/mod.rs | 1 + 4 files changed, 293 insertions(+), 26 deletions(-) diff --git a/components/engine_rocks/src/sst_partitioner.rs b/components/engine_rocks/src/sst_partitioner.rs index fc1dcd402709..f642a94f28f4 100644 --- a/components/engine_rocks/src/sst_partitioner.rs +++ b/components/engine_rocks/src/sst_partitioner.rs @@ -23,6 +23,8 @@ impl rocksdb::SstPartitionerFactory output_level: context.output_level, smallest_key: context.smallest_key, largest_key: context.largest_key, + next_level_boundaries: context.next_level_boundaries.clone(), + next_level_sizes: context.next_level_sizes.clone(), }; self.0.create_partitioner(&ctx).map(RocksSstPartitioner) } diff --git a/components/engine_traits/src/sst_partitioner.rs b/components/engine_traits/src/sst_partitioner.rs index bc6ec13a4eb6..4a8ee9e71bc4 100644 --- a/components/engine_traits/src/sst_partitioner.rs +++ b/components/engine_traits/src/sst_partitioner.rs @@ -22,6 +22,8 @@ pub struct SstPartitionerContext<'a> { pub output_level: i32, pub smallest_key: &'a [u8], pub largest_key: &'a [u8], + pub next_level_boundaries: Vec<&'a [u8]>, + pub next_level_sizes: Vec, } pub trait SstPartitioner { diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index efee09be9069..138d730fa29f 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -23,10 +23,16 @@ pub struct CompactionGuardGeneratorFactory { cf_name: CfNames, provider: P, min_output_file_size: u64, + max_compaction_size: u64, } impl CompactionGuardGeneratorFactory
<P>
 {
-    pub fn new(cf: CfName, provider: P, min_output_file_size: u64) -> Result<Self> {
+    pub fn new(
+        cf: CfName,
+        provider: P,
+        min_output_file_size: u64,
+        max_compaction_size: u64,
+    ) -> Result<Self> {
         let cf_name = match cf {
             CF_DEFAULT => CfNames::default,
             CF_LOCK => CfNames::lock,
@@ -43,6 +49,7 @@ impl CompactionGuardGeneratorFactory
<P>
{ cf_name, provider, min_output_file_size, + max_compaction_size, }) } } @@ -72,6 +79,15 @@ impl SstPartitionerFactory use_guard: false, boundaries: vec![], pos: 0, + next_level_pos: 0, + next_level_boundaries: context + .next_level_boundaries + .iter() + .map(|v| v.to_vec()) + .collect(), + next_level_size: context.next_level_sizes.clone(), + current_next_level_size: 0, + max_compaction_size: self.max_compaction_size, }) } } @@ -86,7 +102,20 @@ pub struct CompactionGuardGenerator { use_guard: bool, // The boundary keys are exclusive. boundaries: Vec>, + /// The SST boundaries overlapped with the compaction input at the next + /// level of output level (let we call it L+2). When the output level is the + /// bottom-most level(usually L6), this will be empty. The boundaries + /// are the first key of the first sst concatenating with all ssts' end key. + next_level_boundaries: Vec>, + /// The size of each "segment" of L+2. If the `next_level_boundaries`(let we + /// call it NLB) isn't empty, `next_level_size` will have length + /// `NLB.len() - 1`, and at the position `N` stores the size of range + /// `[NLB[N], NLB[N+1]]` in L+2. + next_level_size: Vec, pos: usize, + next_level_pos: usize, + current_next_level_size: u64, + max_compaction_size: u64, } impl CompactionGuardGenerator
<P>
{ @@ -153,27 +182,52 @@ impl SstPartitioner for CompactionGuardGenerator
<P>
{ if !self.use_guard { return SstPartitionerResult::NotRequired; } - let mut pos = self.pos; - let mut skip_count = 0; - while pos < self.boundaries.len() && self.boundaries[pos].as_slice() <= req.prev_user_key { - pos += 1; - skip_count += 1; - if skip_count >= COMPACTION_GUARD_MAX_POS_SKIP { - let prev_user_key = req.prev_user_key.to_vec(); - pos = match self.boundaries.binary_search(&prev_user_key) { - Ok(search_pos) => search_pos + 1, - Err(search_pos) => search_pos, - }; - break; - } + self.pos = seek_to(&self.boundaries, req.prev_user_key, self.pos); + // Generally this shall be a noop... because each time we are moving the cursor + // to the previous key. + let left_next_level_pos = seek_to( + &self.next_level_boundaries, + req.prev_user_key, + self.next_level_pos, + ); + let right_next_level_pos = seek_to( + &self.next_level_boundaries, + req.current_user_key, + left_next_level_pos, + ); + // The cursor has been moved. + if right_next_level_pos > left_next_level_pos { + self.current_next_level_size += self.next_level_size + [left_next_level_pos..right_next_level_pos - 1] + .iter() + .map(|x| *x as u64) + .sum::(); } - self.pos = pos; - if pos < self.boundaries.len() && self.boundaries[pos].as_slice() <= req.current_user_key { - if req.current_output_file_size >= self.min_output_file_size { + self.next_level_pos = right_next_level_pos; + + if self.pos < self.boundaries.len() + && self.boundaries[self.pos].as_slice() <= req.current_user_key + { + if req.current_output_file_size >= self.min_output_file_size + // Or, the output file may make a huge compaction even greater than the max compaction size. + || self.current_next_level_size >= self.max_compaction_size + { COMPACTION_GUARD_ACTION_COUNTER .get(self.cf_name) .partition .inc(); + // The current pointer status should be like (let * be the current pos, ^ be + // where the previous user key is): + // boundaries: A B C D + // size: 1 3 2 + // ^ * + // You will notice that the previous user key is between B and C, which indices + // that there must still be something between previous user key and C. + // We still set `current_next_level_size` to zero here, so the segment will be + // forgotten. I think that will be acceptable given generally a segment won't be + // greater than the `max-sst-size`, which is tiny comparing to the + // `max-compaction-size` usually. + self.current_next_level_size = 0; SstPartitionerResult::Required } else { COMPACTION_GUARD_ACTION_COUNTER @@ -193,10 +247,28 @@ impl SstPartitioner for CompactionGuardGenerator
<P>
{ } } +fn seek_to(all_data: &Vec>, target_key: &[u8], from_pos: usize) -> usize { + let mut pos = from_pos; + let mut skip_count = 0; + while pos < all_data.len() && all_data[pos].as_slice() <= target_key { + pos += 1; + skip_count += 1; + if skip_count >= COMPACTION_GUARD_MAX_POS_SKIP { + pos = match all_data.binary_search_by(|probe| probe.as_slice().cmp(target_key)) { + Ok(search_pos) => search_pos + 1, + Err(search_pos) => search_pos, + }; + break; + } + } + pos +} + #[cfg(test)] mod tests { - use std::str; + use std::{path::Path, str}; + use collections::HashMap; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -212,6 +284,13 @@ mod tests { use super::*; use crate::coprocessor::region_info_accessor::MockRegionInfoProvider; + impl CompactionGuardGenerator { + fn reset_next_level_size_state(&mut self) { + self.current_next_level_size = 0; + self.next_level_pos = 0; + } + } + #[test] fn test_compaction_guard_non_data() { let mut guard = CompactionGuardGenerator { @@ -224,6 +303,11 @@ mod tests { use_guard: false, boundaries: vec![], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: vec![], + next_level_size: vec![], + max_compaction_size: 1 << 30, }; guard.smallest_key = keys::LOCAL_MIN_KEY.to_vec(); @@ -267,8 +351,16 @@ mod tests { provider: MockRegionInfoProvider::new(vec![]), initialized: true, use_guard: true, - boundaries: vec![b"bbb".to_vec(), b"ccc".to_vec()], + boundaries: vec![b"bbb".to_vec(), b"ccc".to_vec(), b"ddd".to_vec()], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: (0..10) + .map(|x| format!("bbb{:02}", x).into_bytes()) + .chain((0..100).map(|x| format!("cccz{:03}", x).into_bytes())) + .collect(), + next_level_size: [&[1 << 18; 99][..], &[1 << 28; 10][..]].concat(), + max_compaction_size: 1 << 30, // 1GB }; // Crossing region boundary. let mut req = SstPartitionerRequest { @@ -277,7 +369,11 @@ mod tests { current_output_file_size: 32 << 20, }; assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); + assert_eq!(guard.next_level_pos, 10); assert_eq!(guard.pos, 0); + assert_eq!(guard.current_next_level_size, 0); + guard.reset_next_level_size_state(); + // Output file size too small. req = SstPartitionerRequest { prev_user_key: b"bba", @@ -289,6 +385,10 @@ mod tests { SstPartitionerResult::NotRequired ); assert_eq!(guard.pos, 0); + assert_eq!(guard.next_level_pos, 10); + assert_eq!(guard.current_next_level_size, 9 << 18); + guard.reset_next_level_size_state(); + // Not crossing boundary. 
req = SstPartitionerRequest { prev_user_key: b"aaa", @@ -300,6 +400,9 @@ mod tests { SstPartitionerResult::NotRequired ); assert_eq!(guard.pos, 0); + assert_eq!(guard.next_level_pos, 0); + guard.reset_next_level_size_state(); + // Move position req = SstPartitionerRequest { prev_user_key: b"cca", @@ -308,6 +411,30 @@ mod tests { }; assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); assert_eq!(guard.pos, 1); + assert_eq!(guard.next_level_pos, 110); + guard.reset_next_level_size_state(); + + // Move next level posistion + req = SstPartitionerRequest { + prev_user_key: b"cccz000", + current_user_key: b"cccz042", + current_output_file_size: 1 << 20, + }; + assert_eq!( + guard.should_partition(&req), + SstPartitionerResult::NotRequired + ); + assert_eq!(guard.pos, 2); + assert_eq!(guard.next_level_pos, 53); + + req = SstPartitionerRequest { + prev_user_key: b"cccz090", + current_user_key: b"dde", + current_output_file_size: 1 << 20, + }; + assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); + assert_eq!(guard.pos, 2); + assert_eq!(guard.next_level_pos, 110); } #[test] @@ -339,6 +466,11 @@ mod tests { b"aaa15".to_vec(), ], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: vec![], + next_level_size: vec![], + max_compaction_size: 1 << 30, }; // Binary search meet exact match. guard.pos = 0; @@ -365,15 +497,23 @@ mod tests { const MIN_OUTPUT_FILE_SIZE: u64 = 1024; const MAX_OUTPUT_FILE_SIZE: u64 = 4096; + const MAX_COMPACTION_SIZE: u64 = 10240; fn new_test_db(provider: MockRegionInfoProvider) -> (RocksEngine, TempDir) { let temp_dir = TempDir::new().unwrap(); let mut cf_opts = RocksCfOptions::default(); + cf_opts.set_max_bytes_for_level_base(MAX_OUTPUT_FILE_SIZE); + cf_opts.set_max_bytes_for_level_multiplier(5); cf_opts.set_target_file_size_base(MAX_OUTPUT_FILE_SIZE); cf_opts.set_sst_partitioner_factory(RocksSstPartitionerFactory( - CompactionGuardGeneratorFactory::new(CF_DEFAULT, provider, MIN_OUTPUT_FILE_SIZE) - .unwrap(), + CompactionGuardGeneratorFactory::new( + CF_DEFAULT, + provider, + MIN_OUTPUT_FILE_SIZE, + MAX_COMPACTION_SIZE, + ) + .unwrap(), )); cf_opts.set_disable_auto_compactions(true); cf_opts.compression_per_level(&[ @@ -412,6 +552,16 @@ mod tests { ret } + fn get_sst_files(dir: &Path) -> Vec { + let files = dir.read_dir().unwrap(); + let mut sst_files = files + .map(|entry| entry.unwrap().path().to_str().unwrap().to_owned()) + .filter(|entry| entry.ends_with(".sst")) + .collect::>(); + sst_files.sort(); + sst_files + } + #[test] fn test_compaction_guard_with_rocks() { let provider = MockRegionInfoProvider::new(vec![ @@ -463,11 +613,7 @@ mod tests { ) .unwrap(); - let files = dir.path().read_dir().unwrap(); - let mut sst_files = files - .map(|entry| entry.unwrap().path().to_str().unwrap().to_owned()) - .filter(|entry| entry.ends_with(".sst")) - .collect::>(); + let mut sst_files = get_sst_files(dir.path()); sst_files.sort(); assert_eq!(3, sst_files.len()); assert_eq!(collect_keys(&sst_files[0]), [b"za1", b"zb1", b"zb2"]); @@ -477,4 +623,120 @@ mod tests { ); assert_eq!(collect_keys(&sst_files[2]), [b"zc6"]); } + + fn simple_regions() -> MockRegionInfoProvider { + MockRegionInfoProvider::new(vec![ + Region { + id: 1, + start_key: b"a".to_vec(), + end_key: b"b".to_vec(), + ..Default::default() + }, + Region { + id: 2, + start_key: b"b".to_vec(), + end_key: b"c".to_vec(), + ..Default::default() + }, + Region { + id: 3, + start_key: b"c".to_vec(), + end_key: b"d".to_vec(), + ..Default::default() + }, + ]) + } 
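+    // The tests below exercise the next-level size accounting added by this
+    // patch: a compaction output file is cut early when its key range would
+    // overlap more than max-compaction-size of data in the level after the
+    // output level, and is left uncut while the overlapped size stays under
+    // that limit.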
+ + #[test] + fn test_next_level_compaction() { + let provider = simple_regions(); + let (db, _dir) = new_test_db(provider); + assert_eq!(b"z", DATA_PREFIX_KEY); + let tiny_value = [b'v'; 1]; + let value = vec![b'v'; 1024 * 10]; + ['a', 'b', 'c'] + .into_iter() + .flat_map(|x| (1..10).map(move |n| format!("z{x}{n}").into_bytes())) + .for_each(|key| db.put(&key, &value).unwrap()); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(2)).unwrap(); + db.put(b"za0", &tiny_value).unwrap(); + db.put(b"zd0", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 2, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[0].largestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[1].smallestkey, b"zd0", "{:?}", level_1); + assert_eq!(level_1[1].largestkey, b"zd0", "{:?}", level_1); + } + + #[test] + fn test_next_level_compaction_no_split() { + let provider = simple_regions(); + let (db, _dir) = new_test_db(provider); + assert_eq!(b"z", DATA_PREFIX_KEY); + let tiny_value = [b'v'; 1]; + let value = vec![b'v'; 1024 * 10]; + ['a', 'b', 'c'] + .into_iter() + .flat_map(|x| (1..10).map(move |n| format!("z{x}{n}").into_bytes())) + .for_each(|key| db.put(&key, &value).unwrap()); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(2)).unwrap(); + // So... the next-level size will be almost 1024 * 9, which doesn't exceeds the + // compaction size limit. + db.put(b"za0", &tiny_value).unwrap(); + db.put(b"za9", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 1, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[0].largestkey, b"za9", "{:?}", level_1); + db.compact_range(None, None, false, 1).unwrap(); + + // So... the next-level size will be almost 1024 * 15, which should reach the + // limit. + db.put(b"za30", &tiny_value).unwrap(); + db.put(b"zb90", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 2, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za30", "{:?}", level_1); + assert_eq!(level_1[1].largestkey, b"zb90", "{:?}", level_1); + } + + #[derive(Debug)] + #[allow(dead_code)] + struct OwnedSstFileMetadata { + name: String, + size: usize, + smallestkey: Vec, + largestkey: Vec, + } + + #[allow(unused)] + fn level_files(db: &RocksEngine) -> HashMap> { + let db = db.as_inner(); + let cf = db.cf_handle("default").unwrap(); + let md = db.get_column_family_meta_data(cf); + let mut res: HashMap> = HashMap::default(); + for (i, level) in md.get_levels().into_iter().enumerate() { + for file in level.get_files() { + res.entry(i).or_default().push(OwnedSstFileMetadata { + name: file.get_name(), + size: file.get_size(), + smallestkey: file.get_smallestkey().to_owned(), + largestkey: file.get_largestkey().to_owned(), + }); + } + } + res + } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8318556483ed..d18d6f8cda0f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -645,6 +645,7 @@ macro_rules! 
build_cf_opt { $cf_name, provider.clone(), $opt.compaction_guard_min_output_file_size.0, + $opt.max_compaction_bytes.0, ) .unwrap(); cf_opts.set_sst_partitioner_factory(factory); From 56091d5998745f7c741d1c6fa8aa1ba281e990ed Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 28 Sep 2023 11:48:41 +0800 Subject: [PATCH 076/203] chore: bump version to 7.5.0-alpha (#15708) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3842f92752c..a10755f5a7f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6526,7 +6526,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.4.0-alpha" +version = "7.5.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 81be4d36906f..4d8cefa9fa46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.4.0-alpha" +version = "7.5.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From a7db07d72dcbf2c938ebd0b4661270fdc95f9a43 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 28 Sep 2023 17:40:50 +0800 Subject: [PATCH 077/203] raftstore-v2: gc removed_records and merged_records on tombstone store (#15677) close tikv/tikv#15669 Let leader directly GC removed_records and merged_records on tombstone store, instead of sending GcPeerRequests to such store. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 3 ++ components/raftstore-v2/src/operation/life.rs | 31 ++++++++++++ .../raftstore-v2/src/operation/ready/mod.rs | 7 +++ components/raftstore-v2/src/router/message.rs | 5 ++ components/test_pd_client/src/pd.rs | 4 +- components/test_raftstore-v2/src/server.rs | 5 ++ components/test_raftstore/src/server.rs | 4 +- components/tikv_kv/src/raft_extension.rs | 3 ++ src/server/lock_manager/deadlock.rs | 13 +---- src/server/lock_manager/mod.rs | 4 +- src/server/metrics.rs | 1 + src/server/raft_client.rs | 16 +++--- src/server/raftkv2/raft_extension.rs | 5 ++ src/server/resolve.rs | 50 +++++++++++++++++-- src/server/server.rs | 6 +-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_life.rs | 36 +++++++++++++ .../config/dynamic/pessimistic_txn.rs | 16 +----- tests/integrations/server/raft_client.rs | 44 ++++++++-------- 19 files changed, 188 insertions(+), 66 deletions(-) create mode 100644 tests/failpoints/cases/test_life.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 872b2c4e7e63..547297872719 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -315,6 +315,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::StoreUnreachable { to_store_id } => { self.fsm.peer_mut().on_store_unreachable(to_store_id) } + PeerMsg::StoreMaybeTombstone { store_id } => { + self.fsm.peer_mut().on_store_maybe_tombstone(store_id) + } PeerMsg::SnapshotSent { to_peer_id, status } => { self.fsm.peer_mut().on_snapshot_sent(to_peer_id, status) } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8591d5daf23c..84bded8a9bbb 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -713,6 +713,37 @@ impl Peer { ctx.confirmed_ids.push(gc_peer_id); } + // Clean 
up removed and merged records for peers on tombstone stores, + // otherwise it may keep sending gc peer request to the tombstone store. + pub fn on_store_maybe_tombstone_gc_peer(&mut self, store_id: u64) { + let mut peers_on_tombstone = vec![]; + let state = self.storage().region_state(); + for peer in state.get_removed_records() { + if peer.get_store_id() == store_id { + peers_on_tombstone.push(peer.clone()); + } + } + for record in state.get_merged_records() { + for peer in record.get_source_peers() { + if peer.get_store_id() == store_id { + peers_on_tombstone.push(peer.clone()); + } + } + } + if peers_on_tombstone.is_empty() { + return; + } + info!(self.logger, "gc peer on tombstone store"; + "tombstone_store_id" => store_id, + "peers" => ?peers_on_tombstone); + let ctx = self.gc_peer_context_mut(); + for peer in peers_on_tombstone { + if !ctx.confirmed_ids.contains(&peer.get_id()) { + ctx.confirmed_ids.push(peer.get_id()); + } + } + } + // Removes deleted peers from region state by proposing a `UpdateGcPeer` // command. pub fn on_gc_peer_tick(&mut self, ctx: &mut StoreContext) { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 17845b5d0b8c..1ff07f2ccc1e 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -247,6 +247,13 @@ impl Peer { } } + pub fn on_store_maybe_tombstone(&mut self, store_id: u64) { + if !self.is_leader() { + return; + } + self.on_store_maybe_tombstone_gc_peer(store_id); + } + pub fn on_raft_message( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 2d364af44e19..16d43970e7a1 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -197,6 +197,11 @@ pub enum PeerMsg { StoreUnreachable { to_store_id: u64, }, + // A store may be tombstone. Use it with caution, it also means store not + // found, PD can not distinguish them now, as PD may delete tombstone stores. + StoreMaybeTombstone { + store_id: u64, + }, /// Reports whether the snapshot sending is successful or not. SnapshotSent { to_peer_id: u64, diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index c81230f6a163..a9141bf62992 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -547,7 +547,9 @@ impl PdCluster { fn get_store(&self, store_id: u64) -> Result { match self.stores.get(&store_id) { Some(s) if s.store.get_id() != 0 => Ok(s.store.clone()), - _ => Err(box_err!("store {} not found", store_id)), + // Matches PD error message. 
+ // See https://github.com/tikv/pd/blob/v7.3.0/server/grpc_service.go#L777-L780 + _ => Err(box_err!("invalid store ID {}, not found", store_id)), } } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7b5d501a59f4..299e93eb7461 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -222,6 +222,11 @@ impl RaftExtension for TestExtension { self.extension.report_store_unreachable(store_id) } + #[inline] + fn report_store_maybe_tombstone(&self, store_id: u64) { + self.extension.report_store_maybe_tombstone(store_id) + } + #[inline] fn report_snapshot_status( &self, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8d26bae968d8..0002f36d647e 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -113,8 +113,8 @@ impl StoreAddrResolver for AddressMap { fn resolve( &self, store_id: u64, - cb: Box) + Send>, - ) -> ServerResult<()> { + cb: Box) + Send>, + ) -> resolve::Result<()> { let addr = self.get(store_id); match addr { Some(addr) => cb(Ok(addr)), diff --git a/components/tikv_kv/src/raft_extension.rs b/components/tikv_kv/src/raft_extension.rs index 26c9e687ef6f..7ab4c1c030d1 100644 --- a/components/tikv_kv/src/raft_extension.rs +++ b/components/tikv_kv/src/raft_extension.rs @@ -32,6 +32,9 @@ pub trait RaftExtension: Clone + Send { /// Report the target store is unreachable. fn report_store_unreachable(&self, _store_id: u64) {} + /// Report the target store may be tombstone. + fn report_store_maybe_tombstone(&self, _store_id: u64) {} + /// Report the status of snapshot. fn report_snapshot_status(&self, _region_id: u64, _to_peer_id: u64, _status: SnapshotStatus) {} diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 9583df80dd67..fd749cc31757 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -1119,7 +1119,7 @@ pub mod tests { use tikv_util::worker::FutureWorker; use super::*; - use crate::server::resolve::Callback; + use crate::server::resolve; #[test] fn test_detect_table() { @@ -1467,15 +1467,6 @@ pub mod tests { impl PdClient for MockPdClient {} - #[derive(Clone)] - pub(crate) struct MockResolver; - - impl StoreAddrResolver for MockResolver { - fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { - Err(Error::Other(box_err!("unimplemented"))) - } - } - fn start_deadlock_detector( host: &mut CoprocessorHost, ) -> (FutureWorker, Scheduler) { @@ -1485,7 +1476,7 @@ pub mod tests { let detector_runner = Detector::new( 1, Arc::new(MockPdClient {}), - MockResolver {}, + resolve::MockStoreAddrResolver::default(), Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()), waiter_mgr_scheduler, &Config::default(), diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 243d533a0e55..c42531ae0fd9 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -318,7 +318,7 @@ mod tests { use self::{deadlock::tests::*, metrics::*, waiter_manager::tests::*}; use super::*; - use crate::storage::lock_manager::LockDigest; + use crate::{server::resolve::MockStoreAddrResolver, storage::lock_manager::LockDigest}; fn start_lock_manager() -> LockManager { let mut coprocessor_host = CoprocessorHost::::default(); @@ -336,7 +336,7 @@ mod tests { .start( 1, Arc::new(MockPdClient {}), - MockResolver {}, + MockStoreAddrResolver::default(), 
Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()), &cfg, ) diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 2745be59a71d..122748cdfa9a 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -86,6 +86,7 @@ make_auto_flush_static_metric! { failed, success, tombstone, + not_found, } pub label_enum ReplicaReadLockCheckResult { diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index f30e5b360459..b120011c490e 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -46,8 +46,11 @@ use tikv_util::{ use yatp::{task::future::TaskCell, ThreadPool}; use crate::server::{ - self, load_statistics::ThreadLoadPool, metrics::*, snap::Task as SnapTask, Config, - StoreAddrResolver, + load_statistics::ThreadLoadPool, + metrics::*, + resolve::{Error as ResolveError, Result as ResolveResult}, + snap::Task as SnapTask, + Config, StoreAddrResolver, }; pub struct MetadataSourceStoreId {} @@ -642,7 +645,7 @@ where S: StoreAddrResolver, R: RaftExtension + Unpin + 'static, { - fn resolve(&self) -> impl Future> { + fn resolve(&self) -> impl Future> { let (tx, rx) = oneshot::channel(); let store_id = self.store_id; let res = self.builder.resolver.resolve( @@ -673,7 +676,7 @@ where res?; match rx.await { Ok(a) => a, - Err(_) => Err(server::Error::Other( + Err(_) => Err(ResolveError::Other( "failed to receive resolve result".into(), )), } @@ -824,8 +827,7 @@ async fn start( RESOLVE_STORE_COUNTER.with_label_values(&["failed"]).inc(); back_end.clear_pending_message("resolve"); error_unknown!(?e; "resolve store address failed"; "store_id" => back_end.store_id,); - // TOMBSTONE - if format!("{}", e).contains("has been removed") { + if let ResolveError::StoreTombstone(_) = e { let mut pool = pool.lock().unwrap(); if let Some(s) = pool.connections.remove(&(back_end.store_id, conn_id)) { s.set_conn_state(ConnState::Disconnected); @@ -940,7 +942,7 @@ struct CachedQueue { /// ```text /// for m in msgs { /// if !raft_client.send(m) { -/// // handle error. +/// // handle error. /// } /// } /// raft_client.flush(); diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs index f6bb66e9e118..8b15c73fb657 100644 --- a/src/server/raftkv2/raft_extension.rs +++ b/src/server/raftkv2/raft_extension.rs @@ -49,6 +49,11 @@ impl tikv_kv::RaftExtension for Extension .send_control(StoreMsg::StoreUnreachable { to_store_id }); } + fn report_store_maybe_tombstone(&self, store_id: u64) { + self.router + .broadcast_normal(|| PeerMsg::StoreMaybeTombstone { store_id }); + } + fn report_snapshot_status( &self, region_id: u64, diff --git a/src/server/resolve.rs b/src/server/resolve.rs index c831ff28d17c..013511183e23 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -1,6 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + error::Error as StdError, fmt::{self, Display, Formatter}, sync::{Arc, Mutex}, }; @@ -9,16 +10,28 @@ use collections::HashMap; use kvproto::replication_modepb::ReplicationMode; use pd_client::{take_peer_address, PdClient}; use raftstore::store::GlobalReplicationState; +use thiserror::Error; use tikv_kv::RaftExtension; use tikv_util::{ + info, time::Instant, worker::{Runnable, Scheduler, Worker}, }; -use super::{metrics::*, Result}; +use super::metrics::*; const STORE_ADDRESS_REFRESH_SECONDS: u64 = 60; +#[derive(Debug, Error)] +pub enum Error { + #[error("{0:?}")] + Other(#[from] Box), + #[error("store {0} has been removed")] + StoreTombstone(u64), +} + +pub type Result = std::result::Result; + pub type Callback = Box) + Send>; pub fn store_address_refresh_interval_secs() -> u64 { @@ -95,9 +108,21 @@ where // it explicitly. Err(pd_client::Error::StoreTombstone(_)) => { RESOLVE_STORE_COUNTER_STATIC.tombstone.inc(); - return Err(box_err!("store {} has been removed", store_id)); + self.router.report_store_maybe_tombstone(store_id); + return Err(Error::StoreTombstone(store_id)); + } + Err(e) => { + // Tombstone store may be removed manually or automatically + // after 30 days of deletion. PD returns + // "invalid store ID %d, not found" for such store id. + // See https://github.com/tikv/pd/blob/v7.3.0/server/grpc_service.go#L777-L780 + if format!("{:?}", e).contains("not found") { + RESOLVE_STORE_COUNTER_STATIC.not_found.inc(); + info!("resolve store not found"; "store_id" => store_id); + self.router.report_store_maybe_tombstone(store_id); + } + return Err(box_err!(e)); } - Err(e) => return Err(box_err!(e)), }; let mut group_id = None; let mut state = self.state.lock().unwrap(); @@ -181,6 +206,25 @@ impl StoreAddrResolver for PdStoreAddrResolver { } } +#[derive(Clone)] +pub struct MockStoreAddrResolver { + pub resolve_fn: Arc Result<()> + Send + Sync>, +} + +impl StoreAddrResolver for MockStoreAddrResolver { + fn resolve(&self, store_id: u64, cb: Callback) -> Result<()> { + (self.resolve_fn)(store_id, cb) + } +} + +impl Default for MockStoreAddrResolver { + fn default() -> MockStoreAddrResolver { + MockStoreAddrResolver { + resolve_fn: Arc::new(|_, _| unimplemented!()), + } + } +} + #[cfg(test)] mod tests { use std::{net::SocketAddr, ops::Sub, str::FromStr, sync::Arc, thread, time::Duration}; diff --git a/src/server/server.rs b/src/server/server.rs index 948930ae7ae9..a886f1232f44 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -533,8 +533,8 @@ mod tests { use super::{ super::{ - resolve::{Callback as ResolveCallback, StoreAddrResolver}, - Config, Result, + resolve::{self, Callback as ResolveCallback, StoreAddrResolver}, + Config, }, *, }; @@ -552,7 +552,7 @@ mod tests { } impl StoreAddrResolver for MockResolver { - fn resolve(&self, _: u64, cb: ResolveCallback) -> Result<()> { + fn resolve(&self, _: u64, cb: ResolveCallback) -> resolve::Result<()> { if self.quick_fail.load(Ordering::SeqCst) { return Err(box_err!("quick fail")); } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index a9dbd36a81a6..ed2b8d79f9c7 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -17,6 +17,7 @@ mod test_gc_worker; mod test_hibernate; mod test_import_service; mod test_kv_service; +mod test_life; mod test_local_read; mod test_memory_usage_limit; mod test_merge; diff --git a/tests/failpoints/cases/test_life.rs b/tests/failpoints/cases/test_life.rs new file mode 100644 index 000000000000..2bc833075c60 --- /dev/null +++ 
b/tests/failpoints/cases/test_life.rs @@ -0,0 +1,36 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use test_raftstore::*; +use test_raftstore_macro::test_case; +use tikv_util::config::ReadableDuration; + +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_gc_peer_on_tombstone_store() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + + let region = cluster.get_region(b"k1"); + + let peer_on_store1 = find_peer(®ion, 1).unwrap().clone(); + let peer_on_store3 = find_peer(®ion, 3).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + cluster.add_send_filter(IsolationFilterFactory::new(3)); + pd_client.must_remove_peer(region.get_id(), peer_on_store3); + + // Immediately invalidate store address cache. + fail::cfg("mock_store_refresh_interval_secs", "return(0)").unwrap(); + + // Shutdown store 3 and wait for gc peer ticks. + cluster.stop_node(3); + cluster.clear_send_filters(); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + cluster.must_empty_region_removed_records(region.get_id()); +} diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index 7af5455a1993..dc88bbd93a3b 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -9,11 +9,7 @@ use security::SecurityManager; use test_pd_client::TestPdClient; use tikv::{ config::*, - server::{ - lock_manager::*, - resolve::{Callback, StoreAddrResolver}, - Error, Result, - }, + server::{lock_manager::*, resolve}, }; use tikv_util::config::ReadableDuration; @@ -27,14 +23,6 @@ fn test_config_validate() { invalid_cfg.validate().unwrap_err(); } -#[derive(Clone)] -struct MockResolver; -impl StoreAddrResolver for MockResolver { - fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { - Err(Error::Other(box_err!("unimplemented"))) - } -} - fn setup( cfg: TikvConfig, ) -> ( @@ -50,7 +38,7 @@ fn setup( .start( 1, pd_client, - MockResolver, + resolve::MockStoreAddrResolver::default(), security_mgr, &cfg.pessimistic_txn, ) diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index aad9ab7ceb10..2b51bb1f21b3 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -21,8 +21,8 @@ use kvproto::{ use raft::eraftpb::Entry; use raftstore::errors::DiscardReason; use tikv::server::{ - self, load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, resolve::Callback, - Config, ConnectionBuilder, RaftClient, StoreAddrResolver, TestRaftStoreRouter, + load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, Config, ConnectionBuilder, + RaftClient, StoreAddrResolver, TestRaftStoreRouter, }; use tikv_kv::{FakeExtension, RaftExtension}; use tikv_util::{ @@ -32,24 +32,6 @@ use tikv_util::{ use super::*; -#[derive(Clone)] -pub struct StaticResolver { - port: u16, -} - -impl StaticResolver { - fn new(port: u16) -> StaticResolver { - StaticResolver { port } - } -} - -impl StoreAddrResolver for StaticResolver { - fn resolve(&self, _store_id: u64, cb: Callback) -> server::Result<()> { - cb(Ok(format!("localhost:{}", self.port))); - Ok(()) - } -} - fn 
get_raft_client(router: R, resolver: T) -> RaftClient where R: RaftExtension + Unpin + 'static, @@ -75,8 +57,16 @@ where RaftClient::new(0, builder) } -fn get_raft_client_by_port(port: u16) -> RaftClient { - get_raft_client(FakeExtension, StaticResolver::new(port)) +fn get_raft_client_by_port(port: u16) -> RaftClient { + get_raft_client( + FakeExtension, + resolve::MockStoreAddrResolver { + resolve_fn: Arc::new(move |_, cb| { + cb(Ok(format!("localhost:{}", port))); + Ok(()) + }), + }, + ) } #[derive(Clone)] @@ -177,7 +167,15 @@ fn test_raft_client_reconnect() { let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); let router = TestRaftStoreRouter::new(tx, significant_msg_sender); let wrap = RaftRouterWrap::new(router); - let mut raft_client = get_raft_client(wrap, StaticResolver::new(port)); + let mut raft_client = get_raft_client( + wrap, + resolve::MockStoreAddrResolver { + resolve_fn: Arc::new(move |_, cb| { + cb(Ok(format!("localhost:{}", port))); + Ok(()) + }), + }, + ); (0..50).for_each(|_| raft_client.send(RaftMessage::default()).unwrap()); raft_client.flush(); From fda1b5caf19f3ee87ab26c4458c64a6b3f3ea5ca Mon Sep 17 00:00:00 2001 From: Juan Grande Date: Thu, 28 Sep 2023 02:55:50 -0700 Subject: [PATCH 078/203] logger: added thread_id to logs (#15638) close tikv/tikv#13395 Added thread_id to logs Signed-off-by: Juan Grande Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/logger/mod.rs | 126 +++++++++++++++++++------ 1 file changed, 96 insertions(+), 30 deletions(-) diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 5ebe9468a507..c321f56a1b5e 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -6,6 +6,7 @@ mod formatter; use std::{ env, fmt, io::{self, BufWriter}, + num::NonZeroU64, path::{Path, PathBuf}, sync::{ atomic::{AtomicUsize, Ordering}, @@ -15,7 +16,10 @@ use std::{ }; use log::{self, SetLoggerError}; -use slog::{self, slog_o, Drain, FnValue, Key, OwnedKVList, PushFnValue, Record, KV}; +use slog::{ + self, slog_o, Drain, FnValue, Key, OwnedKV, OwnedKVList, PushFnValue, Record, + SendSyncRefUnwindSafeKV, KV, +}; pub use slog::{FilterFn, Level}; use slog_async::{Async, AsyncGuard, OverflowStrategy}; use slog_term::{Decorator, PlainDecorator, RecordDecorator}; @@ -85,7 +89,7 @@ where }; let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - (slog::Logger::root(filtered, slog_o!()), Some(guard)) + (slog::Logger::root(filtered, get_values()), Some(guard)) } else { let drain = LogAndFuse(Mutex::new(drain)); let drain = SlowLogFilter { @@ -93,7 +97,7 @@ where inner: drain, }; let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - (slog::Logger::root(filtered, slog_o!()), None) + (slog::Logger::root(filtered, get_values()), None) }; set_global_logger(level, init_stdlog, logger, guard) @@ -628,6 +632,18 @@ fn write_log_fields( Ok(()) } +fn format_thread_id(thread_id: NonZeroU64) -> String { + format!("{:#0x}", thread_id) +} + +fn get_values() -> OwnedKV { + slog_o!( + "thread_id" => FnValue(|_| { + format_thread_id(std::thread::current().id().as_u64()) + }) + ) +} + struct Serializer<'a> { decorator: &'a mut dyn RecordDecorator, } @@ -679,7 +695,7 @@ impl<'a> slog::Serializer for Serializer<'a> { #[cfg(test)] mod tests { - use std::{cell::RefCell, io, io::Write, str::from_utf8}; + use std::{cell::RefCell, io, io::Write, str::from_utf8, sync::RwLock, time::Duration}; use 
chrono::DateTime; use regex::Regex; @@ -705,8 +721,6 @@ mod tests { } fn log_format_cases(logger: slog::Logger) { - use std::time::Duration; - // Empty message is not recommend, just for test purpose here. slog_info!(logger, ""); slog_info!(logger, "Welcome"); @@ -763,21 +777,25 @@ mod tests { fn test_log_format_text() { let decorator = PlainSyncDecorator::new(TestWriter); let drain = TikvFormat::new(decorator, true).fuse(); - let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); log_format_cases(logger); - let expect = r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] -[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] -[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] -[2019/01/15 13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] -[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", \"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] -"#; + let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let expect = format!( + r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] [thread_id={0}] +[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] [thread_id={0}] +[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", \"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] [thread_id={0}] +"#, + thread_id + ); BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); @@ -811,21 +829,25 @@ mod tests { fn 
test_log_format_json() { use serde_json::{from_str, Value}; let drain = Mutex::new(json_format(TestWriter, true)).map(slog::Fuse); - let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); log_format_cases(logger); - let expect = r#"{"time":"2020/05/16 15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":""} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\""} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null} -{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote"} -"#; + let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let expect = format!( + r#"{{"time":"2020/05/16 15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":"","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\"","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null,"thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", 
\"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote","thread_id":"{0}"}} +"#, + thread_id + ); BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); @@ -1074,4 +1096,48 @@ mod tests { } }); } + + static THREAD_SAFE_BUFFER: RwLock> = RwLock::new(Vec::new()); + + struct ThreadSafeWriter; + impl Write for ThreadSafeWriter { + fn write(&mut self, data: &[u8]) -> io::Result { + let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); + buffer.write(data) + } + + fn flush(&mut self) -> io::Result<()> { + let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); + buffer.flush() + } + } + + #[test] + fn test_threadid() { + let drain = TikvFormat::new(PlainSyncDecorator::new(ThreadSafeWriter), true).fuse(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); + + slog_info!(logger, "Hello from the first thread"); + let this_threadid = thread::current().id().as_u64(); + let this_threadid = format_thread_id(this_threadid); + + let handle = thread::spawn(move || { + slog_info!(logger, "Hello from the second thread"); + }); + let other_threadid = handle.thread().id().as_u64(); + let other_threadid = format_thread_id(other_threadid); + handle.join().unwrap(); + + let expected = vec![this_threadid, other_threadid]; + + let re = Regex::new(r"\[thread_id=(.*?)\]").unwrap(); + let buffer = THREAD_SAFE_BUFFER.read().unwrap(); + let output = from_utf8(&buffer).unwrap(); + let actual: Vec<&str> = output + .lines() + .map(|line| re.captures(line).unwrap()) + .map(|captures| captures.get(1).unwrap().as_str()) + .collect(); + assert_eq!(expected, actual); + } } From 58253e8b7cea59b414511753b75dd7fc980d99af Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Sun, 8 Oct 2023 13:03:22 +0800 Subject: [PATCH 079/203] raftstore: split bucket if the increment flow reach the limit (#15637) close tikv/tikv#15636 there are three reason may cause the bucket not split: 1. split check tick will refresh bucket info even info the bucket version not change 2. the suspect buckets only conside the increment flow 3. all the bucket increment flows are reset if one bucket is updated. To solve this, bucket stats only record the increment flow and reset it after meta size updated. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/pd_client/src/lib.rs | 9 + .../raftstore-v2/src/operation/bucket.rs | 536 ++++++++++++------ components/raftstore-v2/src/worker/pd/mod.rs | 6 +- .../raftstore-v2/src/worker/pd/region.rs | 22 +- components/raftstore/src/store/fsm/peer.rs | 25 +- components/raftstore/src/store/util.rs | 14 + 6 files changed, 391 insertions(+), 221 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 7a9d2cd2a611..21ae61ccd61e 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -211,6 +211,15 @@ impl BucketStat { } } + pub fn clean_stats(&mut self, idx: usize) { + self.stats.write_keys[idx] = 0; + self.stats.write_bytes[idx] = 0; + self.stats.read_qps[idx] = 0; + self.stats.write_qps[idx] = 0; + self.stats.read_keys[idx] = 0; + self.stats.read_bytes[idx] = 0; + } + pub fn split(&mut self, idx: usize) { assert!(idx != 0); // inherit the traffic stats for splited bucket diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 432ea72456ae..242b9a9b33ba 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -11,10 +11,10 @@ use kvproto::{ }; use pd_client::{BucketMeta, BucketStat}; use raftstore::{ - coprocessor::RegionChangeEvent, + coprocessor::{Config, RegionChangeEvent}, store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, }; -use slog::{error, info, warn}; +use slog::{error, info}; use crate::{ batch::StoreContext, @@ -26,15 +26,13 @@ use crate::{ #[derive(Debug, Clone, Default)] pub struct BucketStatsInfo { + // the stats is increment flow. bucket_stat: Option, - // the last buckets records the stats that the recently refreshed. - last_bucket_stat: Option, // the report bucket stat records the increment stats after last report pd. // it will be reset after report pd. report_bucket_stat: Option, - // last bucket count. - // BucketStat.meta is Arc so it cannot be used for last bucket count - last_bucket_count: usize, + // avoid the version roll back, it record the last bucket version if bucket stat isn't none. + last_bucket_version: u64, } impl BucketStatsInfo { @@ -42,55 +40,33 @@ impl BucketStatsInfo { /// diff_size_threshold. 
pub fn gen_bucket_range_for_update( &self, - diff_size_threshold: u64, + region_bucket_max_size: u64, ) -> Option> { let region_buckets = self.bucket_stat.as_ref()?; let stats = ®ion_buckets.stats; let keys = ®ion_buckets.meta.keys; + let sizes = ®ion_buckets.meta.sizes; - let empty_last_keys = vec![]; - let empty_last_stats = metapb::BucketStats::default(); - let (last_keys, last_stats, stats_reset) = self - .last_bucket_stat - .as_ref() - .map(|b| { - ( - &b.meta.keys, - &b.stats, - region_buckets.create_time != b.create_time, - ) - }) - .unwrap_or((&empty_last_keys, &empty_last_stats, false)); - - let mut bucket_ranges = vec![]; - let mut j = 0; + let mut suspect_bucket_ranges = vec![]; assert_eq!(keys.len(), stats.write_bytes.len() + 1); for i in 0..stats.write_bytes.len() { - let mut diff_in_bytes = stats.write_bytes[i]; - while j < last_keys.len() && keys[i] > last_keys[j] { - j += 1; - } - if j < last_keys.len() && keys[i] == last_keys[j] { - if !stats_reset { - diff_in_bytes -= last_stats.write_bytes[j]; - } - j += 1; - } - if diff_in_bytes >= diff_size_threshold { - bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); + let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; + if estimated_bucket_size >= region_bucket_max_size { + suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); } } - Some(bucket_ranges) + Some(suspect_bucket_ranges) } #[inline] pub fn version(&self) -> u64 { self.bucket_stat .as_ref() - .or(self.last_bucket_stat.as_ref()) .map(|b| b.meta.version) + .or(Some(self.last_bucket_version)) .unwrap_or_default() } + #[inline] pub fn add_bucket_flow(&mut self, delta: &Option) { if let (Some(buckets), Some(report_buckets), Some(delta)) = ( @@ -105,21 +81,18 @@ impl BucketStatsInfo { #[inline] pub fn set_bucket_stat(&mut self, buckets: Option) { - if let Some(b) = self.bucket_stat.take() { - self.last_bucket_stat = Some(b); - } - self.report_bucket_stat = buckets.clone(); - self.bucket_stat = buckets; - self.last_bucket_count = self - .bucket_stat - .as_ref() - .map_or(0, |bucket_stat| bucket_stat.meta.keys.len() - 1); - } - - #[inline] - pub fn clear_bucket_stat(&mut self) { - if let Some(bucket) = self.report_bucket_stat.as_mut() { - bucket.clear_stats(); + self.bucket_stat = buckets.clone(); + if let Some(new_buckets) = buckets { + self.last_bucket_version = new_buckets.meta.version; + let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); + if let Some(old) = &mut self.report_bucket_stat { + new_report_buckets.merge(old); + *old = new_report_buckets; + } else { + self.report_bucket_stat = Some(new_report_buckets); + } + } else { + self.report_bucket_stat = None; } } @@ -136,142 +109,163 @@ impl BucketStatsInfo { &self.bucket_stat } - #[inline] - pub fn last_bucket_count(&self) -> usize { - self.last_bucket_count - } -} - -impl Peer { - #[inline] - pub fn on_refresh_region_buckets( + pub fn on_refresh_region_buckets( &mut self, - store_ctx: &mut StoreContext, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, region_epoch: RegionEpoch, - mut buckets: Vec, + region: metapb::Region, bucket_ranges: Option>, - ) { - // bucket version layout - // term logical counter - // |-----------|-----------| - // high bits low bits - // term: given 10s election timeout, the 32 bit means 1362 year running time - let gen_bucket_version = |term, current_version| { - let current_version_term = current_version >> 32; - let bucket_version: u64 = if current_version_term == term { - current_version + 1 - } else { - if 
term > u32::MAX.into() { - error!( - self.logger, - "unexpected term {} more than u32::MAX. Bucket - version will be backward.", - term - ); - } - term << 32 - }; - bucket_version - }; - - let region = self.region(); - let current_version = self.region_buckets_info().version(); - let next_bucket_version = gen_bucket_version(self.term(), current_version); - let mut is_first_refresh = true; - let mut change_bucket_version = false; - let mut region_buckets: BucketStat; - + ) -> bool { + let change_bucket_version: bool; // The region buckets reset after this region happened split or merge. // The message should be dropped if it's epoch is lower than the regions. // The bucket ranges is none when the region buckets is also none. // So this condition indicates that the region buckets needs to refresh not // renew. - if let (Some(bucket_ranges), Some(peer_region_buckets)) = - (bucket_ranges, self.region_buckets_info().bucket_stat()) - { - is_first_refresh = false; + if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ assert_eq!(buckets.len(), bucket_ranges.len()); - let mut meta_idx = 0; - region_buckets = peer_region_buckets.clone(); - let mut meta = (*region_buckets.meta).clone(); - meta.region_epoch = region_epoch; - for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - // the bucket ranges maybe need to split or merge not all the meta keys, so it - // needs to find the first keys. - while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { - meta_idx += 1; - } - // meta_idx can't be not the last entry (which is end key) - if meta_idx >= meta.keys.len() - 1 { - warn!( - self.logger, - "can't find the bucket key"; - "bucket_range_key" => log_wrappers::Value::key(&bucket_range.0)); - break; - } - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = store_ctx - .coprocessor_host - .cfg - .region_bucket_merge_size_ratio - * (store_ctx.coprocessor_host.cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[meta_idx] = bucket.size; - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && meta_idx != 0 - && meta.sizes[meta_idx - 1] + bucket.size - < store_ctx.coprocessor_host.cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(meta_idx); - meta.left_merge(meta_idx); - change_bucket_version = true; - continue; - } - } else { - // update size - meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - meta_idx += 1; - region_buckets.split(meta_idx); - meta.split(meta_idx, bucket_key); - change_bucket_version = true; - } - } + change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); + }else{ + change_bucket_version = true; + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. 
+ self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); + } + change_bucket_version + } + + fn update_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + bucket_ranges: &Vec, + ) -> bool { + let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); + let mut change_bucket_version = false; + let mut meta_idx = 0; + let mut region_buckets = origin_region_buckets.clone(); + let mut meta = (*region_buckets.meta).clone(); + meta.region_epoch = region_epoch; + + // bucket stats will clean if the bucket size is updated. + for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { + // the bucket ranges maybe need to split or merge not all the meta keys, so it + // needs to find the first keys. + while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { meta_idx += 1; } - if self.region_buckets_info().last_bucket_count() != region_buckets.meta.keys.len() - 1 - { - change_bucket_version = true; + // meta_idx can't be not the last entry (which is end key) + if meta_idx >= meta.keys.len() - 1 { + break; } - if change_bucket_version { - meta.version = next_bucket_version; + // the bucket size is small and does not have split keys, + // then it should be merged with its left neighbor + let region_bucket_merge_size = + cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); + if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { + meta.sizes[meta_idx] = bucket.size; + region_buckets.clean_stats(meta_idx); + // the region has more than one bucket + // and the left neighbor + current bucket size is not very big + if meta.keys.len() > 2 + && meta_idx != 0 + && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 + { + // bucket is too small + region_buckets.left_merge(meta_idx); + meta.left_merge(meta_idx); + change_bucket_version = true; + continue; + } + } else { + // update size + meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; + region_buckets.clean_stats(meta_idx); + // insert new bucket keys (split the original bucket) + for bucket_key in bucket.keys { + meta_idx += 1; + region_buckets.split(meta_idx); + meta.split(meta_idx, bucket_key); + change_bucket_version = true; + } } - region_buckets.meta = Arc::new(meta); - } else { - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. - assert_eq!(buckets.len(), 1); - change_bucket_version = true; - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - let mut meta = BucketMeta { - region_id: self.region_id(), - region_epoch, - version: next_bucket_version, - keys: bucket_keys, - sizes: vec![store_ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], - }; - // padding the boundary keys and initialize the flow. - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - region_buckets = BucketStat::from_meta(Arc::new(meta)); + meta_idx += 1; + } + if change_bucket_version { + meta.version = next_bucket_version; } + region_buckets.meta = Arc::new(meta); + self.set_bucket_stat(Some(region_buckets)); + change_bucket_version + } + + fn init_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + mut buckets: Vec, + region_epoch: RegionEpoch, + region: metapb::Region, + ) { + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. 
+ assert_eq!(buckets.len(), 1); + let bucket_keys = buckets.pop().unwrap().keys; + let bucket_count = bucket_keys.len() + 1; + let mut meta = BucketMeta { + region_id: region.get_id(), + region_epoch, + version: next_bucket_version, + keys: bucket_keys, + sizes: vec![cfg.region_bucket_size.0; bucket_count], + }; + // padding the boundary keys and initialize the flow. + meta.keys.insert(0, region.get_start_key().to_vec()); + meta.keys.push(region.get_end_key().to_vec()); + let bucket_stats = BucketStat::from_meta(Arc::new(meta)); + self.set_bucket_stat(Some(bucket_stats)); + } +} +impl Peer { + #[inline] + pub fn on_refresh_region_buckets( + &mut self, + store_ctx: &mut StoreContext, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ) { + if self.term() > u32::MAX.into() { + error!( + self.logger, + "unexpected term {} more than u32::MAX. Bucket version will be backward.", + self.term() + ); + } + + let current_version = self.region_buckets_info().version(); + let next_bucket_version = util::gen_bucket_version(self.term(), current_version); + // let mut is_first_refresh = true; + let region = self.region().clone(); + let change_bucket_version = self.region_buckets_info_mut().on_refresh_region_buckets( + &store_ctx.coprocessor_host.cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + let region_buckets = self + .region_buckets_info() + .bucket_stat() + .as_ref() + .unwrap() + .clone(); let buckets_count = region_buckets.meta.keys.len() - 1; if change_bucket_version { // TODO: we may need to make it debug once the coprocessor timeout is resolved. @@ -281,17 +275,18 @@ impl Peer { "bucket_version" => next_bucket_version, "buckets_count" => buckets_count, "estimated_region_size" => region_buckets.meta.total_size(), - "first_refresh" => is_first_refresh, ); + } else { + // it means the buckets key range not any change, so don't need to refresh. + return; } + store_ctx.coprocessor_host.on_region_changed( - region, + self.region(), RegionChangeEvent::UpdateBuckets(buckets_count), self.state_role(), ); let meta = region_buckets.meta.clone(); - self.region_buckets_info_mut() - .set_bucket_stat(Some(region_buckets.clone())); { let mut store_meta = store_ctx.store_meta.lock().unwrap(); if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { @@ -302,13 +297,13 @@ impl Peer { if let Some(apply_scheduler) = self.apply_scheduler() { apply_scheduler.send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } + if !self.is_leader() { + return; + } let version = region_buckets.meta.version; let keys = region_buckets.meta.keys.clone(); // Notify followers to flush their relevant memtables let peers = self.region().get_peers().to_vec(); - if !self.is_leader() { - return; - } for p in peers { if p == *self.peer() || p.is_witness { continue; @@ -397,9 +392,9 @@ impl Peer { if !ctx.coprocessor_host.cfg.enable_region_bucket() { return None; } - let bucket_update_diff_size_threshold = ctx.coprocessor_host.cfg.region_bucket_size.0 / 2; + let region_bucket_max_size = ctx.coprocessor_host.cfg.region_bucket_size.0 * 2; self.region_buckets_info() - .gen_bucket_range_for_update(bucket_update_diff_size_threshold) + .gen_bucket_range_for_update(region_bucket_max_size) } } @@ -448,3 +443,178 @@ where self.schedule_tick(PeerTick::ReportBuckets); } } + +#[cfg(test)] +mod tests { + use super::*; + + // create BucketStatsInfo include three keys: ["","100","200",""]. 
+ fn mock_bucket_stats_info() -> BucketStatsInfo { + let mut bucket_stats_info = BucketStatsInfo::default(); + let cfg = Config::default(); + let next_bucket_version = 1; + let bucket_ranges = None; + let mut region_epoch = RegionEpoch::default(); + region_epoch.set_conf_ver(1); + region_epoch.set_version(1); + let mut region = metapb::Region::default(); + region.set_id(1); + + let mut buckets = vec![]; + let mut bucket = Bucket::default(); + bucket.keys.push(vec![100]); + bucket.keys.push(vec![200]); + buckets.insert(0, bucket); + + let _ = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + bucket_stats_info + } + + #[test] + pub fn test_version() { + let mut bucket_stats_info = mock_bucket_stats_info(); + assert_eq!(1, bucket_stats_info.version()); + bucket_stats_info.set_bucket_stat(None); + assert_eq!(1, bucket_stats_info.version()); + + let mut meta = BucketMeta::default(); + meta.version = 2; + meta.keys.push(vec![]); + meta.keys.push(vec![]); + let bucket_stat = BucketStat::from_meta(Arc::new(meta)); + bucket_stats_info.set_bucket_stat(Some(bucket_stat)); + assert_eq!(2, bucket_stats_info.version()); + } + + #[test] + pub fn test_insert_new_buckets() { + let bucket_stats_info = mock_bucket_stats_info(); + + let cfg = Config::default(); + let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stat.meta.keys + ); + for i in 0..bucket_stat.stats.write_bytes.len() { + assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); + assert_eq!(0, bucket_stat.stats.write_bytes[i]); + } + } + + #[test] + pub fn test_report_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); + let bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + } + + #[test] + pub fn test_spilt_and_merge_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let next_bucket_version = 2; + let mut region = metapb::Region::default(); + region.set_id(1); + let cfg = Config::default(); + let bucket_size = cfg.region_bucket_size.0; + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let region_epoch = bucket_stats.meta.region_epoch.clone(); + + // step1: update buckets flow + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step2: tick not affect anything + let bucket_ranges = Some(vec![]); + let buckets = vec![]; + let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch.clone(), + region.clone(), + bucket_ranges, + ); + let bucket_stats = 
bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert!(!change_bucket_version); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step3: split key 50 + let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![vec![50]]; + bucket.size = bucket_size; + let mut buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets.clone(), + region_epoch.clone(), + region.clone(), + bucket_ranges.clone(), + ); + assert!(change_bucket_version); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![50], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); + + // step4: merge [50-100] to [0-50], + bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![]; + bucket.size = 0; + buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + assert!(change_bucket_version); + + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); + + // report buckets doesn't be affected by the split and merge. + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); + } +} diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 061a5ad51262..77915dd0378e 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -57,7 +57,6 @@ pub enum Task { }, // In region.rs. RegionHeartbeat(RegionHeartbeatTask), - ReportRegionBuckets(BucketStat), UpdateReadStats(ReadStats), UpdateWriteStats(WriteStats), UpdateRegionCpuRecords(Arc), @@ -85,6 +84,7 @@ pub enum Task { initial_status: u64, txn_ext: Arc, }, + // BucketStat is the delta write flow of the bucket. 
ReportBuckets(BucketStat), ReportMinResolvedTs { store_id: u64, @@ -123,7 +123,6 @@ impl Display for Task { hb_task.region, hb_task.peer.get_id(), ), - Task::ReportRegionBuckets(ref buckets) => write!(f, "report buckets: {:?}", buckets), Task::UpdateReadStats(ref stats) => { write!(f, "update read stats: {stats:?}") } @@ -314,7 +313,6 @@ where write_io_rates, } => self.handle_update_store_infos(cpu_usages, read_io_rates, write_io_rates), Task::RegionHeartbeat(task) => self.handle_region_heartbeat(task), - Task::ReportRegionBuckets(buckets) => self.handle_report_region_buckets(buckets), Task::UpdateReadStats(stats) => self.handle_update_read_stats(stats), Task::UpdateWriteStats(stats) => self.handle_update_write_stats(stats), Task::UpdateRegionCpuRecords(records) => self.handle_update_region_cpu_records(records), @@ -341,7 +339,7 @@ where initial_status, txn_ext, } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), - Task::ReportBuckets(buckets) => self.handle_report_region_buckets(buckets), + Task::ReportBuckets(delta_buckets) => self.handle_report_region_buckets(delta_buckets), Task::ReportMinResolvedTs { store_id, min_resolved_ts, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 763e12fff072..d3ef54bd75a9 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -339,9 +339,9 @@ where self.is_hb_receiver_scheduled = true; } - pub fn handle_report_region_buckets(&mut self, region_buckets: BucketStat) { - let region_id = region_buckets.meta.region_id; - self.merge_buckets(region_buckets); + pub fn handle_report_region_buckets(&mut self, delta_buckets: BucketStat) { + let region_id = delta_buckets.meta.region_id; + self.merge_buckets(delta_buckets); let report_buckets = self.region_buckets.get_mut(®ion_id).unwrap(); let last_report_ts = if report_buckets.last_report_ts.is_zero() { self.start_ts @@ -388,8 +388,8 @@ where .engine_total_query_num .add_query_stats(®ion_info.query_stats.0); } - for (_, region_buckets) in std::mem::take(&mut stats.region_buckets) { - self.merge_buckets(region_buckets); + for (_, delta_buckets) in std::mem::take(&mut stats.region_buckets) { + self.merge_buckets(delta_buckets); } if !stats.region_infos.is_empty() { self.stats_monitor.maybe_send_read_stats(stats); @@ -424,18 +424,18 @@ where } } - fn merge_buckets(&mut self, mut buckets: BucketStat) { - let region_id = buckets.meta.region_id; + fn merge_buckets(&mut self, mut delta: BucketStat) { + let region_id = delta.meta.region_id; self.region_buckets .entry(region_id) .and_modify(|report_bucket| { let current = &mut report_bucket.current_stat; - if current.meta < buckets.meta { - std::mem::swap(current, &mut buckets); + if current.meta < delta.meta { + std::mem::swap(current, &mut delta); } - current.merge(&buckets); + current.merge(&delta); }) - .or_insert_with(|| ReportBucket::new(buckets)); + .or_insert_with(|| ReportBucket::new(delta)); } fn calculate_region_cpu_records( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 513e9c0636a3..b6d7f8fcfcc3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5970,27 +5970,6 @@ where } }; - // bucket version layout - // term logical counter - // |-----------|-----------| - // high bits low bits - // term: given 10s election timeout, the 32 bit means 1362 year running time - let gen_bucket_version = |term, 
current_version| { - let current_version_term = current_version >> 32; - let bucket_version: u64 = if current_version_term == term { - current_version + 1 - } else { - if term > u32::MAX.into() { - error!( - "unexpected term {} more than u32::MAX. Bucket version will be backward.", - term - ); - } - term << 32 - }; - bucket_version - }; - let region = self.fsm.peer.region(); if util::is_epoch_stale(®ion_epoch, region.get_region_epoch()) { info!( @@ -6042,7 +6021,7 @@ where region_buckets = self.fsm.peer.region_buckets.clone().unwrap(); let mut meta = (*region_buckets.meta).clone(); if !buckets.is_empty() { - meta.version = gen_bucket_version(self.fsm.peer.term(), current_version); + meta.version = util::gen_bucket_version(self.fsm.peer.term(), current_version); } meta.region_epoch = region_epoch; for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { @@ -6096,7 +6075,7 @@ where let mut meta = BucketMeta { region_id: self.fsm.region_id(), region_epoch, - version: gen_bucket_version(self.fsm.peer.term(), current_version), + version: util::gen_bucket_version(self.fsm.peer.term(), current_version), keys: bucket_keys, sizes: vec![self.ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], }; diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 519d486102c4..d9076a67d8a9 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -160,6 +160,20 @@ pub fn new_empty_snapshot( snapshot } +pub fn gen_bucket_version(term: u64, current_version: u64) -> u64 { + // term logical counter + // |-----------|-----------| + // high bits low bits + // term: given 10s election timeout, the 32 bit means 1362 year running time + let current_version_term = current_version >> 32; + let bucket_version: u64 = if current_version_term == term { + current_version + 1 + } else { + term << 32 + }; + bucket_version +} + const STR_CONF_CHANGE_ADD_NODE: &str = "AddNode"; const STR_CONF_CHANGE_REMOVE_NODE: &str = "RemoveNode"; const STR_CONF_CHANGE_ADDLEARNER_NODE: &str = "AddLearner"; From 64d2129a0c21bc1e8521c38dd144a327baa88965 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 10 Oct 2023 13:01:53 +0800 Subject: [PATCH 080/203] config: set a longer rocksdb io limiter smooth window for raft-v2 (#15734) ref tikv/tikv#11470 Signed-off-by: glorv --- Cargo.lock | 6 +++--- cmd/tikv-ctl/src/main.rs | 2 +- src/config/mod.rs | 21 ++++++++++++++++++--- src/server/engine_factory.rs | 2 +- tests/integrations/storage/test_titan.rs | 4 +++- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a10755f5a7f1..c221af119e98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2998,7 +2998,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" +source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3017,7 +3017,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" +source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "bzip2-sys", "cc", @@ -4936,7 +4936,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" 
+source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6baa1fe6c39d..df17e81f1ef4 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1048,7 +1048,7 @@ fn build_rocks_opts(cfg: &TikvConfig) -> engine_rocks::RocksDbOptions { .unwrap() .map(Arc::new); let env = get_env(key_manager, None /* io_rate_limiter */).unwrap(); - let resource = cfg.rocksdb.build_resources(env); + let resource = cfg.rocksdb.build_resources(env, cfg.storage.engine); cfg.rocksdb.build_opt(&resource, cfg.storage.engine) } diff --git a/src/config/mod.rs b/src/config/mod.rs index d18d6f8cda0f..911308809c6d 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1412,14 +1412,25 @@ impl DbConfig { } } - pub fn build_resources(&self, env: Arc) -> DbResources { + pub fn build_resources(&self, env: Arc, engine: EngineType) -> DbResources { let rate_limiter = if self.rate_bytes_per_sec.0 > 0 { + // for raft-v2, we use a longer window to make the compaction io smoother + let (tune_per_secs, window_size, recent_size) = match engine { + // 1s tune duraion, long term window is 5m, short term window is 30s. + // this is the default settings. + EngineType::RaftKv => (1, 300, 30), + // 5s tune duraion, long term window is 1h, short term window is 5m + EngineType::RaftKv2 => (5, 720, 60), + }; Some(Arc::new(RateLimiter::new_writeampbased_with_auto_tuned( self.rate_bytes_per_sec.0 as i64, (self.rate_limiter_refill_period.as_millis() * 1000) as i64, 10, // fairness self.rate_limiter_mode, self.rate_limiter_auto_tuned, + tune_per_secs, + window_size, + recent_size, ))) } else { None @@ -4844,7 +4855,9 @@ mod tests { fn test_rocks_rate_limit_zero() { let mut tikv_cfg = TikvConfig::default(); tikv_cfg.rocksdb.rate_bytes_per_sec = ReadableSize(0); - let resource = tikv_cfg.rocksdb.build_resources(Arc::new(Env::default())); + let resource = tikv_cfg + .rocksdb + .build_resources(Arc::new(Env::default()), tikv_cfg.storage.engine); tikv_cfg .rocksdb .build_opt(&resource, tikv_cfg.storage.engine); @@ -5008,7 +5021,9 @@ mod tests { Arc, ) { assert_eq!(F::TAG, cfg.storage.api_version()); - let resource = cfg.rocksdb.build_resources(Arc::default()); + let resource = cfg + .rocksdb + .build_resources(Arc::default(), cfg.storage.engine); let engine = RocksDBEngine::new( &cfg.storage.data_dir, Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 85de282b1377..3593c01ca7fb 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -56,7 +56,7 @@ impl KvEngineFactoryBuilder { flow_listener: None, sst_recovery_sender: None, encryption_key_manager: key_manager, - db_resources: config.rocksdb.build_resources(env), + db_resources: config.rocksdb.build_resources(env, config.storage.engine), cf_resources: config.rocksdb.build_cf_resources(cache), state_storage: None, lite: false, diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 9c3eeec0c831..4bb8fee40878 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -159,7 +159,9 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); - let resource = 
cfg.rocksdb.build_resources(Default::default()); + let resource = cfg + .rocksdb + .build_resources(Default::default(), cfg.storage.engine); let kv_db_opts = cfg.rocksdb.build_opt(&resource, cfg.storage.engine); let kv_cfs_opts = cfg.rocksdb.build_cf_opts( &cfg.rocksdb.build_cf_resources(cache), From 905e8bffbee3a289198b31de70e418c101f3be78 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 10 Oct 2023 14:01:54 +0800 Subject: [PATCH 081/203] raftstore: disable region bucket for raftstore v1 by default (#15740) ref tikv/tikv#15719 disable region bucket for raftstore v1 by default Signed-off-by: SpadeA-Tang --- .../raftstore/src/coprocessor/config.rs | 21 ++++++++++--------- src/config/mod.rs | 11 +++++++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index e1246e8d59d8..b1dc3830bbb3 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -168,7 +168,7 @@ impl Config { Ok(()) } - pub fn validate(&mut self) -> Result<()> { + pub fn validate(&mut self, raft_kv_v2: bool) -> Result<()> { if self.region_split_keys.is_none() { self.region_split_keys = Some((self.region_split_size().as_mb_f64() * 10000.0) as u64); } @@ -199,8 +199,9 @@ impl Config { None => self.region_max_keys = Some(self.region_split_keys() / 2 * 3), } let res = self.validate_bucket_size(); - // If it's OK to enable bucket, we will prefer to enable it if useful. - if let Ok(()) = res && self.enable_region_bucket.is_none() { + // If it's OK to enable bucket, we will prefer to enable it if useful for + // raftstore-v2. + if let Ok(()) = res && self.enable_region_bucket.is_none() && raft_kv_v2 { let useful = self.region_split_size() >= self.region_bucket_size * 2; self.enable_region_bucket = Some(useful); } else if let Err(e) = res && self.enable_region_bucket() { @@ -237,39 +238,39 @@ mod tests { #[test] fn test_config_validate() { let mut cfg = Config::default(); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); cfg = Config::default(); cfg.region_max_size = Some(ReadableSize(10)); cfg.region_split_size = Some(ReadableSize(20)); - cfg.validate().unwrap_err(); + cfg.validate(false).unwrap_err(); cfg = Config::default(); cfg.region_max_size = None; cfg.region_split_size = Some(ReadableSize(20)); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_max_size, Some(ReadableSize(30))); cfg = Config::default(); cfg.region_max_keys = Some(10); cfg.region_split_keys = Some(20); - cfg.validate().unwrap_err(); + cfg.validate(false).unwrap_err(); cfg = Config::default(); cfg.region_max_keys = None; cfg.region_split_keys = Some(20); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_max_keys, Some(30)); cfg = Config::default(); cfg.enable_region_bucket = Some(false); cfg.region_split_size = Some(ReadableSize(20)); cfg.region_bucket_size = ReadableSize(30); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); cfg = Config::default(); cfg.region_split_size = Some(ReadableSize::mb(20)); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_split_keys, Some(200000)); } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 911308809c6d..0eb006363f0a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3725,7 +3725,8 @@ impl TikvConfig { self.raft_engine.validate()?; self.server.validate()?; self.pd.validate()?; - 
self.coprocessor.validate()?; + self.coprocessor + .validate(self.storage.engine == EngineType::RaftKv2)?; self.raft_store.validate( self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket(), @@ -6238,21 +6239,25 @@ mod tests { let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); default_cfg.coprocessor.optimize_for(false); - default_cfg.coprocessor.validate().unwrap(); + default_cfg.coprocessor.validate(false).unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + assert!(!default_cfg.coprocessor.enable_region_bucket()); + default_cfg.coprocessor.validate(true).unwrap(); assert!(default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); default_cfg.coprocessor.optimize_for(true); - default_cfg.coprocessor.validate().unwrap(); + default_cfg.coprocessor.validate(false).unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + assert!(!default_cfg.coprocessor.enable_region_bucket()); + default_cfg.coprocessor.validate(true).unwrap(); assert!(default_cfg.coprocessor.enable_region_bucket()); } From 88aaaa3e7b1e194d389fee6a9831f7491d7f9acd Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 10 Oct 2023 05:18:25 -0500 Subject: [PATCH 082/203] status_server: Add symbol service to support remote fetching symbolized heap profile (#15695) close tikv/tikv#15732 Jeprof supports generating the svg by remote fetching, so we can add a symbol service following the [pprof format](https://gperftools.github.io/gperftools/pprof_remote_servers.html), then with ` jeprof --show_bytes http://:20180/debug/pprof/heap --svg` it can simply get the heap profiling svg from remote. With this PR, we can get rid of the limitation that the heap profile must be processed with the corresponding tikv binary and perl runtime which is used by `jeprof`. Later, we only need to install `jeprof` and `perl` in tidb_dashboard environment and collect the heap profile just like how CPU profile does. 
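
For reference, a minimal sketch (not part of this patch) of the symbolization step behind the new `/debug/pprof/symbol` endpoint: each hex address in the POST body is resolved through the `backtrace` crate, and inlined frames are joined with `--`. `resolve_symbol` below is an illustrative helper name only, not an API added by this change:

```rust
// Minimal sketch: map one profile address back to a function name, the way the
// handler does for every '+'-separated hex address in the request body.
// Assumes the `backtrace` crate is available as a dependency.
fn resolve_symbol(addr: usize) -> String {
    let mut frames = Vec::new();
    backtrace::resolve(addr as *mut std::ffi::c_void, |sym| {
        // A single address can expand to several frames when inlined.
        if let Some(name) = sym.name() {
            frames.push(name.to_string());
        }
    });
    if frames.is_empty() {
        format!("{:#x} ??", addr) // unknown mapping
    } else {
        format!("{:#x} {}", addr, frames.join("--")) // inlined frames joined by "--"
    }
}
```

jeprof can then substitute these names for the raw addresses when it renders the remotely fetched profile.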
Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 22 +++--- src/server/status_server/mod.rs | 136 ++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c221af119e98..fccff7d7822d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,9 +89,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.26" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "api_version" @@ -777,9 +777,9 @@ checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" @@ -3168,9 +3168,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.5.3" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" dependencies = [ "libc 0.2.146", ] @@ -5834,7 +5834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac457d054f793cedfde6f32d21d692b8351cfec9084fefd0470c0373f6d799bc" dependencies = [ "debugid", - "memmap2 0.5.3", + "memmap2 0.5.10", "stable_deref_trait", "uuid 1.2.1", ] @@ -7237,9 +7237,13 @@ dependencies = [ [[package]] name = "twox-hash" -version = "1.5.0" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bfd5b7557925ce778ff9b9ef90e3ade34c524b5ff10e239c69a42d546d2af56" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if 1.0.0", + "static_assertions", +] [[package]] name = "txn_types" diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 98077d9e93f6..3e68b0b6310d 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -3,6 +3,7 @@ /// Provides profilers for TiKV. mod profile; use std::{ + env::args, error::Error as StdError, net::SocketAddr, path::PathBuf, @@ -308,6 +309,83 @@ where }) } + async fn get_cmdline(_req: Request) -> hyper::Result> { + let args = args().into_iter().fold(String::new(), |mut a, b| { + a.push_str(&b); + a.push('\x00'); + a + }); + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .body(args.into()) + .unwrap(); + Ok(response) + } + + async fn get_symbol_count(req: Request) -> hyper::Result> { + assert_eq!(req.method(), Method::GET); + // We don't know how many symbols we have, but we + // do have symbol information. pprof only cares whether + // this number is 0 (no symbols available) or > 0. 
+ let text = "num_symbols: 1\n"; + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .header("Content-Length", text.len()) + .body(text.into()) + .unwrap(); + Ok(response) + } + + // The request and response format follows pprof remote server + // https://gperftools.github.io/gperftools/pprof_remote_servers.html + // Here is the go pprof implementation: + // https://github.com/golang/go/blob/3857a89e7eb872fa22d569e70b7e076bec74ebbb/src/net/http/pprof/pprof.go#L191 + async fn get_symbol(req: Request) -> hyper::Result> { + assert_eq!(req.method(), Method::POST); + let mut text = String::new(); + let body_bytes = hyper::body::to_bytes(req.into_body()).await?; + let body = String::from_utf8(body_bytes.to_vec()).unwrap(); + + // The request body is a list of addr to be resolved joined by '+'. + // Resolve addrs with addr2line and write the symbols each per line in + // response. + for pc in body.split('+') { + let addr = usize::from_str_radix(pc.trim_start_matches("0x"), 16).unwrap_or(0); + if addr == 0 { + info!("invalid addr: {}", addr); + continue; + } + + // Would be multiple symbols if inlined. + let mut syms = vec![]; + backtrace::resolve(addr as *mut std::ffi::c_void, |sym| { + let name = sym + .name() + .unwrap_or_else(|| backtrace::SymbolName::new(b"")); + syms.push(name.to_string()); + }); + + if !syms.is_empty() { + // join inline functions with '--' + let f = syms.join("--"); + // should be + text.push_str(format!("{:#x} {}\n", addr, f).as_str()); + } else { + info!("can't resolve mapped addr: {:#x}", addr); + text.push_str(format!("{:#x} ??\n", addr).as_str()); + } + } + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .header("Content-Length", text.len()) + .body(text.into()) + .unwrap(); + Ok(response) + } + async fn update_config( cfg_controller: ConfigController, req: Request, @@ -693,6 +771,11 @@ where (Method::GET, "/debug/pprof/heap") => { Self::dump_heap_prof_to_resp(req).await } + (Method::GET, "/debug/pprof/cmdline") => Self::get_cmdline(req).await, + (Method::GET, "/debug/pprof/symbol") => { + Self::get_symbol_count(req).await + } + (Method::POST, "/debug/pprof/symbol") => Self::get_symbol(req).await, (Method::GET, "/config") => { Self::get_config(req, &cfg_controller).await } @@ -1658,6 +1741,59 @@ mod tests { status_server.stop(); } + #[test] + fn test_pprof_symbol_service() { + let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut status_server = StatusServer::new( + 1, + ConfigController::default(), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + GrpcServiceManager::dummy(), + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + let client = Client::new(); + + let mut addr = None; + backtrace::trace(|f| { + addr = Some(f.ip()); + false + }); + assert!(addr.is_some()); + + let uri = Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/debug/pprof/symbol") + .build() + .unwrap(); + let req = Request::builder() + .method(Method::POST) + .uri(uri) + .body(Body::from(format!("{:p}", addr.unwrap()))) + .unwrap(); + let handle = status_server + .thread_pool + .spawn(async move { client.request(req).await.unwrap() }); + let resp = block_on(handle).unwrap(); + assert_eq!(resp.status(), 
StatusCode::OK); + let body_bytes = block_on(hyper::body::to_bytes(resp.into_body())).unwrap(); + assert!( + String::from_utf8(body_bytes.as_ref().to_owned()) + .unwrap() + .split(' ') + .last() + .unwrap() + .starts_with("backtrace::backtrace") + ); + status_server.stop(); + } + #[test] fn test_metrics() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); From 262845cefc4810aa8bdcdc7ec18fa3d4469547de Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 11 Oct 2023 13:27:24 +0800 Subject: [PATCH 083/203] raftstore-v2: support to make protection when disk full. (#15558) close tikv/tikv#15170 This pr is used to protect `raftstore-v2` when disk full. And all checking and validation is transplant from `raftstore`. --- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/peer.rs | 1 + .../operation/command/admin/merge/prepare.rs | 36 +- .../src/operation/command/admin/mod.rs | 57 ++- .../src/operation/command/admin/split.rs | 16 + .../command/admin/transfer_leader.rs | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 6 + .../src/operation/command/write/mod.rs | 10 +- components/raftstore-v2/src/operation/life.rs | 326 +++++++++++++- components/raftstore-v2/src/operation/pd.rs | 2 +- .../raftstore-v2/src/operation/query/lease.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 75 +++- .../raftstore-v2/src/operation/txn_ext.rs | 16 +- components/raftstore-v2/src/raft/peer.rs | 21 +- components/raftstore-v2/src/router/message.rs | 11 + components/raftstore/src/store/mod.rs | 4 +- components/raftstore/src/store/peer.rs | 9 + components/test_raftstore-v2/src/cluster.rs | 31 +- components/test_raftstore-v2/src/util.rs | 111 ++++- src/server/raftkv2/mod.rs | 1 + tests/failpoints/cases/test_disk_full.rs | 401 +++++++++--------- .../integrations/raftstore/test_stale_read.rs | 2 +- 22 files changed, 897 insertions(+), 251 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index cd5ae8f42f79..5ed84c709371 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -47,7 +47,7 @@ use tikv_util::{ box_err, config::{Tracker, VersionTrack}, log::SlogFormat, - sys::SysQuota, + sys::{disk::get_disk_status, SysQuota}, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, Limiter}, timer::{SteadyTimer, GLOBAL_TIMER_HANDLE}, worker::{Builder, LazyWorker, Scheduler, Worker}, @@ -104,6 +104,10 @@ pub struct StoreContext { /// Disk usage for the store itself. pub self_disk_usage: DiskUsage, + // TODO: how to remove offlined stores? + /// Disk usage for other stores. The store itself is not included. + /// Only contains items which is not `DiskUsage::Normal`. + pub store_disk_usages: HashMap, pub snap_mgr: TabletSnapManager, pub global_stat: GlobalStoreStat, @@ -228,6 +232,7 @@ impl PollHandler PeerFsmDelegate<'a, EK, ER, write.header, write.data, write.ch, + Some(write.disk_full_opt), ); } PeerMsg::UnsafeWrite(write) => { diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 6ff982eea8cb..4a5875f70975 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -219,22 +219,7 @@ impl Peer { if r.is_ok() { self.proposal_control_mut().set_pending_prepare_merge(false); } else { - // Match v1::post_propose_fail. 
- // If we just failed to propose PrepareMerge, the pessimistic locks status - // may become MergingRegion incorrectly. So, we have to revert it here. - // Note: The `is_merging` check from v1 is removed because proposed - // `PrepareMerge` rejects all writes (in `ProposalControl::check_conflict`). - assert!( - !self.proposal_control().is_merging(), - "{}", - SlogFormat(&self.logger) - ); - self.take_merge_context(); - self.proposal_control_mut().set_pending_prepare_merge(false); - let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); - if pessimistic_locks.status == LocksStatus::MergingRegion { - pessimistic_locks.status = LocksStatus::Normal; - } + self.post_prepare_merge_fail(); } r } @@ -707,6 +692,25 @@ impl Peer { self.propose(store_ctx, cmd.write_to_bytes().unwrap())?; Ok(()) } + + pub fn post_prepare_merge_fail(&mut self) { + // Match v1::post_propose_fail. + // If we just failed to propose PrepareMerge, the pessimistic locks status + // may become MergingRegion incorrectly. So, we have to revert it here. + // Note: The `is_merging` check from v1 is removed because proposed + // `PrepareMerge` rejects all writes (in `ProposalControl::check_conflict`). + assert!( + !self.proposal_control().is_merging(), + "{}", + SlogFormat(&self.logger) + ); + self.take_merge_context(); + self.proposal_control_mut().set_pending_prepare_merge(false); + let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); + if pessimistic_locks.status == LocksStatus::MergingRegion { + pessimistic_locks.status = LocksStatus::Normal; + } + } } impl Apply { diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index d59a564c696d..9d7fee55ae4e 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -12,6 +12,7 @@ use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ + kvrpcpb::DiskFullOpt, metapb::{PeerRole, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessageType, FlushMemtable, RaftMessage}, @@ -33,13 +34,13 @@ use raftstore::{ }, Error, }; -use slog::{error, info}; +use slog::{debug, error, info}; use split::SplitResult; pub use split::{ report_split_init_finish, temp_split_path, RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SplitPendingAppend, SPLIT_PREFIX, }; -use tikv_util::{box_err, log::SlogFormat, slog_panic}; +use tikv_util::{box_err, log::SlogFormat, slog_panic, sys::disk::DiskUsage}; use txn_types::WriteBatchFlags; use self::flashback::FlashbackResult; @@ -103,6 +104,18 @@ impl Peer { let pre_transfer_leader = cmd_type == AdminCmdType::TransferLeader && !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL); + let is_conf_change = apply::is_conf_change_cmd(&req); + + // Check whether the admin request can be proposed when disk full. 
+ let can_skip_check = is_transfer_leader || pre_transfer_leader || is_conf_change; + if !can_skip_check && let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + self.post_propose_fail(cmd_type); + return; + } // The admin request is rejected because it may need to update epoch checker // which introduces an uncertainty and may breaks the correctness of epoch @@ -134,9 +147,11 @@ impl Peer { ch.report_error(resp); return; } + // Prepare Merge need to be broadcast to as many as followers when disk full. + self.on_prepare_merge(cmd_type, ctx); // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); - let res = if apply::is_conf_change_cmd(&req) { + let res = if is_conf_change { self.propose_conf_change(ctx, req) } else { // propose other admin command. @@ -258,6 +273,42 @@ impl Peer { self.post_propose_command(ctx, res, vec![ch], true); } + fn on_prepare_merge( + &mut self, + cmd_type: AdminCmdType, + ctx: &StoreContext, + ) { + let is_merge_cmd = + cmd_type == AdminCmdType::PrepareMerge || cmd_type == AdminCmdType::RollbackMerge; + let has_disk_full_peers = self.abnormal_peer_context().disk_full_peers().is_empty(); + let proposal_index = self.next_proposal_index(); + if is_merge_cmd + && (!matches!(ctx.self_disk_usage, DiskUsage::Normal) || !has_disk_full_peers) + { + self.has_region_merge_proposal = true; + self.region_merge_proposal_index = proposal_index; + let mut peers = vec![]; + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .iter_mut() + .for_each(|(k, v)| { + if !matches!(v.0, DiskUsage::AlreadyFull) { + v.1 = true; + peers.push(*k); + } + }); + debug!( + self.logger, + "adjust max inflight msgs"; + "cmd_type" => ?cmd_type, + "raft_max_inflight_msgs" => ctx.cfg.raft_max_inflight_msgs, + "region" => self.region_id() + ); + self.adjust_peers_max_inflight_msgs(&peers, ctx.cfg.raft_max_inflight_msgs); + } + } + fn start_pre_flush( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0f9cae7218df..cfbd7678c171 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -35,6 +35,7 @@ use engine_traits::{ use fail::fail_point; use futures::channel::oneshot; use kvproto::{ + kvrpcpb::DiskFullOpt, metapb::{self, Region, RegionEpoch}, pdpb::CheckPolicy, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, @@ -332,6 +333,14 @@ impl Peer { )))); return; } + // Check whether the admin request can be proposed when disk full. + if let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + info!(self.logger, "disk is full, skip split"; "err" => ?e); + ch.set_result(cmd_resp::new_error(e)); + return; + } if let Err(e) = util::validate_split_region( self.region_id(), self.peer_id(), @@ -365,6 +374,13 @@ impl Peer { info!(self.logger, "not leader, skip."); return; } + // Check whether the admin request can be proposed when disk full. 
+ if let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + info!(self.logger, "disk is full, skip half split"; "err" => ?e); + return; + } let region = self.region(); if util::is_epoch_stale(&rhs.epoch, region.get_region_epoch()) { diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 4cdeba3bc411..bf9cb426255d 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -118,7 +118,7 @@ impl Peer { transferee } - fn pre_transfer_leader(&mut self, peer: &metapb::Peer) -> bool { + pub fn pre_transfer_leader(&mut self, peer: &metapb::Peer) -> bool { if self.raft_group().raft.has_pending_conf() { info!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index e579d22c6da0..70cdbfda237d 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -481,6 +481,12 @@ impl Peer { } self.check_unsafe_recovery_state(ctx); } + + pub fn post_propose_fail(&mut self, cmd_type: AdminCmdType) { + if cmd_type == AdminCmdType::PrepareMerge { + self.post_prepare_merge_fail(); + } + } } #[derive(Debug)] diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index a9d8bd664fe3..6eacc75c0f1a 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -5,7 +5,7 @@ use engine_traits::{ }; use fail::fail_point; use futures::channel::oneshot; -use kvproto::raft_cmdpb::RaftRequestHeader; +use kvproto::{kvrpcpb::DiskFullOpt, raft_cmdpb::RaftRequestHeader}; use raftstore::{ store::{ cmd_resp, @@ -42,6 +42,7 @@ impl Peer { header: Box, data: SimpleWriteBinary, ch: CmdResChannel, + disk_full_opt: Option, ) { if !self.serving() { apply::notify_req_region_removed(self.region_id(), ch); @@ -59,6 +60,13 @@ impl Peer { ch.report_error(resp); return; } + // Check whether the write request can be proposed with the given disk full + // option. + if let Some(opt) = disk_full_opt && let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opt) { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); if let Some(conflict) = self.proposal_control_mut().check_conflict(None) { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 84bded8a9bbb..5828a7bb661b 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -26,28 +26,34 @@ //! `merged_records`, to avoid race between destroy and merge, leader needs to //! ask target peer to destroy source peer. 
-use std::{cmp, mem}; +use std::{cmp, collections::HashSet, mem}; use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ - metapb::{self, Region}, + kvrpcpb::DiskFullOpt, + metapb::{self, PeerRole, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore::store::{ - fsm::{ - apply, - life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, - Proposal, +use raft::eraftpb::MessageType; +use raftstore::{ + store::{ + fsm::{ + apply, + life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, + Proposal, + }, + metrics::RAFT_PEER_PENDING_DURATION, + util, DiskFullPeers, Transport, WriteTask, }, - metrics::RAFT_PEER_PENDING_DURATION, - util, Transport, WriteTask, + Error, Result, }; use slog::{debug, error, info, warn}; use tikv_util::{ store::find_peer, + sys::disk::DiskUsage, time::{duration_to_sec, Instant}, }; @@ -126,16 +132,22 @@ pub struct AbnormalPeerContext { pending_peers: Vec<(u64, Instant)>, /// A inaccurate cache about which peer is marked as down. down_peers: Vec, + // disk full peer set. + disk_full_peers: DiskFullPeers, + // show whether an already disk full TiKV appears in the potential majority set. + dangerous_majority_set: bool, } impl AbnormalPeerContext { #[inline] pub fn is_empty(&self) -> bool { - self.pending_peers.is_empty() && self.down_peers.is_empty() + self.pending_peers.is_empty() && self.down_peers.is_empty() /* && self.disk_full_peers.is_empty() */ } #[inline] pub fn reset(&mut self) { + // No need to refresh disk_full_peers as it will be refreshed + // automatically when the disk usage updated. self.pending_peers.clear(); self.down_peers.clear(); } @@ -174,6 +186,26 @@ impl AbnormalPeerContext { RAFT_PEER_PENDING_DURATION.observe(elapsed); }); } + + #[inline] + pub fn disk_full_peers(&self) -> &DiskFullPeers { + &self.disk_full_peers + } + + #[inline] + pub fn disk_full_peers_mut(&mut self) -> &mut DiskFullPeers { + &mut self.disk_full_peers + } + + #[inline] + pub fn is_dangerous_majority_set(&self) -> bool { + self.dangerous_majority_set + } + + #[inline] + pub fn setup_dangerous_majority_set(&mut self, is_dangerous: bool) { + self.dangerous_majority_set = is_dangerous; + } } #[derive(Default)] @@ -415,6 +447,20 @@ impl Store { ctx.raft_metrics.message_dropped.stale_msg.inc(); return false; } + // Check whether this message should be dropped when disk full. + let msg_type = msg.get_message().get_msg_type(); + if matches!(ctx.self_disk_usage, DiskUsage::AlreadyFull) + && MessageType::MsgTimeoutNow == msg_type + { + debug!( + self.logger(), + "skip {:?} because of disk full", msg_type; + "region_id" => region_id, "peer_id" => to_peer.id, + ); + ctx.raft_metrics.message_dropped.disk_full.inc(); + return false; + } + let destroyed = match check_if_to_peer_destroyed(&ctx.engine, &msg, self.store_id()) { Ok(d) => d, Err(e) => { @@ -836,6 +882,266 @@ impl Peer { self.maybe_schedule_gc_peer_tick(); } + pub fn adjust_peers_max_inflight_msgs(&mut self, peers: &[u64], raft_max_inflight_msgs: usize) { + peers.iter().for_each(|id| { + self.raft_group_mut() + .raft + .adjust_max_inflight_msgs(*id, raft_max_inflight_msgs); + debug!( + self.logger, + "adjust max inflight msgs"; + "raft_max_inflight_msgs" => raft_max_inflight_msgs, + "peer_id" => id + ); + }); + } + + // Check disk usages for the peer itself and other peers in the raft group. 
+ // The return value indicates whether the proposal is allowed or not. + pub fn check_proposal_with_disk_full_opt( + &mut self, + ctx: &StoreContext, + disk_full_opt: DiskFullOpt, + ) -> Result<()> { + let leader_allowed = match ctx.self_disk_usage { + DiskUsage::Normal => true, + DiskUsage::AlmostFull => !matches!(disk_full_opt, DiskFullOpt::NotAllowedOnFull), + DiskUsage::AlreadyFull => false, + }; + let mut disk_full_stores = Vec::new(); + let abnormal_peer_context = self.abnormal_peer_context(); + let disk_full_peers = abnormal_peer_context.disk_full_peers(); + if !leader_allowed { + disk_full_stores.push(ctx.store_id); + // Try to transfer leader to a node with disk usage normal to maintain write + // availability. If majority node is disk full, to transfer leader or not is not + // necessary. Note: Need to exclude learner node. + if !disk_full_peers.majority() { + let target_peer = self + .region() + .get_peers() + .iter() + .find(|x| { + !disk_full_peers.has(x.get_id()) + && x.get_id() != self.peer_id() + && !self + .abnormal_peer_context() + .down_peers() + .contains(&x.get_id()) + && !matches!(x.get_role(), PeerRole::Learner) + }) + .cloned(); + if let Some(p) = target_peer { + debug!( + self.logger, + "try to transfer leader because of current leader disk full"; + "region_id" => self.region().get_id(), + "peer_id" => self.peer_id(), + "target_peer_id" => p.get_id(), + ); + self.pre_transfer_leader(&p); + } + } + } else { + // Check followers. + if disk_full_peers.is_empty() { + return Ok(()); + } + if !abnormal_peer_context.is_dangerous_majority_set() { + if !disk_full_peers.majority() { + return Ok(()); + } + // Majority peers are in disk full status but the request carries a special + // flag. + if matches!(disk_full_opt, DiskFullOpt::AllowedOnAlmostFull) + && disk_full_peers.peers().values().any(|x| x.1) + { + return Ok(()); + } + } + for peer in self.region().get_peers() { + let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); + if disk_full_peers.peers().get(&peer_id).is_some() { + disk_full_stores.push(store_id); + } + } + } + let errmsg = format!( + "propose failed: tikv disk full, cmd diskFullOpt={:?}, leader diskUsage={:?}", + disk_full_opt, ctx.self_disk_usage + ); + Err(Error::DiskFull(disk_full_stores, errmsg)) + } + + pub fn clear_disk_full_peers(&mut self, ctx: &StoreContext) { + let disk_full_peers = mem::take(self.abnormal_peer_context_mut().disk_full_peers_mut()); + let raft = &mut self.raft_group_mut().raft; + for peer in disk_full_peers.peers().iter() { + raft.adjust_max_inflight_msgs(*peer.0, ctx.cfg.raft_max_inflight_msgs); + } + } + + pub fn refill_disk_full_peers(&mut self, ctx: &StoreContext) { + self.clear_disk_full_peers(ctx); + debug!( + self.logger, + "region id {}, peer id {}, store id {}: refill disk full peers when peer disk usage status changed or merge triggered", + self.region().get_id(), + self.peer_id(), + ctx.store_id, + ); + + // Collect disk full peers and all peers' `next_idx` to find a potential quorum. + let peers_len = self.region().get_peers().len(); + let mut normal_peers = HashSet::default(); + let mut next_idxs = Vec::with_capacity(peers_len); + let mut min_peer_index = u64::MAX; + for peer in self.region().get_peers() { + let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); + let usage = ctx.store_disk_usages.get(&store_id); + if usage.is_none() { + // Always treat the leader itself as normal. 
+ normal_peers.insert(peer_id); + } + if let Some(pr) = self.raft_group().raft.prs().get(peer_id) { + // status 3-normal, 2-almostfull, 1-alreadyfull, only for simplying the sort + // func belowing. + let mut status = 3; + if let Some(usg) = usage { + status = match usg { + DiskUsage::Normal => 3, + DiskUsage::AlmostFull => 2, + DiskUsage::AlreadyFull => 1, + }; + } + + if !self.abnormal_peer_context().down_peers().contains(&peer_id) { + next_idxs.push((peer_id, pr.next_idx, usage, status)); + if min_peer_index > pr.next_idx { + min_peer_index = pr.next_idx; + } + } + } + } + if self.has_region_merge_proposal { + debug!( + self.logger, + "region id {}, peer id {}, store id {} has a merge request, with region_merge_proposal_index {}", + self.region_id(), + self.peer_id(), + ctx.store_id, + self.region_merge_proposal_index + ); + if min_peer_index > self.region_merge_proposal_index { + self.has_region_merge_proposal = false; + } + } + + if normal_peers.len() == peers_len { + return; + } + + // Reverse sort peers based on `next_idx`, `usage` and `store healthy status`, + // then try to get a potential quorum. + next_idxs.sort_by(|x, y| { + if x.3 == y.3 { + y.1.cmp(&x.1) + } else { + y.3.cmp(&x.3) + } + }); + + let majority = !self.raft_group().raft.prs().has_quorum(&normal_peers); + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .set_majority(majority); + // Here set all peers can be sent when merging. + for &(peer, _, usage, ..) in &next_idxs { + if let Some(usage) = usage { + if self.has_region_merge_proposal && !matches!(*usage, DiskUsage::AlreadyFull) { + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .insert(peer, (*usage, true)); + self.raft_group_mut() + .raft + .adjust_max_inflight_msgs(peer, ctx.cfg.raft_max_inflight_msgs); + debug!( + self.logger, + "refill disk full peer max inflight to {} on a merging region: region id {}, peer id {}", + ctx.cfg.raft_max_inflight_msgs, + self.region_id(), + peer + ); + } else { + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .insert(peer, (*usage, false)); + self.raft_group_mut().raft.adjust_max_inflight_msgs(peer, 0); + debug!( + self.logger, + "refill disk full peer max inflight to {} on region without merging: region id {}, peer id {}", + 0, + self.region_id(), + peer + ); + } + } + } + + if !self.abnormal_peer_context().disk_full_peers().majority() { + // Less than majority peers are in disk full status. + return; + } + + let (mut potential_quorum, mut quorum_ok) = (HashSet::default(), false); + let mut is_dangerous_set = false; + for &(peer_id, _, _, status) in &next_idxs { + potential_quorum.insert(peer_id); + + if status == 1 { + // already full peer. + is_dangerous_set = true; + } + + if self.raft_group().raft.prs().has_quorum(&potential_quorum) { + quorum_ok = true; + break; + } + } + + self.abnormal_peer_context_mut() + .setup_dangerous_majority_set(is_dangerous_set); + + // For the Peer with AlreadFull in potential quorum set, we still need to send + // logs to it. To support incoming configure change. + if quorum_ok { + let has_region_merge_proposal = self.has_region_merge_proposal; + let peers = self + .abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut(); + let mut inflight_peers = vec![]; + for peer in potential_quorum { + if let Some(x) = peers.get_mut(&peer) { + // It can help to establish a quorum. + x.1 = true; + // for merge region, all peers have been set to the max. 
+ if !has_region_merge_proposal { + inflight_peers.push(peer); + } + } + } + debug!( + self.logger, + "refill disk full peer max inflight to 1 in potential quorum set: region id {}", + self.region_id(), + ); + self.adjust_peers_max_inflight_msgs(&inflight_peers, 1); + } + } + /// A peer can be destroyed in four cases: /// /// 1. Received a gc message; diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 9bce8f3ba02b..8e392755c5eb 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -103,7 +103,7 @@ impl Peer { let task = pd::Task::RegionHeartbeat(pd::RegionHeartbeatTask { term: self.term(), region: self.region().clone(), - down_peers: self.collect_down_peers(ctx.cfg.max_peer_down_duration.0), + down_peers: self.collect_down_peers(ctx), peer: self.peer().clone(), pending_peers: self.collect_pending_peers(ctx), written_bytes: self.self_stat().written_bytes, diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 84a8ad09ed39..189986f93d2f 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -168,7 +168,7 @@ impl Peer { header.set_term(self.term()); let empty_data = SimpleWriteEncoder::with_capacity(0).encode(); let (ch, _) = CmdResChannel::pair(); - self.on_simple_write(ctx, header, empty_data, ch); + self.on_simple_write(ctx, header, empty_data, ch, None); } /// response the read index request diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1ff07f2ccc1e..3ceb8693c0bc 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -54,6 +54,7 @@ use tikv_util::{ log::SlogFormat, slog_panic, store::find_peer, + sys::disk::DiskUsage, time::{duration_to_sec, monotonic_raw_now, Duration}, }; @@ -265,6 +266,7 @@ impl Peer { "message_type" => %util::MsgType(&msg), "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), + "disk_usage" => ?msg.disk_usage, ); if self.pause_for_replay() && msg.get_message().get_msg_type() == MessageType::MsgAppend { ctx.raft_metrics.message_dropped.recovery.inc(); @@ -287,6 +289,9 @@ impl Peer { return; } } + + self.handle_reported_disk_usage(ctx, &msg); + if msg.get_to_peer().get_store_id() != self.peer().get_store_id() { ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); return; @@ -515,7 +520,11 @@ impl Peer { /// /// If the recipient can't be found, `None` is returned. #[inline] - fn build_raft_message(&mut self, msg: eraftpb::Message) -> Option { + fn build_raft_message( + &mut self, + msg: eraftpb::Message, + disk_usage: DiskUsage, + ) -> Option { let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { @@ -530,6 +539,8 @@ impl Peer { }; let mut raft_msg = self.prepare_raft_message(); + // Fill in the disk usage. 
+ raft_msg.set_disk_usage(disk_usage); raft_msg.set_to_peer(to_peer); if msg.from != self.peer().id { @@ -772,8 +783,9 @@ impl Peer { if !ready.messages().is_empty() { debug_assert!(self.is_leader()); + let disk_usage = ctx.self_disk_usage; for msg in ready.take_messages() { - if let Some(msg) = self.build_raft_message(msg) { + if let Some(msg) = self.build_raft_message(msg, disk_usage) { self.send_raft_message_on_leader(ctx, msg); } } @@ -802,10 +814,11 @@ impl Peer { self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { + let disk_usage = ctx.self_disk_usage; write_task.messages = ready .take_persisted_messages() .into_iter() - .flat_map(|m| self.build_raft_message(m)) + .flat_map(|m| self.build_raft_message(m, disk_usage)) .collect(); } if self.has_pending_messages() { @@ -1069,6 +1082,16 @@ impl Peer { // Exit entry cache warmup state when the peer becomes leader. self.entry_storage_mut().clear_entry_cache_warmup_state(); + if !ctx.store_disk_usages.is_empty() { + self.refill_disk_full_peers(ctx); + debug!( + self.logger, + "become leader refills disk full peers to {:?}", + self.abnormal_peer_context().disk_full_peers(); + "region_id" => self.region_id(), + ); + } + self.region_heartbeat_pd(ctx); self.add_pending_tick(PeerTick::CompactLog); self.add_pending_tick(PeerTick::SplitRegionCheck); @@ -1209,6 +1232,52 @@ impl Peer { ); } } + + fn handle_reported_disk_usage( + &mut self, + ctx: &mut StoreContext, + msg: &RaftMessage, + ) { + let store_id = msg.get_from_peer().get_store_id(); + let peer_id = msg.get_from_peer().get_id(); + let disk_full_peers = self.abnormal_peer_context().disk_full_peers(); + let refill_disk_usages = if matches!(msg.disk_usage, DiskUsage::Normal) { + ctx.store_disk_usages.remove(&store_id); + if !self.is_leader() { + return; + } + disk_full_peers.has(peer_id) + } else { + ctx.store_disk_usages.insert(store_id, msg.disk_usage); + if !self.is_leader() { + return; + } + + disk_full_peers.is_empty() + || disk_full_peers + .get(peer_id) + .map_or(true, |x| x != msg.disk_usage) + }; + + if refill_disk_usages || self.has_region_merge_proposal { + let prev = disk_full_peers.get(peer_id); + if Some(msg.disk_usage) != prev { + info!( + self.logger, + "reported disk usage changes {:?} -> {:?}", prev, msg.disk_usage; + "region_id" => self.region_id(), + "peer_id" => peer_id, + ); + } + self.refill_disk_full_peers(ctx); + debug!( + self.logger, + "raft message refills disk full peers to {:?}", + self.abnormal_peer_context().disk_full_peers(); + "region_id" => self.region_id(), + ); + } + } } impl Storage { diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 272b2526b392..4c875a675ef2 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -9,7 +9,11 @@ use std::sync::{atomic::Ordering, Arc}; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; -use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::RaftRequestHeader}; +use kvproto::{ + kvrpcpb::{DiskFullOpt, ExtraOp}, + metapb::Region, + raft_cmdpb::RaftRequestHeader, +}; use parking_lot::RwLockWriteGuard; use raft::eraftpb; use raftstore::store::{ @@ -266,8 +270,14 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; - self.on_simple_write(ctx, 
write.header, write.data, write.ch); + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), DiskFullOpt::AllowedOnAlmostFull).0 else {unreachable!()}; + self.on_simple_write( + ctx, + write.header, + write.data, + write.ch, + Some(write.disk_full_opt), + ); true } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 4ff47c4b4bbc..2c8b8cef1db2 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -32,6 +32,7 @@ use tikv_util::{slog_panic, time::duration_to_sec}; use super::storage::Storage; use crate::{ + batch::StoreContext, fsm::ApplyScheduler, operation::{ AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, @@ -126,6 +127,10 @@ pub struct Peer { abnormal_peer_context: AbnormalPeerContext, + // region merge logic need to be broadcast to all followers when disk full happens. + pub has_region_merge_proposal: bool, + pub region_merge_proposal_index: u64, + /// Force leader state is only used in online recovery when the majority of /// peers are missing. In this state, it forces one peer to become leader /// out of accordance with Raft election rule, and forbids any @@ -227,6 +232,8 @@ impl Peer { pending_messages: vec![], gc_peer_context: GcPeerContext::default(), abnormal_peer_context: AbnormalPeerContext::default(), + has_region_merge_proposal: false, + region_merge_proposal_index: 0_u64, force_leader_state: None, unsafe_recovery_state: None, }; @@ -600,7 +607,7 @@ impl Peer { ) } - pub fn collect_down_peers(&mut self, max_duration: Duration) -> Vec { + pub fn collect_down_peers(&mut self, ctx: &StoreContext) -> Vec { let mut down_peers = Vec::new(); let mut down_peer_ids = Vec::new(); let now = Instant::now(); @@ -610,7 +617,7 @@ impl Peer { } if let Some(instant) = self.peer_heartbeats.get(&p.get_id()) { let elapsed = now.saturating_duration_since(*instant); - if elapsed >= max_duration { + if elapsed >= ctx.cfg.max_peer_down_duration.0 { let mut stats = pdpb::PeerStats::default(); stats.set_peer(p.clone()); stats.set_down_seconds(elapsed.as_secs()); @@ -619,8 +626,11 @@ impl Peer { } } } + let exist_down_peers = !down_peer_ids.is_empty(); *self.abnormal_peer_context_mut().down_peers_mut() = down_peer_ids; - // TODO: `refill_disk_full_peers` + if exist_down_peers { + self.refill_disk_full_peers(ctx); + } down_peers } @@ -925,6 +935,11 @@ impl Peer { self.last_sent_snapshot_index } + #[inline] + pub fn next_proposal_index(&self) -> u64 { + self.raft_group.raft.raft_log.last_index() + 1 + } + #[inline] pub fn index_term(&self, idx: u64) -> u64 { match self.raft_group.raft.raft_log.term(idx) { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 16d43970e7a1..830286bb1425 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -6,6 +6,7 @@ use std::sync::{mpsc::SyncSender, Arc}; use collections::HashSet; use kvproto::{ import_sstpb::SstMeta, + kvrpcpb::DiskFullOpt, metapb, metapb::RegionEpoch, pdpb, @@ -134,6 +135,7 @@ pub struct SimpleWrite { pub header: Box, pub data: SimpleWriteBinary, pub ch: CmdResChannel, + pub disk_full_opt: DiskFullOpt, } #[derive(Debug)] @@ -296,6 +298,14 @@ impl PeerMsg { pub fn simple_write( header: Box, data: SimpleWriteBinary, + ) -> (Self, CmdResSubscriber) { + PeerMsg::simple_write_with_opt(header, data, DiskFullOpt::default()) + } + + pub fn simple_write_with_opt( + 
header: Box, + data: SimpleWriteBinary, + disk_full_opt: DiskFullOpt, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -304,6 +314,7 @@ impl PeerMsg { header, data, ch, + disk_full_opt, }), sub, ) diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index efd149e7c41b..0ca99efffc4a 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -57,8 +57,8 @@ pub use self::{ }, peer::{ can_amend_read, get_sync_log_from_request, make_transfer_leader_response, - propose_read_index, should_renew_lease, Peer, PeerStat, ProposalContext, ProposalQueue, - RequestInspector, RequestPolicy, TRANSFER_LEADER_COMMAND_REPLY_CTX, + propose_read_index, should_renew_lease, DiskFullPeers, Peer, PeerStat, ProposalContext, + ProposalQueue, RequestInspector, RequestPolicy, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8ef857bfa129..e9350ba7bb00 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5049,6 +5049,15 @@ impl DiskFullPeers { pub fn majority(&self) -> bool { self.majority } + pub fn set_majority(&mut self, majority: bool) { + self.majority = majority; + } + pub fn peers(&self) -> &HashMap { + &self.peers + } + pub fn peers_mut(&mut self) -> &mut HashMap { + &mut self.peers + } pub fn has(&self, peer_id: u64) -> bool { !self.peers.is_empty() && self.peers.contains_key(&peer_id) } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 9d61918bd1f0..496f8cc87dc2 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -37,7 +37,7 @@ use pd_client::PdClient; use raftstore::{ store::{ cmd_resp, initial_region, region_meta::RegionMeta, util::check_key_in_region, Bucket, - BucketRange, Callback, RegionSnapshot, TabletSnapManager, WriteResponse, + BucketRange, Callback, RaftCmdExtraOpts, RegionSnapshot, TabletSnapManager, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, }, Error, Result, @@ -283,9 +283,18 @@ pub trait Simulator { } fn async_command_on_node( + &mut self, + node_id: u64, + request: RaftCmdRequest, + ) -> BoxFuture<'static, RaftCmdResponse> { + self.async_command_on_node_with_opts(node_id, request, RaftCmdExtraOpts::default()) + } + + fn async_command_on_node_with_opts( &mut self, node_id: u64, mut request: RaftCmdRequest, + opts: RaftCmdExtraOpts, ) -> BoxFuture<'static, RaftCmdResponse> { let region_id = request.get_header().get_region_id(); @@ -316,7 +325,11 @@ pub trait Simulator { _ => unreachable!(), } } - PeerMsg::simple_write(Box::new(request.take_header()), write_encoder.encode()) + PeerMsg::simple_write_with_opt( + Box::new(request.take_header()), + write_encoder.encode(), + opts.disk_full_opt, + ) }; self.async_peer_msg_on_node(node_id, region_id, msg) @@ -1275,6 +1288,20 @@ impl, EK: KvEngine> Cluster { .async_command_on_node(leader.get_store_id(), req) } + pub fn async_request_with_opts( + &mut self, + mut req: RaftCmdRequest, + opts: RaftCmdExtraOpts, + ) -> Result> { + let region_id = req.get_header().get_region_id(); + let leader = self.leader_of_region(region_id).unwrap(); + req.mut_header().set_peer(leader.clone()); + Ok(self + .sim + .wl() + .async_command_on_node_with_opts(leader.get_store_id(), req, opts)) + } + pub fn async_put( &mut self, 
key: &[u8], diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index d83dff12e9a4..af2bab261837 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -14,16 +14,19 @@ use engine_test::raft::RaftTestEngine; use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::future::BoxFuture; +use grpcio::{ChannelBuilder, Environment}; use kvproto::{ encryptionpb::EncryptionMethod, - kvrpcpb::Context, + kvrpcpb::{Context, DiskFullOpt, GetResponse, Mutation, PrewriteResponse}, metapb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, + tikvpb::TikvClient, }; use raftstore::{store::ReadResponse, Result}; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; +use test_pd_client::TestPdClient; use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, sleep_ms, Config}; use tikv::{ server::KvEngineFactoryBuilder, @@ -479,3 +482,109 @@ pub fn wait_region_epoch_change, EK: KvEngine>( sleep_ms(10); } } + +pub struct PeerClient { + pub cli: TikvClient, + pub ctx: Context, +} + +impl PeerClient { + pub fn new( + cluster: &Cluster, EK>, + region_id: u64, + peer: metapb::Peer, + ) -> PeerClient { + let cli = { + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(peer.get_store_id())); + TikvClient::new(channel) + }; + let ctx = { + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(peer); + ctx.set_region_epoch(epoch); + ctx + }; + PeerClient { cli, ctx } + } + + pub fn kv_read(&self, key: Vec, ts: u64) -> GetResponse { + test_raftstore::kv_read(&self.cli, self.ctx.clone(), key, ts) + } + + pub fn must_kv_read_equal(&self, key: Vec, val: Vec, ts: u64) { + test_raftstore::must_kv_read_equal(&self.cli, self.ctx.clone(), key, val, ts) + } + + pub fn must_kv_write(&self, pd_client: &TestPdClient, kvs: Vec, pk: Vec) -> u64 { + test_raftstore::must_kv_write(pd_client, &self.cli, self.ctx.clone(), kvs, pk) + } + + pub fn must_kv_prewrite(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite(&self.cli, self.ctx.clone(), muts, pk, ts) + } + + pub fn try_kv_prewrite( + &self, + muts: Vec, + pk: Vec, + ts: u64, + opt: DiskFullOpt, + ) -> PrewriteResponse { + let mut ctx = self.ctx.clone(); + ctx.disk_full_opt = opt; + test_raftstore::try_kv_prewrite(&self.cli, ctx, muts, pk, ts) + } + + pub fn must_kv_prewrite_async_commit(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + pk, + ts, + 0, + true, + false, + ) + } + + pub fn must_kv_prewrite_one_pc(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + pk, + ts, + 0, + false, + true, + ) + } + + pub fn must_kv_commit(&self, keys: Vec>, start_ts: u64, commit_ts: u64) { + test_raftstore::must_kv_commit( + &self.cli, + self.ctx.clone(), + keys, + start_ts, + commit_ts, + commit_ts, + ) + } + + pub fn must_kv_rollback(&self, keys: Vec>, start_ts: u64) { + test_raftstore::must_kv_rollback(&self.cli, self.ctx.clone(), keys, start_ts) + } + + pub fn must_kv_pessimistic_lock(&self, key: Vec, ts: u64) { + test_raftstore::must_kv_pessimistic_lock(&self.cli, self.ctx.clone(), key, ts) + } + + pub fn must_kv_pessimistic_rollback(&self, key: Vec, ts: u64) { + 
test_raftstore::must_kv_pessimistic_rollback(&self.cli, self.ctx.clone(), key, ts, ts) + } +} diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 5183ecd6567f..a80cdda392f8 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -304,6 +304,7 @@ impl tikv_kv::Engine for RaftKv2 { data, ch, send_time: Instant::now_coarse(), + disk_full_opt: batch.disk_full_opt, }); let res = self .router diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 217269bb5b85..d8b3fadb0542 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -5,12 +5,12 @@ use std::{thread, time::Duration}; use kvproto::{ disk_usage::DiskUsage, kvrpcpb::{DiskFullOpt, Op}, - metapb::Region, raft_cmdpb::*, }; use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, future::block_on_timeout, time::Instant}; fn assert_disk_full(resp: &RaftCmdResponse) { @@ -34,148 +34,147 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { } // check the region new leader is elected. -fn assert_region_leader_changed( - cluster: &mut Cluster, - region_id: u64, - original_leader: u64, -) { - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("Leader cannot change when the only disk full node is leader"); +macro_rules! assert_region_leader_changed { + ($cluster:expr, $region_id:expr, $original_leader:expr) => {{ + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("Leader cannot change when the only disk full node is leader"); + } + let new_leader = $cluster.query_leader(1, $region_id, Duration::from_secs(1)); + if new_leader.is_none() { + sleep_ms(10); + continue; + } + if new_leader.unwrap().get_id() == $original_leader { + sleep_ms(10); + continue; + } else { + break; + } } - let new_leader = cluster.query_leader(1, region_id, Duration::from_secs(1)); - if new_leader.is_none() { - sleep_ms(10); - continue; - } - if new_leader.unwrap().get_id() == original_leader { - sleep_ms(10); - continue; - } else { - break; - } - } + }}; } -fn ensure_disk_usage_is_reported( - cluster: &mut Cluster, - peer_id: u64, - store_id: u64, - region: &Region, -) { - let peer = new_peer(store_id, peer_id); - let key = region.get_start_key(); - let ch = async_read_on_peer(cluster, peer, region.clone(), key, true, true); - block_on_timeout(ch, Duration::from_secs(1)).unwrap(); +macro_rules! ensure_disk_usage_is_reported { + ($cluster:expr, $peer_id:expr, $store_id:expr, $region:expr) => {{ + let peer = new_peer($store_id, $peer_id); + let key = $region.get_start_key(); + let ch = async_read_on_peer($cluster, peer, $region.clone(), key, true, true); + block_on_timeout(ch, Duration::from_secs(1)).unwrap(); + }}; } -fn test_disk_full_leader_behaviors(usage: DiskUsage) { - let mut cluster = new_node_cluster(0, 3); - cluster.pd_client.disable_default_operator(); - cluster.run(); - - // To ensure all replicas are not pending. - cluster.must_put(b"k1", b"v1"); - must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // Test new normal proposals won't be allowed when disk is full. 
- let old_last_index = cluster.raft_local_state(1, 1).last_index; - let rx = cluster.async_put(b"k2", b"v2").unwrap(); - assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); - let new_last_index = cluster.raft_local_state(1, 1).last_index; - assert_eq!(old_last_index, new_last_index); - - assert_region_leader_changed(&mut cluster, 1, 1); - fail::remove(get_fp(usage, 1)); - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // merge/split is only allowed on disk almost full. - if usage != DiskUsage::AlreadyFull { - // Test split must be allowed when disk is full. - let region = cluster.get_region(b"k1"); - cluster.must_split(®ion, b"k1"); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_disk_full_leader_behaviors() { + for usage in [DiskUsage::AlmostFull, DiskUsage::AlreadyFull] { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); // set gc duration for v2 + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // To ensure all replicas are not pending. + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // Test new normal proposals won't be allowed when disk is full. + let old_last_index = cluster.raft_local_state(1, 1).last_index; + let rx = cluster.async_put(b"k2", b"v2").unwrap(); + assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); + let new_last_index = cluster.raft_local_state(1, 1).last_index; + assert_eq!(old_last_index, new_last_index); + + assert_region_leader_changed!(&cluster, 1, 1); + fail::remove(get_fp(usage, 1)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // merge/split is only allowed on disk almost full. + if usage != DiskUsage::AlreadyFull { + // Test split must be allowed when disk is full. + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k1"); + } + // Test transfer leader should be allowed. + cluster.must_transfer_leader(1, new_peer(2, 2)); + + // Transfer the leadership back to store 1. + fail::remove(get_fp(usage, 1)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // Test remove peer should be allowed. + cluster.pd_client.must_remove_peer(1, new_peer(3, 3)); + // Sleep for a while until the disk usage and peer changes have been synced. + thread::sleep(Duration::from_secs(1)); + must_get_none(&cluster.get_engine(3), b"k1"); + + // Test add peer should be allowed. It must be a higher peer-id in v2. + cluster.pd_client.must_add_peer(1, new_peer(3, 4)); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + fail::remove(get_fp(usage, 1)); + // Sleep for a while before next case to make it clear. + thread::sleep(Duration::from_secs(1)); } - // Test transfer leader should be allowed. - cluster.must_transfer_leader(1, new_peer(2, 2)); - - // Transfer the leadership back to store 1. - fail::remove(get_fp(usage, 1)); - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // Test remove peer should be allowed. 
- cluster.pd_client.must_remove_peer(1, new_peer(3, 3)); - must_get_none(&cluster.get_engine(3), b"k1"); - - // Test add peer should be allowed. - cluster.pd_client.must_add_peer(1, new_peer(3, 3)); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - fail::remove(get_fp(usage, 1)); -} - -#[test] -fn test_disk_full_for_region_leader() { - test_disk_full_leader_behaviors(DiskUsage::AlmostFull); - test_disk_full_leader_behaviors(DiskUsage::AlreadyFull); -} - -fn test_disk_full_follower_behaviors(usage: DiskUsage) { - let mut cluster = new_node_cluster(0, 3); - cluster.pd_client.disable_default_operator(); - cluster.run(); - - // To ensure all replicas are not pending. - cluster.must_put(b"k1", b"v1"); - must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 2), "return").unwrap(); - - // Test followers will reject pre-transfer-leader command. - let epoch = cluster.get_region_epoch(1); - let transfer = new_admin_request(1, &epoch, new_transfer_leader_cmd(new_peer(2, 2))); - cluster - .call_command_on_leader(transfer, Duration::from_secs(3)) - .unwrap(); - assert_eq!(cluster.leader_of_region(1).unwrap(), new_peer(1, 1)); - cluster.must_put(b"k2", b"v2"); - - // Test leader shouldn't append entries to disk full followers. - let old_last_index = cluster.raft_local_state(1, 2).last_index; - cluster.must_put(b"k3", b"v3"); - let new_last_index = cluster.raft_local_state(1, 2).last_index; - assert_eq!(old_last_index, new_last_index); - must_get_none(&cluster.get_engine(2), b"k3"); - - // Test followers will response votes when disk is full. - cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(1, 1) - .direction(Direction::Send) - .msg_type(MessageType::MsgRequestVoteResponse), - )); - cluster.must_transfer_leader(1, new_peer(3, 3)); - - fail::remove(get_fp(usage, 2)); } -#[test] -fn test_disk_full_for_region_follower() { - test_disk_full_follower_behaviors(DiskUsage::AlmostFull); - test_disk_full_follower_behaviors(DiskUsage::AlreadyFull); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_disk_full_follower_behaviors() { + for usage in [DiskUsage::AlmostFull, DiskUsage::AlreadyFull] { + let mut cluster = new_cluster(0, 3); + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // To ensure all replicas are not pending. + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 2), "return").unwrap(); + + // Test followers will reject pre-transfer-leader command. + let epoch = cluster.get_region_epoch(1); + let transfer = new_admin_request(1, &epoch, new_transfer_leader_cmd(new_peer(2, 2))); + cluster + .call_command_on_leader(transfer, Duration::from_secs(3)) + .unwrap(); + assert_eq!(cluster.leader_of_region(1).unwrap(), new_peer(1, 1)); + cluster.must_put(b"k2", b"v2"); + + // Test leader shouldn't append entries to disk full followers. 
+ let old_last_index = cluster.raft_local_state(1, 2).last_index; + cluster.must_put(b"k3", b"v3"); + let new_last_index = cluster.raft_local_state(1, 2).last_index; + assert_eq!(old_last_index, new_last_index); + must_get_none(&cluster.get_engine(2), b"k3"); + + // Test followers will response votes when disk is full. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 1) + .direction(Direction::Send) + .msg_type(MessageType::MsgRequestVoteResponse), + )); + cluster.must_transfer_leader(1, new_peer(3, 3)); + + fail::remove(get_fp(usage, 2)); + } } -fn test_disk_full_txn_behaviors(usage: DiskUsage) { - let mut cluster = new_server_cluster(0, 3); +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_disk_full_txn_behaviors() { + let usage = DiskUsage::AlmostFull; + let mut cluster = new_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -199,7 +198,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed!(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -269,16 +268,13 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { fail::remove(get_fp(usage, 1)); } -#[test] -fn test_disk_full_for_txn_operations() { - test_disk_full_txn_behaviors(DiskUsage::AlmostFull); -} - -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_majority_disk_full() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); // set gc duration for v2 cluster.pd_client.disable_default_operator(); cluster.run(); @@ -295,7 +291,7 @@ fn test_majority_disk_full() { // To ensure followers have reported disk usages to the leader. for i in 1..3 { fail::cfg(get_fp(DiskUsage::AlmostFull, i + 1), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } // Normal proposals will be rejected because of majority peers' disk full. @@ -319,14 +315,14 @@ fn test_majority_disk_full() { // new disk usages are reported. for i in 1..3 { fail::remove(get_fp(DiskUsage::AlmostFull, i + 1)); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); } // To ensure followers have reported disk usages to the leader. for i in 1..3 { fail::cfg(get_fp(DiskUsage::AlreadyFull, i + 1), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } // Proposals with special `DiskFullOpt`s will still be rejected if majority @@ -342,10 +338,12 @@ fn test_majority_disk_full() { // Peer 2 disk usage changes from already full to almost full. fail::remove(get_fp(DiskUsage::AlreadyFull, 2)); fail::cfg(get_fp(DiskUsage::AlmostFull, 2), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, 2, 2, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, 2, 2, ®ion); - // Configuration change should be alloed. 
+ // Configuration change should be allowed. cluster.pd_client.must_remove_peer(1, new_peer(2, 2)); + // Sleep for a while until the disk usage and peer changes have been synced. + thread::sleep(Duration::from_secs(1)); // After the last configuration change is applied, the raft group will be like // `[(1, DiskUsage::AlmostFull), (3, DiskUsage::AlreadyFull)]`. So no more @@ -364,9 +362,10 @@ fn test_majority_disk_full() { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_disk_full_followers_with_hibernate_regions() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -391,31 +390,13 @@ fn test_disk_full_followers_with_hibernate_regions() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } -// check the region new leader is elected. -fn assert_region_merged( - cluster: &mut Cluster, - left_region_key: &[u8], - right_region_key: &[u8], -) { - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("region merge failed"); - } - let region_left = cluster.get_region(left_region_key); - let region_right = cluster.get_region(right_region_key); - if region_left.get_id() != region_right.get_id() { - sleep_ms(10); - continue; - } else { - break; - } - } -} - -#[test] +// #[test_case(test_raftstore_v2::new_server_cluster)] +// FIXME: #[test_case(test_raftstore_v2::new_server_cluster)] +// In v2 `must_try_merge` always return error. Also the last `must_merge` +// sometimes cannot get an updated min_matched. +#[test_case(test_raftstore::new_server_cluster)] fn test_merge_on_majority_disk_full() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -448,23 +429,42 @@ fn test_merge_on_majority_disk_full() { fail::cfg(get_fp(DiskUsage::AlmostFull, i), "return").unwrap(); } for peer in region1.get_peers().iter() { - ensure_disk_usage_is_reported(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion1); + ensure_disk_usage_is_reported!(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion1); } for peer in region2.get_peers().iter() { - ensure_disk_usage_is_reported(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion2); + ensure_disk_usage_is_reported!(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion2); } cluster.must_try_merge(region1.get_id(), region2.get_id()); - assert_region_merged(&mut cluster, b"k1", b"k3"); + + // check the region new leader is elected. 
+ let assert_region_merged = |left_region_key: &[u8], right_region_key: &[u8]| { + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("region merge failed"); + } + let region_left = cluster.get_region(left_region_key); + let region_right = cluster.get_region(right_region_key); + if region_left.get_id() != region_right.get_id() { + sleep_ms(10); + continue; + } else { + break; + } + } + }; + assert_region_merged(b"k1", b"k3"); for i in 1..3 { fail::remove(get_fp(DiskUsage::AlmostFull, i)); } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_almost_and_already_full_behavior() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -481,7 +481,7 @@ fn test_almost_and_already_full_behavior() { fail::cfg(get_fp(DiskUsage::AlreadyFull, i), "return").unwrap(); } for i in 1..5 { - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } let lead_client = PeerClient::new(&cluster, 1, new_peer(1, 1)); @@ -521,29 +521,10 @@ fn test_almost_and_already_full_behavior() { } } -fn wait_down_peers_reported( - cluster: &Cluster, - total_down_count: u64, - target_report_peer: u64, -) { - let mut peers = cluster.get_down_peers(); - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("Leader cannot change when the only disk full node is leader"); - } - - if peers.len() == total_down_count as usize && peers.contains_key(&target_report_peer) { - return; - } - sleep_ms(10); - peers = cluster.get_down_peers(); - } -} - -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_down_node_when_disk_full() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); // To ensure the thread has full store disk usage infomation. 
cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.cfg.raft_store.max_peer_down_duration = ReadableDuration::secs(1); @@ -555,7 +536,7 @@ fn test_down_node_when_disk_full() { let region = cluster.get_region(b"k1"); for i in 3..6 { fail::cfg(get_fp(DiskUsage::AlmostFull, i), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i, i, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i, i, ®ion); } let lead_client = PeerClient::new(&cluster, 1, new_peer(1, 1)); @@ -574,7 +555,23 @@ fn test_down_node_when_disk_full() { ); cluster.stop_node(2); - wait_down_peers_reported(&cluster, 1, 2u64); + + let wait_down_peers_reported = |total_down_count: u64, target_report_peer: u64| { + let mut peers = cluster.get_down_peers(); + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("Leader cannot change when the only disk full node is leader"); + } + + if peers.len() == total_down_count as usize && peers.contains_key(&target_report_peer) { + return; + } + sleep_ms(10); + peers = cluster.get_down_peers(); + } + }; + wait_down_peers_reported(1u64, 2u64); let prewrite_ts = get_tso(&cluster.pd_client); let res = lead_client.try_kv_prewrite( diff --git a/tests/integrations/raftstore/test_stale_read.rs b/tests/integrations/raftstore/test_stale_read.rs index 24e13003f7ef..5de9bda1f641 100644 --- a/tests/integrations/raftstore/test_stale_read.rs +++ b/tests/integrations/raftstore/test_stale_read.rs @@ -8,7 +8,7 @@ use kvproto::{ metapb::{Peer, Region}, tikvpb_grpc::TikvClient, }; -use test_raftstore::{must_get_equal, new_mutation, new_peer, PeerClient}; +use test_raftstore::{must_get_equal, new_mutation, new_peer}; use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant}; From e29d3a989d73f0a1c1534114dc530d3c3200d38d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 11 Oct 2023 15:09:25 +0800 Subject: [PATCH 084/203] raftstore-v2: fix non-deterministic region merge (#15697) close tikv/tikv#15682 This commit addresses the issue where a "region corrupted" error still occurs in certain scenarios despite PR #15625 resolving the problem in the transfer leader scenario. The root cause of the issue is the non-deterministic nature of commit merge and rollback merge, allowing transient errors during propose to trigger the problem again. To fix this issue, the proposed solution ensures that TiKV only initiates rollback merge when either the target region is not found or the epoch has increased. 
Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../operation/command/admin/merge/commit.rs | 106 +++++++++--------- .../tests/failpoints/test_merge.rs | 9 +- components/raftstore/src/store/peer.rs | 2 + tests/failpoints/cases/test_merge.rs | 94 +++++++++++++++- 4 files changed, 153 insertions(+), 58 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index e95a13600fbc..b12ba9eaf9df 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -178,6 +178,11 @@ impl Peer { self.region_id() == 2, |_| {} ); + fail::fail_point!( + "ask_target_peer_to_commit_merge_store_1", + store_ctx.store_id == 1, + |_| {} + ); let state = self.applied_merge_state().unwrap(); let target = state.get_target(); let target_id = target.get_id(); @@ -295,7 +300,10 @@ impl Peer { target_id: self.region_id(), }, ); - } else if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { + return; + } + // current region_epoch > region epoch in commit merge. + if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { info!( self.logger, "reject commit merge because of stale"; @@ -306,63 +314,51 @@ impl Peer { let _ = store_ctx .router .force_send(source_id, PeerMsg::RejectCommitMerge { index }); - } else if expected_epoch == region.get_region_epoch() { - assert!( - util::is_sibling_regions(source_region, region), - "{}: {:?}, {:?}", - SlogFormat(&self.logger), - source_region, - region - ); - assert!( - region_on_same_stores(source_region, region), - "{:?}, {:?}", - source_region, - region - ); - assert!(!self.storage().has_dirty_data()); - if self.is_leader() && !self.leader_transferring() { - let index = commit_of_merge(req.get_admin_request().get_commit_merge()); - if self.proposal_control().is_merging() { - // `on_admin_command` may delay our request indefinitely. It's better to check - // directly. - info!( - self.logger, - "reject commit merge because of target is merging with another region"; - ); - } else { - let (ch, res) = CmdResChannel::pair(); - self.on_admin_command(store_ctx, req, ch); - if let Some(res) = res.take_result() - && res.get_header().has_error() - { - error!( - self.logger, - "failed to propose commit merge"; - "source" => source_id, - "res" => ?res, - ); - } else { - fail::fail_point!("on_propose_commit_merge_success"); - return; - } - } - let _ = store_ctx - .router - .force_send(source_id, PeerMsg::RejectCommitMerge { index }); - } else if self.leader_transferring() { - info!( - self.logger, - "not to propose commit merge when transferring leader"; - "transferee" => self.leader_transferee(), - ); - } - } else { + return; + } + // current region_epoch < region epoch in commit merge. 
+ if util::is_epoch_stale(region.get_region_epoch(), expected_epoch) { info!( self.logger, - "ignore commit merge because self epoch is stale"; + "target region still not catch up, skip."; "source" => ?source_region, + "target_region_epoch" => ?expected_epoch, + "exist_region_epoch" => ?self.region().get_region_epoch(), ); + return; + } + assert!( + util::is_sibling_regions(source_region, region), + "{}: {:?}, {:?}", + SlogFormat(&self.logger), + source_region, + region + ); + assert!( + region_on_same_stores(source_region, region), + "{:?}, {:?}", + source_region, + region + ); + assert!(!self.storage().has_dirty_data()); + let (ch, res) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + if let Some(res) = res.take_result() + && res.get_header().has_error() + { + error!( + self.logger, + "failed to propose commit merge"; + "source" => source_id, + "res" => ?res, + ); + fail::fail_point!( + "on_propose_commit_merge_fail_store_1", + store_ctx.store_id == 1, + |_| {} + ); + } else { + fail::fail_point!("on_propose_commit_merge_success"); } } @@ -691,6 +687,8 @@ impl Peer { info!( self.logger, "become follower for new logs"; + "first_log_term" => first.term, + "first_log_index" => first.index, "new_log_term" => last_log.term, "new_log_index" => last_log.index, "term" => self.term(), diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs index 890b8c5e27a1..11fe666b49b4 100644 --- a/components/raftstore-v2/tests/failpoints/test_merge.rs +++ b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -7,7 +7,7 @@ use std::{ use engine_traits::Peekable; use raftstore_v2::router::{PeerMsg, PeerTick}; -use tikv_util::store::new_peer; +use tikv_util::{config::ReadableDuration, info, store::new_peer}; use crate::cluster::{ life_helper::assert_peer_not_exist, @@ -179,7 +179,9 @@ fn test_rollback() { // Target is merging. #[test] fn test_merge_conflict_0() { - let mut cluster = Cluster::default(); + let mut cluster = Cluster::with_configs(1, None, None, |cfg| { + cfg.merge_check_tick_interval = ReadableDuration::millis(100); + }); let store_id = cluster.node(0).id(); let router = &mut cluster.routers[0]; @@ -216,6 +218,7 @@ fn test_merge_conflict_0() { format!("k{}", region_3_id).as_bytes(), false, ); + info!("regions: {:?}, {:?}, {:?}", region_1, region_2, region_3); // pause merge progress of 2+3. let fp = fail::FailGuard::new("apply_commit_merge", "pause"); @@ -236,9 +239,9 @@ fn test_merge_conflict_0() { .unwrap(); let region_2 = cluster.routers[0].region_detail(region_2.get_id()); merge_region(&cluster, 0, region_1, peer_1, region_2, false); + drop(fp); // wait for rollback. rx.recv_timeout(std::time::Duration::from_secs(1)).unwrap(); - drop(fp); fail::remove("apply_rollback_merge"); // Check region 1 is not merged and can serve writes. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index e9350ba7bb00..85b8798bfb17 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1086,6 +1086,8 @@ where // of term explicitly to get correct metadata. 
info!( "become follower for new logs"; + "first_log_term" => first.term, + "first_log_index" => first.index, "new_log_term" => last_log.term, "new_log_index" => last_log.index, "term" => self.term(), diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 861e4a658cea..ffbd69dc05eb 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -20,7 +20,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::store::*; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::router::{PeerMsg, PeerTick}; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; @@ -1848,6 +1848,98 @@ fn test_concurrent_between_transfer_leader_and_merge() { cluster.must_put(b"k4", b"v4"); } +#[test] +fn test_deterministic_commit_rollback_merge() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + // Use a large election tick to stable test. + configure_for_lease_read(&mut cluster.cfg, None, Some(1000)); + // Use 2 threads for polling peers, so that they can run concurrently. + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.run(); + + let pd_client = Arc::clone(&cluster.pd_client); + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k3").unwrap(); + let right_1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_1); + let left_2 = find_peer(&left, 2).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_2); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + for i in 0..3 { + must_get_equal(&cluster.get_engine(i + 1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); + } + + // Delay 1003 apply by dropping append response, so that proposal will fail + // due to applied_term != current_term. + let target_region_id = left.get_id(); + cluster.add_recv_filter_on_node( + 1, + Box::new(DropMessageFilter::new(Arc::new(move |m| { + if m.get_region_id() == target_region_id { + return m.get_message().get_msg_type() != MessageType::MsgAppendResponse; + } + true + }))), + ); + + let left_1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_1); + + // left(1000) <- right(1). + let (tx1, rx1) = channel(); + let (tx2, rx2) = channel(); + let tx1 = Mutex::new(tx1); + let rx2 = Mutex::new(rx2); + fail::cfg_callback("on_propose_commit_merge_fail_store_1", move || { + tx1.lock().unwrap().send(()).unwrap(); + rx2.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + cluster.merge_region(right.get_id(), left.get_id(), Callback::None); + + // Wait for target fails to propose commit merge. + rx1.recv_timeout(Duration::from_secs(5)).unwrap(); + // Let target apply continue, and new AskCommitMerge messages will propose + // commit merge successfully. + cluster.clear_recv_filter_on_node(1); + + // Trigger a CheckMerge tick, so source will send a AskCommitMerge again. + fail::cfg("ask_target_peer_to_commit_merge_store_1", "pause").unwrap(); + let router = cluster.get_router(1).unwrap(); + router + .check_send(1, PeerMsg::Tick(PeerTick::CheckMerge)) + .unwrap(); + + // Send RejectCommitMerge to source. 
+ tx2.send(()).unwrap(); + fail::remove("on_propose_commit_merge_fail_store_1"); + + // Wait for target applies to current term. + cluster.must_put(b"k1", b"v11"); + + // By remove the failpoint, CheckMerge tick sends a AskCommitMerge again. + fail::remove("ask_target_peer_to_commit_merge_store_1"); + // At this point, source region will propose rollback merge if commit merge + // is not deterministic. + + // Wait for source handle commit or rollback merge. + wait_region_epoch_change(&cluster, &left, Duration::from_secs(5)); + + // No matter commit merge or rollback merge, cluster must be available to + // process requests + cluster.must_put(b"k0", b"v0"); + cluster.must_put(b"k4", b"v4"); +} + struct MsgVoteFilter {} impl Filter for MsgVoteFilter { From 08a2d654549105104bb701179586256402dbcadd Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 12 Oct 2023 12:10:55 +0800 Subject: [PATCH 085/203] coprocessor: do not treat deadline exceeded error as other error (#15709) ref tikv/tikv#15566 Signed-off-by: glorv --- src/coprocessor/dag/mod.rs | 50 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 31a6df181d5b..bd077c5c0ba5 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -143,7 +143,9 @@ fn handle_qe_response( can_be_cached: bool, data_version: Option, ) -> Result { - use tidb_query_common::error::ErrorInner; + use tidb_query_common::error::{ErrorInner, EvaluateError}; + + use crate::coprocessor::Error; match result { Ok((sel_resp, range)) => { @@ -162,6 +164,7 @@ fn handle_qe_response( } Err(err) => match *err.0 { ErrorInner::Storage(err) => Err(err.into()), + ErrorInner::Evaluate(EvaluateError::DeadlineExceeded) => Err(Error::DeadlineExceeded), ErrorInner::Evaluate(err) => { let mut resp = Response::default(); let mut sel_resp = SelectResponse::default(); @@ -179,7 +182,9 @@ fn handle_qe_response( fn handle_qe_stream_response( result: tidb_query_common::Result<(Option<(StreamResponse, IntervalRange)>, bool)>, ) -> Result<(Option, bool)> { - use tidb_query_common::error::ErrorInner; + use tidb_query_common::error::{ErrorInner, EvaluateError}; + + use crate::coprocessor::Error; match result { Ok((Some((s_resp, range)), finished)) => { @@ -192,6 +197,7 @@ fn handle_qe_stream_response( Ok((None, finished)) => Ok((None, finished)), Err(err) => match *err.0 { ErrorInner::Storage(err) => Err(err.into()), + ErrorInner::Evaluate(EvaluateError::DeadlineExceeded) => Err(Error::DeadlineExceeded), ErrorInner::Evaluate(err) => { let mut resp = Response::default(); let mut s_resp = StreamResponse::default(); @@ -203,3 +209,43 @@ fn handle_qe_stream_response( }, } } + +#[cfg(test)] +mod tests { + use anyhow::anyhow; + use protobuf::Message; + use tidb_query_common::error::{Error as CommonError, EvaluateError, StorageError}; + + use super::*; + use crate::coprocessor::Error; + + #[test] + fn test_handle_qe_response() { + // Ok Response + let ok_res = Ok((SelectResponse::default(), None)); + let res = handle_qe_response(ok_res, true, Some(1)).unwrap(); + assert!(res.can_be_cached); + assert_eq!(res.get_cache_last_version(), 1); + let mut select_res = SelectResponse::new(); + Message::merge_from_bytes(&mut select_res, res.get_data()).unwrap(); + assert!(!select_res.has_error()); + + // Storage Error + let storage_err = CommonError::from(StorageError(anyhow!("unknown"))); + let res = handle_qe_response(Err(storage_err), false, None); + assert!(matches!(res, 
Err(Error::Other(_)))); + + // Evaluate Error + let err = CommonError::from(EvaluateError::DeadlineExceeded); + let res = handle_qe_response(Err(err), false, None); + assert!(matches!(res, Err(Error::DeadlineExceeded))); + + let err = CommonError::from(EvaluateError::InvalidCharacterString { + charset: "test".into(), + }); + let res = handle_qe_response(Err(err), false, None).unwrap(); + let mut select_res = SelectResponse::new(); + Message::merge_from_bytes(&mut select_res, res.get_data()).unwrap(); + assert_eq!(select_res.get_error().get_code(), 1300); + } +} From 2d7616e3f8e3d254bbfa8d82f3980547073d1948 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 12 Oct 2023 12:25:55 +0800 Subject: [PATCH 086/203] raftstore-v2: adjust max-background-flushes default value (#15723) ref tikv/tikv#14470 Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 185 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 56 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 0eb006363f0a..74f25a22ef65 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -244,22 +244,30 @@ const RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS: BackgroundJobLimits = BackgroundJobL // `defaults` serves as an upper bound for returning limits. fn get_background_job_limits_impl( + engine_type: EngineType, cpu_num: u32, defaults: &BackgroundJobLimits, ) -> BackgroundJobLimits { // At the minimum, we should have two background jobs: one for flush and one for // compaction. Otherwise, the number of background jobs should not exceed // cpu_num - 1. - let max_background_jobs = cmp::max(2, cmp::min(defaults.max_background_jobs, cpu_num - 1)); + let mut max_background_jobs = cmp::max(2, cmp::min(defaults.max_background_jobs, cpu_num - 1)); // Scale flush threads proportionally to cpu cores. Also make sure the number of // flush threads doesn't exceed total jobs. let max_background_flushes = cmp::min( (max_background_jobs + 3) / 4, defaults.max_background_flushes, ); - // Cap max_sub_compactions to allow at least two compactions. - let max_compactions = max_background_jobs - max_background_flushes; + + // set the default compaction threads differently for v1 and v2: + // v1: cap max_sub_compactions to allow at least two compactions. + // v2: decrease the compaction threads to make the qps more stable. + let max_compactions = match engine_type { + EngineType::RaftKv => max_background_jobs - max_background_flushes, + EngineType::RaftKv2 => (max_background_jobs + 7) / 8, + }; let max_sub_compactions: u32 = (max_compactions - 1).clamp(1, defaults.max_sub_compactions); + max_background_jobs = max_background_flushes + max_compactions; // Maximum background GC threads for Titan let max_titan_background_gc = cmp::min(defaults.max_titan_background_gc, cpu_num); @@ -271,9 +279,12 @@ fn get_background_job_limits_impl( } } -fn get_background_job_limits(defaults: &BackgroundJobLimits) -> BackgroundJobLimits { +fn get_background_job_limits( + engine_type: EngineType, + defaults: &BackgroundJobLimits, +) -> BackgroundJobLimits { let cpu_num = cmp::max(SysQuota::cpu_cores_quota() as u32, 1); - get_background_job_limits_impl(cpu_num, defaults) + get_background_job_limits_impl(engine_type, cpu_num, defaults) } macro_rules! 
cf_config { @@ -1308,19 +1319,14 @@ pub struct DbResources { impl Default for DbConfig { fn default() -> DbConfig { - let bg_job_limits = get_background_job_limits(&KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); - let titan_config = TitanDbConfig { - max_background_gc: bg_job_limits.max_titan_background_gc as i32, - ..Default::default() - }; DbConfig { wal_recovery_mode: DBRecoveryMode::PointInTime, wal_dir: "".to_owned(), wal_ttl_seconds: 0, wal_size_limit: ReadableSize::kb(0), max_total_wal_size: None, - max_background_jobs: bg_job_limits.max_background_jobs as i32, - max_background_flushes: bg_job_limits.max_background_flushes as i32, + max_background_jobs: 0, + max_background_flushes: 0, max_manifest_file_size: ReadableSize::mb(128), create_if_missing: true, max_open_files: 40960, @@ -1339,7 +1345,7 @@ impl Default for DbConfig { rate_limiter_auto_tuned: true, bytes_per_sync: ReadableSize::mb(1), wal_bytes_per_sync: ReadableSize::kb(512), - max_sub_compactions: bg_job_limits.max_sub_compactions, + max_sub_compactions: 0, writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, enable_pipelined_write: false, @@ -1354,7 +1360,7 @@ impl Default for DbConfig { writecf: WriteCfConfig::default(), lockcf: LockCfConfig::default(), raftcf: RaftCfConfig::default(), - titan: titan_config, + titan: TitanDbConfig::default(), } } } @@ -1410,6 +1416,19 @@ impl DbConfig { .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); } } + let bg_job_limits = get_background_job_limits(engine, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); + if self.max_background_jobs == 0 { + self.max_background_jobs = bg_job_limits.max_background_jobs as i32; + } + if self.max_background_flushes == 0 { + self.max_background_flushes = bg_job_limits.max_background_flushes as i32; + } + if self.max_sub_compactions == 0 { + self.max_sub_compactions = bg_job_limits.max_sub_compactions; + } + if self.titan.max_background_gc == 0 { + self.titan.max_background_gc = bg_job_limits.max_titan_background_gc as i32; + } } pub fn build_resources(&self, env: Arc, engine: EngineType) -> DbResources { @@ -1807,7 +1826,9 @@ pub struct RaftDbConfig { impl Default for RaftDbConfig { fn default() -> RaftDbConfig { - let bg_job_limits = get_background_job_limits(&RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); + // raftdb should only be used for raftkv + let bg_job_limits = + get_background_job_limits(EngineType::RaftKv, &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); let titan_config = TitanDbConfig { max_background_gc: bg_job_limits.max_titan_background_gc as i32, ..Default::default() @@ -5913,61 +5934,95 @@ mod tests { #[test] fn test_background_job_limits() { - // cpu num = 1 + for engine in [EngineType::RaftKv, EngineType::RaftKv2] { + // cpu num = 1 + assert_eq!( + get_background_job_limits_impl( + engine, + 1, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 1, + } + ); + assert_eq!( + get_background_job_limits_impl( + engine, + 1, // cpu_num + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 1, + } + ); + // cpu num = 2 + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, + 2, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 2, 
+ } + ); + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, + 2, // cpu_num + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 2, + } + ); + } + + // cpu num = 4 assert_eq!( get_background_job_limits_impl( - 1, // cpu_num + EngineType::RaftKv, + 4, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 2, - max_background_flushes: 1, - max_sub_compactions: 1, - max_titan_background_gc: 1, - } - ); - assert_eq!( - get_background_job_limits_impl( - 1, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS - ), - BackgroundJobLimits { - max_background_jobs: 2, + max_background_jobs: 3, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 1, + max_titan_background_gc: 4, } ); - // cpu num = 2 assert_eq!( get_background_job_limits_impl( - 2, // cpu_num + EngineType::RaftKv2, + 4, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { max_background_jobs: 2, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 2, - } - ); - assert_eq!( - get_background_job_limits_impl( - 2, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS - ), - BackgroundJobLimits { - max_background_jobs: 2, - max_background_flushes: 1, - max_sub_compactions: 1, - max_titan_background_gc: 2, + max_titan_background_gc: 4, } ); - // cpu num = 4 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 4, // cpu_num - &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { max_background_jobs: 3, @@ -5976,33 +6031,36 @@ mod tests { max_titan_background_gc: 4, } ); + // cpu num = 8 assert_eq!( get_background_job_limits_impl( - 4, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + EngineType::RaftKv, + 8, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 3, - max_background_flushes: 1, - max_sub_compactions: 1, + max_background_jobs: 7, + max_background_flushes: 2, + max_sub_compactions: 3, max_titan_background_gc: 4, } ); - // cpu num = 8 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv2, 8, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 7, + max_background_jobs: 3, max_background_flushes: 2, - max_sub_compactions: 3, + max_sub_compactions: 1, max_titan_background_gc: 4, } ); assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 8, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), @@ -6011,6 +6069,7 @@ mod tests { // cpu num = 16 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 16, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), @@ -6018,6 +6077,20 @@ mod tests { ); assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv2, + 16, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 5, + max_background_flushes: 3, + max_sub_compactions: 1, + max_titan_background_gc: 4, + } + ); + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, 16, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), From 272fcd04f645479c4fdc265e3083c250796c60df Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:18:25 +0800 Subject: [PATCH 087/203] raftstore-v2: avoid follower forwarding propose msg (#15704) ref tikv/tikv#14390 avoid follower forwarding propose msg Signed-off-by: SpadeA-Tang 
Co-authored-by: tonyxuqqi --- .../src/operation/command/write/mod.rs | 31 ++---- components/raftstore-v2/src/operation/mod.rs | 4 +- components/raftstore/src/store/fsm/apply.rs | 1 - .../raftstore/src/store/simple_write.rs | 30 +----- tests/failpoints/cases/test_transaction.rs | 101 +++++++++++++++++- 5 files changed, 112 insertions(+), 55 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 6eacc75c0f1a..cc71533a29a8 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -12,7 +12,7 @@ use raftstore::{ fsm::{apply, MAX_PROPOSAL_SIZE_RATIO}, metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, - util::{self, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER}, + util::{self}, }, Error, Result, }; @@ -80,13 +80,10 @@ impl Peer { ch.report_error(resp); return; } - // ProposalControl is reliable only when applied to current term. - let call_proposed_on_success = self.applied_to_current_term(); let mut encoder = SimpleWriteReqEncoder::new( header, data, (ctx.cfg.raft_entry_max_size.0 as f64 * MAX_PROPOSAL_SIZE_RATIO) as usize, - call_proposed_on_success, ); encoder.add_response_channel(ch); self.set_has_ready(); @@ -106,7 +103,6 @@ impl Peer { Box::::default(), data, ctx.cfg.raft_entry_max_size.0 as usize, - false, ) .encode() .0 @@ -118,30 +114,17 @@ impl Peer { pub fn propose_pending_writes(&mut self, ctx: &mut StoreContext) { if let Some(encoder) = self.simple_write_encoder_mut().take() { - let call_proposed_on_success = if encoder.notify_proposed() { - // The request has pass conflict check and called all proposed callbacks. + let header = encoder.header(); + let res = self.validate_command(header, None, &mut ctx.raft_metrics); + let call_proposed_on_success = if matches!(res, Err(Error::EpochNotMatch { .. })) { false } else { - // Epoch may have changed since last check. - let from_epoch = encoder.header().get_region_epoch(); - let res = util::compare_region_epoch( - from_epoch, - self.region(), - NORMAL_REQ_CHECK_CONF_VER, - NORMAL_REQ_CHECK_VER, - true, - ); - if let Err(e) = res { - // TODO: query sibling regions. - ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); - encoder.encode().1.report_error(cmd_resp::new_error(e)); - return; - } - // Only when it applies to current term, the epoch check can be reliable. 
self.applied_to_current_term() }; + let (data, chs) = encoder.encode(); - let res = self.propose(ctx, data); + let res = res.and_then(|_| self.propose(ctx, data)); + fail_point!("after_propose_pending_writes"); self.post_propose_command(ctx, res, chs, call_proposed_on_success); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 8ce592dd753b..6d5cba9fff81 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -87,7 +87,7 @@ pub mod test_util { let mut header = Box::::default(); header.set_region_id(region_id); header.set_region_epoch(region_epoch); - let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512); let (bin, _) = req_encoder.encode(); let mut e = Entry::default(); e.set_entry_type(EntryType::EntryNormal); @@ -112,7 +112,7 @@ pub mod test_util { let mut header = Box::::default(); header.set_region_id(region_id); header.set_region_epoch(region_epoch); - let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512); let (bin, _) = req_encoder.encode(); let mut e = Entry::default(); e.set_entry_type(EntryType::EntryNormal); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c170e5a35f98..038171d97151 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5745,7 +5745,6 @@ mod tests { self.header.clone(), bin, 1000, - false, ); let (bytes, _) = req_encoder.encode(); self.entry.set_data(bytes.into()); diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index a303a5869356..dd461e618676 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -49,7 +49,6 @@ where channels: Vec, size_limit: usize, write_type: WriteType, - notify_proposed: bool, } impl SimpleWriteReqEncoder @@ -57,14 +56,10 @@ where C: ErrorCallback + WriteCallback, { /// Create a request encoder. - /// - /// If `notify_proposed` is true, channels will be called `notify_proposed` - /// when it's appended. 
pub fn new( header: Box, bin: SimpleWriteBinary, size_limit: usize, - notify_proposed: bool, ) -> SimpleWriteReqEncoder { let mut buf = Vec::with_capacity(256); buf.push(MAGIC_PREFIX); @@ -77,7 +72,6 @@ where channels: vec![], size_limit, write_type: bin.write_type, - notify_proposed, } } @@ -112,18 +106,10 @@ where } #[inline] - pub fn add_response_channel(&mut self, mut ch: C) { - if self.notify_proposed { - ch.notify_proposed(); - } + pub fn add_response_channel(&mut self, ch: C) { self.channels.push(ch); } - #[inline] - pub fn notify_proposed(&self) -> bool { - self.notify_proposed - } - #[inline] pub fn header(&self) -> &RaftRequestHeader { &self.header @@ -558,7 +544,6 @@ mod tests { header.clone(), bin, usize::MAX, - false, ); let mut encoder = SimpleWriteEncoder::with_capacity(512); @@ -570,7 +555,6 @@ mod tests { header.clone(), bin, 0, - false, ); let (bytes, _) = req_encoder.encode(); @@ -619,9 +603,8 @@ mod tests { .collect(); encoder.ingest(exp.clone()); let bin = encoder.encode(); - let req_encoder = SimpleWriteReqEncoder::>::new( - header, bin, 0, false, - ); + let req_encoder = + SimpleWriteReqEncoder::>::new(header, bin, 0); let (bytes, _) = req_encoder.encode(); let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); @@ -683,7 +666,6 @@ mod tests { header.clone(), bin.clone(), 512, - false, ); let mut header2 = Box::::default(); @@ -700,7 +682,6 @@ mod tests { header.clone(), bin2.clone(), 512, - false, ); assert!(!req_encoder2.amend(&header, &bin)); @@ -735,7 +716,6 @@ mod tests { header.clone(), SimpleWriteEncoder::with_capacity(512).encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); assert_eq!( @@ -753,7 +733,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -771,7 +750,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -788,7 +766,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -816,7 +793,6 @@ mod tests { header, encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 14f4161c7ae1..0b6e6269e959 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -2,6 +2,7 @@ use std::{ sync::{ + atomic::{AtomicBool, Ordering}, mpsc::{channel, sync_channel}, Arc, Mutex, }, @@ -9,13 +10,15 @@ use std::{ time::Duration, }; -use futures::executor::block_on; +use engine_traits::CF_DEFAULT; +use futures::{executor::block_on, StreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{ self as pb, AssertionLevel, Context, GetRequest, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, }, + raft_serverpb::RaftMessage, tikvpb::TikvClient, }; use raft::prelude::{ConfChangeType, MessageType}; @@ -45,7 +48,9 @@ use tikv::{ Snapshot, TestEngineBuilder, TestStorageBuilderApiV1, }, }; +use tikv_kv::{Engine, Modify, WriteData, WriteEvent}; use tikv_util::{ + config::ReadableDuration, store::{new_peer, peer::new_incoming_voter}, HandyRwLock, }; @@ -803,3 
+808,97 @@ fn test_next_last_change_info_called_when_gc() { assert_eq!(h.join().unwrap().unwrap().as_slice(), b"v"); } + +fn must_put(ctx: &Context, engine: &E, key: &[u8], value: &[u8]) { + engine.put(ctx, Key::from_raw(key), value.to_vec()).unwrap(); +} + +fn must_delete(ctx: &Context, engine: &E, key: &[u8]) { + engine.delete(ctx, Key::from_raw(key)).unwrap(); +} + +// Before the fix, a proposal can be proposed twice, which is caused by that +// write proposal validation and propose are not atomic. So a raft message with +// higher term between them can make the proposal goes to msg proposal +// forwarding logic. However, raft proposal forawrd logic is not compatible with +// the raft store, as the failed proposal makes client retry. The retried +// proposal coupled with forward proposal makes the propsal applied twice. +#[test] +fn test_forbid_forward_propose() { + use test_raftstore_v2::*; + let count = 3; + let mut cluster = new_server_cluster(0, count); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.run(); + + let region = cluster.get_region(b""); + let peer1 = new_peer(1, 1); + let peer2 = new_peer(2, 2); + cluster.must_transfer_leader(region.id, peer2.clone()); + let storage = cluster.sim.rl().storages[&1].clone(); + let storage2 = cluster.sim.rl().storages[&2].clone(); + + let p = Arc::new(AtomicBool::new(false)); + let p2 = p.clone(); + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + cluster.add_recv_filter_on_node( + 2, + Box::new(DropMessageFilter::new(Arc::new(move |_| { + if p2.load(Ordering::Relaxed) { + tx.lock().unwrap().send(()).unwrap(); + // One msg is enough + p2.store(false, Ordering::Relaxed); + true + } else { + false + } + }))), + ); + + let k = Key::from_raw(b"k"); + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(peer2); + + // block node when collecting message to make async write proposal and a raft + // message with higher term occured in a single batch. + fail::cfg("on_peer_collect_message_2", "pause").unwrap(); + let mut res = storage2.async_write( + &ctx, + WriteData::from_modifies(vec![Modify::Put(CF_DEFAULT, k.clone(), b"val".to_vec())]), + WriteEvent::EVENT_PROPOSED, + None, + ); + + // Make node 1 become leader + let router = cluster.get_router(1).unwrap(); + let mut raft_msg = RaftMessage::default(); + raft_msg.set_region_id(1); + raft_msg.set_to_peer(peer1.clone()); + raft_msg.set_region_epoch(region.get_region_epoch().clone()); + raft_msg + .mut_message() + .set_msg_type(MessageType::MsgTimeoutNow); + router.send_raft_message(Box::new(raft_msg)).unwrap(); + + std::thread::sleep(Duration::from_secs(1)); + + ctx.set_peer(peer1); + must_put(&ctx, &storage, b"k", b"val"); + must_delete(&ctx, &storage, b"k"); + + p.store(true, Ordering::Release); + rx.recv().unwrap(); + // Ensure the msg is sent by router. + std::thread::sleep(Duration::from_millis(100)); + fail::remove("on_peer_collect_message_2"); + + let r = block_on(async { res.next().await }).unwrap(); + assert!(matches!(r, WriteEvent::Finished(Err { .. }))); + + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(cluster.get(k.as_encoded()), None); +} From b3ffab6d4e4fc3278eec51df88b8571724ba12c5 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 12 Oct 2023 19:34:56 +0800 Subject: [PATCH 088/203] [Dynamic Regions] Supplement extra uts for test_storage.rs. 
(#15750) ref tikv/tikv#15409 This pr contains several necessary uts and some enhancements for `raftstore-v2`: - Supply extra test cases, including integration tests and unit tests for raftstore-v2 on `storage`. - Transplant the necessary options on setting `deadline` for `SimpleWrite` in raftstore-v2. Signed-off-by: lucasliang --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../src/operation/command/write/mod.rs | 24 ++++-- .../raftstore-v2/src/operation/txn_ext.rs | 9 ++- components/raftstore-v2/src/router/message.rs | 11 ++- components/test_raftstore-v2/src/cluster.rs | 11 ++- src/server/raftkv2/mod.rs | 50 +++++++++---- tests/failpoints/cases/test_storage.rs | 73 +++++++++++-------- 7 files changed, 117 insertions(+), 63 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 6896f8caa5e8..1734b46b25a7 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -267,7 +267,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, write.header, write.data, write.ch, - Some(write.disk_full_opt), + Some(write.extra_opts), ); } PeerMsg::UnsafeWrite(write) => { diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index cc71533a29a8..5806614e1929 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -5,7 +5,7 @@ use engine_traits::{ }; use fail::fail_point; use futures::channel::oneshot; -use kvproto::{kvrpcpb::DiskFullOpt, raft_cmdpb::RaftRequestHeader}; +use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ cmd_resp, @@ -13,6 +13,7 @@ use raftstore::{ metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, util::{self}, + RaftCmdExtraOpts, }, Error, Result, }; @@ -42,7 +43,7 @@ impl Peer { header: Box, data: SimpleWriteBinary, ch: CmdResChannel, - disk_full_opt: Option, + extra_opts: Option, ) { if !self.serving() { apply::notify_req_region_removed(self.region_id(), ch); @@ -60,12 +61,19 @@ impl Peer { ch.report_error(resp); return; } - // Check whether the write request can be proposed with the given disk full - // option. - if let Some(opt) = disk_full_opt && let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opt) { - let resp = cmd_resp::new_error(e); - ch.report_error(resp); - return; + if let Some(opts) = extra_opts { + if let Some(Err(e)) = opts.deadline.map(|deadline| deadline.check()) { + let resp = cmd_resp::new_error(e.into()); + ch.report_error(resp); + return; + } + // Check whether the write request can be proposed with the given disk full + // option. + if let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opts.disk_full_opt) { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } } // To maintain propose order, we need to make pending proposal first. 
self.propose_pending_writes(ctx); diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 4c875a675ef2..6a379b9a1a2d 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -17,7 +17,7 @@ use kvproto::{ use parking_lot::RwLockWriteGuard; use raft::eraftpb; use raftstore::store::{ - LocksStatus, PeerPessimisticLocks, TxnExt, TRANSFER_LEADER_COMMAND_REPLY_CTX, + LocksStatus, PeerPessimisticLocks, RaftCmdExtraOpts, TxnExt, TRANSFER_LEADER_COMMAND_REPLY_CTX, }; use slog::{error, info, Logger}; @@ -270,13 +270,16 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), DiskFullOpt::AllowedOnAlmostFull).0 else {unreachable!()}; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), RaftCmdExtraOpts { + disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, + ..Default::default() + }).0 else {unreachable!()}; self.on_simple_write( ctx, write.header, write.data, write.ch, - Some(write.disk_full_opt), + Some(write.extra_opts), ); true } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 830286bb1425..c9da5241fa88 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -6,7 +6,6 @@ use std::sync::{mpsc::SyncSender, Arc}; use collections::HashSet; use kvproto::{ import_sstpb::SstMeta, - kvrpcpb::DiskFullOpt, metapb, metapb::RegionEpoch, pdpb, @@ -15,7 +14,7 @@ use kvproto::{ }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, - util::LatencyInspector, FetchedLogs, GenSnapRes, TabletSnapKey, + util::LatencyInspector, FetchedLogs, GenSnapRes, RaftCmdExtraOpts, TabletSnapKey, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, }; @@ -135,7 +134,7 @@ pub struct SimpleWrite { pub header: Box, pub data: SimpleWriteBinary, pub ch: CmdResChannel, - pub disk_full_opt: DiskFullOpt, + pub extra_opts: RaftCmdExtraOpts, } #[derive(Debug)] @@ -299,13 +298,13 @@ impl PeerMsg { header: Box, data: SimpleWriteBinary, ) -> (Self, CmdResSubscriber) { - PeerMsg::simple_write_with_opt(header, data, DiskFullOpt::default()) + PeerMsg::simple_write_with_opt(header, data, RaftCmdExtraOpts::default()) } pub fn simple_write_with_opt( header: Box, data: SimpleWriteBinary, - disk_full_opt: DiskFullOpt, + extra_opts: RaftCmdExtraOpts, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -314,7 +313,7 @@ impl PeerMsg { header, data, ch, - disk_full_opt, + extra_opts, }), sub, ) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 496f8cc87dc2..53ff2c0f0b62 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -69,6 +69,9 @@ use tikv_util::{ }; use txn_types::WriteBatchFlags; +// MAX duration waiting for releasing store metas, default: 10s. +const MAX_WAIT_RELEASE_INTERVAL: u32 = 1000; + // We simulate 3 or 5 nodes, each has a store. // Sometimes, we use fixed id to test, which means the id // isn't allocated by pd, and node id, store id are same. 
@@ -328,7 +331,7 @@ pub trait Simulator { PeerMsg::simple_write_with_opt( Box::new(request.take_header()), write_encoder.encode(), - opts.disk_full_opt, + opts, ) }; @@ -1874,15 +1877,17 @@ impl, EK: KvEngine> Cluster { } self.leaders.clear(); for store_meta in self.store_metas.values() { - while Arc::strong_count(store_meta) != 1 { + // Limits the loop count of checking. + let mut idx = 0; + while Arc::strong_count(store_meta) != 1 && idx < MAX_WAIT_RELEASE_INTERVAL { std::thread::sleep(Duration::from_millis(10)); + idx += 1; } } self.store_metas.clear(); for sst_worker in self.sst_workers.drain(..) { sst_worker.stop_worker(); } - debug!("all nodes are shut down."); } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index a80cdda392f8..a9f7eb7586e5 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -19,7 +19,13 @@ use kvproto::{ }; pub use node::NodeV2; pub use raft_extension::Extension; -use raftstore::store::{util::encode_start_ts_into_flag_data, RegionSnapshot}; +use raftstore::{ + store::{ + cmd_resp, msg::ErrorCallback, util::encode_start_ts_into_flag_data, RaftCmdExtraOpts, + RegionSnapshot, + }, + Error, +}; use raftstore_v2::{ router::{ message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, @@ -265,6 +271,17 @@ impl tikv_kv::Engine for RaftKv2 { let region_id = ctx.region_id; ASYNC_REQUESTS_COUNTER_VEC.write.all.inc(); + + let inject_region_not_found = (|| { + // If rid is some, only the specified region reports error. + // If rid is None, all regions report error. + fail_point!("raftkv_early_error_report", |rid| -> bool { + rid.and_then(|rid| rid.parse().ok()) + .map_or(true, |rid: u64| rid == region_id) + }); + false + })(); + let begin_instant = Instant::now_coarse(); let mut header = Box::new(new_request_header(ctx)); let mut flags = 0; @@ -299,18 +316,25 @@ impl tikv_kv::Engine for RaftKv2 { }); } let (ch, sub) = builder.build(); - let msg = PeerMsg::SimpleWrite(SimpleWrite { - header, - data, - ch, - send_time: Instant::now_coarse(), - disk_full_opt: batch.disk_full_opt, - }); - let res = self - .router - .store_router() - .check_send(region_id, msg) - .map_err(tikv_kv::Error::from); + let res = if inject_region_not_found { + ch.report_error(cmd_resp::new_error(Error::RegionNotFound(region_id))); + Err(tikv_kv::Error::from(Error::RegionNotFound(region_id))) + } else { + let msg = PeerMsg::SimpleWrite(SimpleWrite { + header, + data, + ch, + send_time: Instant::now_coarse(), + extra_opts: RaftCmdExtraOpts { + deadline: batch.deadline, + disk_full_opt: batch.disk_full_opt, + }, + }); + self.router + .store_router() + .check_send(region_id, msg) + .map_err(tikv_kv::Error::from) + }; (Transform { resp: CmdResStream::new(sub), early_err: res.err(), diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 57047bef9d41..533d8d0abd4b 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -24,6 +24,7 @@ use kvproto::{ }; use resource_control::ResourceGroupManager; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::{ config::{ConfigController, Module}, storage::{ @@ -44,10 +45,11 @@ use tikv::{ use tikv_util::{future::paired_future_callback, worker::dummy_scheduler, HandyRwLock}; use txn_types::{Key, Mutation, TimeStamp}; -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_scheduler_leader_change_twice() { let snapshot_fp = 
"scheduler_async_snapshot_finish"; - let mut cluster = new_server_cluster(0, 2); + let mut cluster = new_cluster(0, 2); cluster.run(); let region0 = cluster.get_region(b""); let peers = region0.get_peers(); @@ -108,10 +110,11 @@ fn test_scheduler_leader_change_twice() { } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_server_catching_api_error() { let raftkv_fp = "raftkv_early_error_report"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); @@ -168,10 +171,11 @@ fn test_server_catching_api_error() { must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_raftkv_early_error_report() { let raftkv_fp = "raftkv_early_error_report"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); cluster.must_split(&cluster.get_region(b"k0"), b"k1"); @@ -233,10 +237,12 @@ fn test_raftkv_early_error_report() { fail::remove(raftkv_fp); } -#[test] +// FIXME: #[test_case(test_raftstore_v2::new_server_cluster)] +// Raftstore-v2 not support get the storage engine, returning `None` currently. +#[test_case(test_raftstore::new_server_cluster)] fn test_scale_scheduler_pool() { let snapshot_fp = "scheduler_start_execute"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let origin_pool_size = cluster.cfg.storage.scheduler_worker_pool_size; @@ -332,9 +338,10 @@ fn test_scale_scheduler_pool() { fail::remove(snapshot_fp); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_scheduler_pool_auto_switch_for_resource_ctl() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1090,9 +1097,10 @@ fn test_async_apply_prewrite_impl( } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1149,7 +1157,6 @@ fn test_async_apply_prewrite() { true, true, ); - test_async_apply_prewrite_impl( &storage, ctx.clone(), @@ -1188,9 +1195,10 @@ fn test_async_apply_prewrite() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite_fallback() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1378,9 +1386,10 @@ fn test_async_apply_prewrite_1pc_impl( } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite_1pc() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1405,9 +1414,10 @@ fn test_async_apply_prewrite_1pc() { test_async_apply_prewrite_1pc_impl(&storage, ctx, b"key", b"value2", 20, true); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_atomic_cas_lock_by_latch() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let 
engine = cluster @@ -1493,9 +1503,10 @@ fn test_atomic_cas_lock_by_latch() { assert_eq!(b"v2".to_vec(), ret); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_before_async_write_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1532,12 +1543,13 @@ fn test_before_async_write_deadline() { )); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_deadline_exceeded_on_get_and_batch_get() { use tikv_util::time::Instant; use tracker::INVALID_TRACKER_TOKEN; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1591,9 +1603,10 @@ fn test_deadline_exceeded_on_get_and_batch_get() { fail::remove("after-snapshot"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_before_propose_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); @@ -1629,9 +1642,10 @@ fn test_before_propose_deadline() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_resolve_lock_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); @@ -1789,10 +1803,11 @@ fn test_mvcc_concurrent_commit_and_rollback_at_shutdown() { assert_eq!(get_resp.value, v); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_raw_put_deadline() { let deadline_fp = "deadline_check_fail"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); From b4e0bf7bab6ad395b74c0be938119d82ded4cd2a Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 12 Oct 2023 21:16:56 -0500 Subject: [PATCH 089/203] raftstore: Introduce failed state for unsafe recovery to fix rollback merge timeout (#15635) close tikv/tikv#15629 Introduce failed state for unsafe recovery to fix rollback merge timeout. To rollback merge, it has to be in force leader state when performing online recovery. Force leader state would exit after executing the plan no matter succeeded or failed. While rollback merge is triggered on check merge tick periodically. So there is a chance that check merge can't always be in the time window of being force leader state. To solve that, let it skip exiting force leader state when failed to demote, so later rollback merge can be triggered. 
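For illustration, the gist of the fix as a minimal, self-contained sketch (the enum and method names follow the diff below, but the peer state machine and force-leader bookkeeping are reduced to placeholders; this is not the actual raftstore code):

    // Assumed placeholder types; the real ones carry much more state.
    struct ForceLeaderState;

    enum UnsafeRecoveryState {
        // ... other variants (WaitApply, DemoteFailedVoters, Destroy, ...) elided.
        Failed, // marker: the last recovery plan (e.g. demotion) failed
    }

    struct Peer {
        force_leader: Option<ForceLeaderState>,
        unsafe_recovery_state: Option<UnsafeRecoveryState>,
    }

    impl Peer {
        // `force` is true only on the timeout path, which must always clean up.
        fn on_exit_force_leader(&mut self, force: bool) {
            if self.force_leader.is_none() {
                return;
            }
            // Keep holding the force leader state while the plan is marked as
            // failed, so the periodic check-merge tick can still roll back the
            // merge; a forced (timeout) exit still clears it.
            if matches!(self.unsafe_recovery_state, Some(UnsafeRecoveryState::Failed)) && !force {
                return;
            }
            self.force_leader = None;
        }
    }

The `force` flag distinguishes the timeout cleanup path from the normal exit, so a recovery that never retries still gets cleaned up eventually.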
Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 7 ++- .../src/operation/unsafe_recovery/demote.rs | 2 + .../operation/unsafe_recovery/force_leader.rs | 15 ++++- .../src/operation/unsafe_recovery/report.rs | 20 ++++-- components/raftstore/src/store/fsm/peer.rs | 63 +++++++++++++------ .../raftstore/src/store/unsafe_recovery.rs | 16 +++-- .../failpoints/cases/test_unsafe_recovery.rs | 44 ++++++++++++- 7 files changed, 132 insertions(+), 35 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 1734b46b25a7..94506a8a19f3 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -382,9 +382,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, syncer, failed_stores, ), - PeerMsg::ExitForceLeaderState => { - self.fsm.peer_mut().on_exit_force_leader(self.store_ctx) - } + PeerMsg::ExitForceLeaderState => self + .fsm + .peer_mut() + .on_exit_force_leader(self.store_ctx, false), PeerMsg::ExitForceLeaderStateCampaign => { self.fsm.peer_mut().on_exit_force_leader_campaign() } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 37962a454527..20a42b9f9784 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -75,6 +75,7 @@ impl Peer { "Unsafe recovery, fail to finish demotion"; "err" => ?resp.get_header().get_error(), ); + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::Failed); return; } *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::DemoteFailedVoters { @@ -129,6 +130,7 @@ impl Peer { "Unsafe recovery, fail to exit joint state"; "err" => ?resp.get_header().get_error(), ); + *self.unsafe_recovery_state_mut()= Some(UnsafeRecoveryState::Failed); } } else { error!(self.logger, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs index ba7e391dbeff..e6af0fddb7b5 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs @@ -5,7 +5,9 @@ use std::mem; use collections::HashSet; use engine_traits::{KvEngine, RaftEngine}; use raft::{eraftpb::MessageType, StateRole, Storage}; -use raftstore::store::{util::LeaseState, ForceLeaderState, UnsafeRecoveryForceLeaderSyncer}; +use raftstore::store::{ + util::LeaseState, ForceLeaderState, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, +}; use slog::{info, warn}; use tikv_util::time::Instant as TiInstant; @@ -182,11 +184,20 @@ impl Peer { self.set_has_ready(); } - pub fn on_exit_force_leader(&mut self, ctx: &StoreContext) { + // TODO: add exit force leader check tick for raftstore v2 + pub fn on_exit_force_leader(&mut self, ctx: &StoreContext, force: bool) { if !self.has_force_leader() { return; } + if let Some(UnsafeRecoveryState::Failed) = self.unsafe_recovery_state() && !force { + // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding + info!( + self.logger, "skip exiting force leader state" + ); + return; + } + info!(self.logger, "exit force leader state"); *self.force_leader_mut() = None; // leader lease shouldn't be renewed in force leader state. 
diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 7173d00363ad..90c8e3db34d3 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -44,11 +44,19 @@ impl Peer { self.raft_group().raft.raft_log.committed }; - *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitApply { - target_index, - syncer, - }); - self.unsafe_recovery_maybe_finish_wait_apply(!self.serving()); + if target_index > self.raft_group().raft.raft_log.applied { + info!( + self.logger, + "Unsafe recovery, start wait apply"; + "target_index" => target_index, + "applied" => self.raft_group().raft.raft_log.applied, + ); + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitApply { + target_index, + syncer, + }); + self.unsafe_recovery_maybe_finish_wait_apply(!self.serving()); + } } pub fn unsafe_recovery_maybe_finish_wait_apply(&mut self, force: bool) { @@ -113,7 +121,7 @@ impl Peer { Some(UnsafeRecoveryState::DemoteFailedVoters { .. }) => { self.unsafe_recovery_maybe_finish_demote_failed_voters(ctx) } - Some(UnsafeRecoveryState::Destroy(_)) | None => {} + Some(UnsafeRecoveryState::Destroy(_)) | Some(UnsafeRecoveryState::Failed) | None => {} } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index b6d7f8fcfcc3..584db92e8be1 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -824,6 +824,8 @@ where target_index: self.fsm.peer.raft_group.raft.raft_log.last_index(), demote_after_exit: true, }); + } else { + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::Failed); } } else { self.unsafe_recovery_demote_failed_voters(syncer, failed_voters); @@ -863,6 +865,8 @@ where target_index: self.fsm.peer.raft_group.raft.raft_log.last_index(), demote_after_exit: false, }); + } else { + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::Failed); } } else { warn!( @@ -913,13 +917,22 @@ where self.fsm.peer.raft_group.raft.raft_log.committed }; - self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::WaitApply { - target_index, - syncer, - }); - self.fsm - .peer - .unsafe_recovery_maybe_finish_wait_apply(/* force= */ self.fsm.stopped); + if target_index > self.fsm.peer.raft_group.raft.raft_log.applied { + info!( + "Unsafe recovery, start wait apply"; + "region_id" => self.region().get_id(), + "peer_id" => self.fsm.peer_id(), + "target_index" => target_index, + "applied" => self.fsm.peer.raft_group.raft.raft_log.applied, + ); + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::WaitApply { + target_index, + syncer, + }); + self.fsm + .peer + .unsafe_recovery_maybe_finish_wait_apply(/* force= */ self.fsm.stopped); + } } // func be invoked firstly after assigned leader by BR, wait all leader apply to @@ -1466,7 +1479,7 @@ where } => { self.on_enter_pre_force_leader(syncer, failed_stores); } - SignificantMsg::ExitForceLeaderState => self.on_exit_force_leader(), + SignificantMsg::ExitForceLeaderState => self.on_exit_force_leader(false), SignificantMsg::UnsafeRecoveryDemoteFailedVoters { syncer, failed_voters, @@ -1700,10 +1713,19 @@ where self.fsm.has_ready = true; } - fn on_exit_force_leader(&mut self) { + fn on_exit_force_leader(&mut self, force: bool) { if self.fsm.peer.force_leader.is_none() { return; } + if let Some(UnsafeRecoveryState::Failed) = 
self.fsm.peer.unsafe_recovery_state && !force { + // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding + info!( + "skip exiting force leader state"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + ); + return; + } info!( "exit force leader state"; @@ -1712,7 +1734,7 @@ where ); self.fsm.peer.force_leader = None; // make sure it's not hibernated - assert_eq!(self.fsm.hibernate_state.group_state(), GroupState::Ordered); + assert_ne!(self.fsm.hibernate_state.group_state(), GroupState::Idle); // leader lease shouldn't be renewed in force leader state. assert_eq!( self.fsm.peer.leader_lease().inspect(None), @@ -2274,7 +2296,10 @@ where } } // Destroy does not need be processed, the state is cleaned up together with peer. - Some(_) | None => {} + Some(UnsafeRecoveryState::Destroy { .. }) + | Some(UnsafeRecoveryState::Failed) + | Some(UnsafeRecoveryState::WaitInitialize(..)) + | None => {} } } @@ -6360,13 +6385,6 @@ where return; } - if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { - // Clean up the force leader state after a timeout, since the PD recovery - // process may have been aborted for some reasons. - if time.saturating_elapsed() > UNSAFE_RECOVERY_STATE_TIMEOUT { - self.on_exit_force_leader(); - } - } if let Some(state) = &mut self.fsm.peer.unsafe_recovery_state { let unsafe_recovery_state_timeout_failpoint = || -> bool { fail_point!("unsafe_recovery_state_timeout", |_| true); @@ -6379,6 +6397,15 @@ where { info!("timeout, abort unsafe recovery"; "state" => ?state); state.abort(); + self.fsm.peer.unsafe_recovery_state = None; + } + } + + if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { + // Clean up the force leader state after a timeout, since the PD recovery + // process may have been aborted for some reasons. 
+ if time.saturating_elapsed() > UNSAFE_RECOVERY_STATE_TIMEOUT { + self.on_exit_force_leader(true); } } diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index f98fcaea5813..28943ae73397 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -241,7 +241,7 @@ pub struct UnsafeRecoveryForceLeaderSyncer(Arc); impl UnsafeRecoveryForceLeaderSyncer { pub fn new(report_id: u64, router: Arc) -> Self { let inner = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, force leader finished."); + info!("Unsafe recovery, force leader finished."; "report_id" => report_id); start_unsafe_recovery_report(router, report_id, false); }))); UnsafeRecoveryForceLeaderSyncer(Arc::new(inner)) @@ -260,11 +260,11 @@ impl UnsafeRecoveryExecutePlanSyncer { let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, plan execution finished"); if *abort_clone.lock().unwrap() { - warn!("Unsafe recovery, plan execution aborted"); + warn!("Unsafe recovery, plan execution aborted"; "report_id" => report_id); return; } + info!("Unsafe recovery, plan execution finished"; "report_id" => report_id); start_unsafe_recovery_report(router, report_id, true); }))); UnsafeRecoveryExecutePlanSyncer { @@ -330,7 +330,7 @@ impl UnsafeRecoveryWaitApplySyncer { let abort_clone = abort.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { if *abort_clone.lock().unwrap() { - warn!("Unsafe recovery, wait apply aborted"); + warn!("Unsafe recovery, wait apply aborted"; "report_id" => report_id); return; } info!("Unsafe recovery, wait apply finished"); @@ -363,7 +363,7 @@ impl UnsafeRecoveryFillOutReportSyncer { let reports = Arc::new(Mutex::new(vec![])); let reports_clone = reports.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, peer reports collected"); + info!("Unsafe recovery, peer reports collected"; "report_id" => report_id); let mut store_report = StoreReport::default(); { let mut reports_ptr = reports_clone.lock().unwrap(); @@ -420,6 +420,9 @@ pub enum UnsafeRecoveryState { }, Destroy(UnsafeRecoveryExecutePlanSyncer), WaitInitialize(UnsafeRecoveryExecutePlanSyncer), + // DemoteFailedVoter may fail due to some reasons. It's just a marker to avoid exiting force + // leader state + Failed, } impl UnsafeRecoveryState { @@ -429,6 +432,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.time, + UnsafeRecoveryState::Failed => return false, }; time.saturating_elapsed() >= timeout } @@ -439,6 +443,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => &syncer.abort, + UnsafeRecoveryState::Failed => return true, }; *abort.lock().unwrap() } @@ -449,6 +454,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. 
} | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.abort(), + UnsafeRecoveryState::Failed => (), } } } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 978489b5cd68..9e5a5dffcd94 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -458,7 +458,7 @@ fn test_unsafe_recovery_rollback_merge() { } // Block merge commit, let go of the merge prepare. - fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + fail::cfg("on_schedule_merge", "return()").unwrap(); let region = pd_client.get_region(b"k1").unwrap(); cluster.must_split(®ion, b"k2"); @@ -521,6 +521,48 @@ fn test_unsafe_recovery_rollback_merge() { pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); cluster.must_send_store_heartbeat(nodes[0]); + // Can't propose demotion as it's in merging mode + let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + let has_force_leader = store_report + .unwrap() + .get_peer_reports() + .iter() + .any(|p| p.get_is_force_leader()); + // Force leader is not exited due to demotion failure + assert!(has_force_leader); + + fail::remove("on_schedule_merge"); + fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + + // Make sure merge check is scheduled, and rollback merge is triggered + sleep_ms(50); + + // Re-triggers the unsafe recovery plan execution. + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); + cluster.must_send_store_heartbeat(nodes[0]); + let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + // No force leader + for peer_report in store_report.unwrap().get_peer_reports() { + assert!(!peer_report.get_is_force_leader()); + } + + // Demotion is done let mut demoted = false; for _ in 0..10 { let new_left = block_on(pd_client.get_region_by_id(left.get_id())) From cb27f24b89c8107c9ead57be5016ee779996ac25 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 16 Oct 2023 12:36:27 +0800 Subject: [PATCH 090/203] retry leader read when stale read encounters data not ready (#15726) ref tikv/tikv#14553 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore/src/store/worker/metrics.rs | 20 ++ components/raftstore/src/store/worker/read.rs | 330 +++++++++++++++--- tests/failpoints/cases/test_kv_service.rs | 57 ++- .../cases/test_replica_stale_read.rs | 18 +- 4 files changed, 358 insertions(+), 67 deletions(-) diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index fd3f54d239d0..8dca3bcfd443 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -70,6 +70,8 @@ make_static_metric! 
{ pub struct LocalReadMetrics { pub local_executed_requests: LocalIntCounter, pub local_executed_stale_read_requests: LocalIntCounter, + pub local_executed_stale_read_fallback_success_requests: LocalIntCounter, + pub local_executed_stale_read_fallback_failure_requests: LocalIntCounter, pub local_executed_replica_read_requests: LocalIntCounter, pub local_executed_snapshot_cache_hit: LocalIntCounter, pub reject_reason: LocalReadRejectCounter, @@ -82,6 +84,8 @@ thread_local! { LocalReadMetrics { local_executed_requests: LOCAL_READ_EXECUTED_REQUESTS.local(), local_executed_stale_read_requests: LOCAL_READ_EXECUTED_STALE_READ_REQUESTS.local(), + local_executed_stale_read_fallback_success_requests: LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_SUCCESS_REQUESTS.local(), + local_executed_stale_read_fallback_failure_requests: LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_FAILURE_REQUESTS.local(), local_executed_replica_read_requests: LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS.local(), local_executed_snapshot_cache_hit: LOCAL_READ_EXECUTED_CACHE_REQUESTS.local(), reject_reason: LocalReadRejectCounter::from(&LOCAL_READ_REJECT_VEC), @@ -100,6 +104,10 @@ pub fn maybe_tls_local_read_metrics_flush() { if m.last_flush_time.saturating_elapsed() >= Duration::from_millis(METRICS_FLUSH_INTERVAL) { m.local_executed_requests.flush(); m.local_executed_stale_read_requests.flush(); + m.local_executed_stale_read_fallback_success_requests + .flush(); + m.local_executed_stale_read_fallback_failure_requests + .flush(); m.local_executed_replica_read_requests.flush(); m.local_executed_snapshot_cache_hit.flush(); m.reject_reason.flush(); @@ -189,6 +197,18 @@ lazy_static! { "Total number of stale read requests directly executed by local reader." ) .unwrap(); + pub static ref LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_SUCCESS_REQUESTS: IntCounter = + register_int_counter!( + "tikv_raftstore_local_read_executed_stale_read_fallback_success_requests", + "Total number of stale read requests executed by local leader peer as snapshot read." + ) + .unwrap(); + pub static ref LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_FAILURE_REQUESTS: IntCounter = + register_int_counter!( + "tikv_raftstore_local_read_executed_stale_read_fallback_failure_requests", + "Total number of stale read requests failed to be executed by local leader peer as snapshot read." + ) + .unwrap(); pub static ref LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS: IntCounter = register_int_counter!( "tikv_raftstore_local_read_executed_replica_read_requests", "Total number of stale read requests directly executed by local reader." diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5d6ede9c1936..2d54c00baa69 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -30,7 +30,7 @@ use tikv_util::{ }; use time::Timespec; use tracker::GLOBAL_TRACKERS; -use txn_types::TimeStamp; +use txn_types::{TimeStamp, WriteBatchFlags}; use super::metrics::*; use crate::{ @@ -974,80 +974,155 @@ where cmd.callback.set_result(read_resp); } + /// Try to handle the read request using local read, if the leader is valid + /// the read response is returned, otherwise None is returned. 
+ fn try_local_leader_read( + &mut self, + req: &RaftCmdRequest, + delegate: &mut CachedReadDelegate, + read_id: Option, + snap_updated: &mut bool, + last_valid_ts: Timespec, + ) -> Option> { + let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, read_id); + + (*snap_updated) = + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + + let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); + if !delegate.is_in_leader_lease(snapshot_ts) { + return None; + } + + let region = Arc::clone(&delegate.region); + let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } + // Try renew lease in advance + delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); + Some(response) + } + + /// Try to handle the stale read request, if the read_ts < safe_ts the read + /// response is returned, otherwise the raft command response with + /// `DataIsNotReady` error is returned. + fn try_local_stale_read( + &mut self, + req: &RaftCmdRequest, + delegate: &mut CachedReadDelegate, + snap_updated: &mut bool, + last_valid_ts: Timespec, + ) -> std::result::Result, RaftCmdResponse> { + let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); + delegate.check_stale_read_safe(read_ts)?; + + // Stale read does not use cache, so we pass None for read_id + let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); + (*snap_updated) = + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + + let region = Arc::clone(&delegate.region); + // Getting the snapshot + let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } + // Double check in case `safe_ts` change after the first check and before + // getting snapshot + delegate.check_stale_read_safe(read_ts)?; + + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); + Ok(response) + } + pub fn propose_raft_command( &mut self, read_id: Option, - req: RaftCmdRequest, + mut req: RaftCmdRequest, cb: Callback, ) { match self.pre_propose_raft_command(&req) { Ok(Some((mut delegate, policy))) => { - let snap_updated; + let mut snap_updated = false; let last_valid_ts = delegate.last_valid_ts; let mut response = match policy { // Leader can read local if and only if it is in lease. RequestPolicy::ReadLocal => { - let mut local_read_ctx = - LocalReadContext::new(&mut self.snap_cache, read_id); - - snap_updated = local_read_ctx - .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); - - let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); - if !delegate.is_in_leader_lease(snapshot_ts) { + if let Some(read_resp) = self.try_local_leader_read( + &req, + &mut delegate, + read_id, + &mut snap_updated, + last_valid_ts, + ) { + read_resp + } else { fail_point!("localreader_before_redirect", |_| {}); // Forward to raftstore. 
self.redirect(RaftCommand::new(req, cb)); return; } - - let region = Arc::clone(&delegate.region); - let mut response = - delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - if let Some(snap) = response.snapshot.as_mut() { - snap.bucket_meta = delegate.bucket_meta.clone(); - } - // Try renew lease in advance - delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); - response } // Replica can serve stale read if and only if its `safe_ts` >= `read_ts` RequestPolicy::StaleRead => { - let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); - if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.set_result(ReadResponse { - response: resp, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }); - return; + match self.try_local_stale_read( + &req, + &mut delegate, + &mut snap_updated, + last_valid_ts, + ) { + Ok(read_resp) => read_resp, + Err(err_resp) => { + // It's safe to change the header of the `RaftCmdRequest`, as it + // would not affect the `SnapCtx` used in upper layer like. + let unset_stale_flag = req.get_header().get_flags() + & (!WriteBatchFlags::STALE_READ.bits()); + req.mut_header().set_flags(unset_stale_flag); + let mut inspector = Inspector { + delegate: &delegate, + }; + // The read request could be handled using snapshot read if the + // local peer is a valid leader. + let allow_fallback_leader_read = inspector + .inspect(&req) + .map_or(false, |r| r == RequestPolicy::ReadLocal); + if !allow_fallback_leader_read { + cb.set_result(ReadResponse { + response: err_resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); + return; + } + if let Some(read_resp) = self.try_local_leader_read( + &req, + &mut delegate, + None, + &mut snap_updated, + last_valid_ts, + ) { + TLS_LOCAL_READ_METRICS.with(|m| { + m.borrow_mut() + .local_executed_stale_read_fallback_success_requests + .inc() + }); + read_resp + } else { + TLS_LOCAL_READ_METRICS.with(|m| { + m.borrow_mut() + .local_executed_stale_read_fallback_failure_requests + .inc() + }); + cb.set_result(ReadResponse { + response: err_resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); + return; + } + } } - - // Stale read does not use cache, so we pass None for read_id - let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); - snap_updated = local_read_ctx - .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); - - let region = Arc::clone(&delegate.region); - // Getting the snapshot - let mut response = - delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - if let Some(snap) = response.snapshot.as_mut() { - snap.bucket_meta = delegate.bucket_meta.clone(); - } - // Double check in case `safe_ts` change after the first check and before - // getting snapshot - if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.set_result(ReadResponse { - response: resp, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }); - return; - } - TLS_LOCAL_READ_METRICS - .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); - response } _ => unreachable!(), }; @@ -1598,6 +1673,8 @@ mod tests { read_progress.update_safe_ts(1, 1); assert_eq!(read_progress.safe_ts(), 1); + // Expire lease manually to avoid local retry on leader peer. 
+ lease.expire(); let data = { let mut d = [0u8; 8]; (&mut d[..]).encode_u64(2).unwrap(); @@ -1755,13 +1832,14 @@ mod tests { assert_eq!(kv_engine.path(), tablet.path()); } - fn prepare_read_delegate( + fn prepare_read_delegate_with_lease( store_id: u64, region_id: u64, term: u64, pr_ids: Vec, region_epoch: RegionEpoch, store_meta: Arc>, + max_lease: Duration, ) { let mut region = metapb::Region::default(); region.set_id(region_id); @@ -1770,7 +1848,7 @@ mod tests { let leader = prs[0].clone(); region.set_region_epoch(region_epoch); - let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. + let mut lease = Lease::new(max_lease, Duration::milliseconds(250)); // 1s is long enough. let read_progress = Arc::new(RegionReadProgress::new(®ion, 1, 1, 1)); // Register region @@ -1799,6 +1877,25 @@ mod tests { } } + fn prepare_read_delegate( + store_id: u64, + region_id: u64, + term: u64, + pr_ids: Vec, + region_epoch: RegionEpoch, + store_meta: Arc>, + ) { + prepare_read_delegate_with_lease( + store_id, + region_id, + term, + pr_ids, + region_epoch, + store_meta, + Duration::seconds(1), + ) + } + #[test] fn test_snap_across_regions() { let store_id = 2; @@ -2165,4 +2262,123 @@ mod tests { must_not_redirect(&mut reader, &rx, task); notify_rx.recv().unwrap(); } + + #[test] + fn test_stale_read_local_leader_fallback() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx) = new_reader( + "test-stale-local-leader-fallback", + store_id, + store_meta.clone(), + ); + reader.kv_engine.put(b"key", b"value").unwrap(); + + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let term6 = 6; + + // Register region1. + let pr_ids1 = vec![2, 3, 4]; + let prs1 = new_peers(store_id, pr_ids1.clone()); + // Ensure the leader lease is long enough so the fallback would work. + prepare_read_delegate_with_lease( + store_id, + 1, + term6, + pr_ids1.clone(), + epoch13.clone(), + store_meta.clone(), + Duration::seconds(10), + ); + let leader1 = prs1[0].clone(); + + // Local read. + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader1); + header.set_region_epoch(epoch13.clone()); + header.set_term(term6); + header.set_flags(header.get_flags() | WriteBatchFlags::STALE_READ.bits()); + cmd.set_header(header.clone()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + // A peer can serve read_ts < safe_ts. + let safe_ts = TimeStamp::compose(2, 0); + { + let mut meta = store_meta.lock().unwrap(); + let delegate = meta.readers.get_mut(&1).unwrap(); + delegate + .read_progress + .update_safe_ts(1, safe_ts.into_inner()); + assert_eq!(delegate.read_progress.safe_ts(), safe_ts.into_inner()); + } + let read_ts_1 = TimeStamp::compose(1, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_1.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task); + snap_rx.recv().unwrap().snapshot.unwrap(); + + // When read_ts > safe_ts, the leader peer could still serve if its lease is + // valid. 
+ let read_ts_2 = TimeStamp::compose(safe_ts.physical() + 201, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_2.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task); + snap_rx.recv().unwrap().snapshot.unwrap(); + + // The fallback would not happen if the lease is not valid. + prepare_read_delegate_with_lease( + store_id, + 1, + term6, + pr_ids1, + epoch13, + store_meta, + Duration::milliseconds(1), + ); + thread::sleep(std::time::Duration::from_millis(50)); + let (snap_tx, snap_rx) = channel(); + let task2 = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task2); + assert!( + snap_rx + .recv() + .unwrap() + .response + .get_header() + .get_error() + .has_data_is_not_ready() + ); + } } diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index f3831bb984b2..00f5c3c778e1 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -5,9 +5,10 @@ use std::{sync::Arc, time::Duration}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use test_raftstore::{ - must_kv_prewrite, must_new_cluster_and_kv_client, must_new_cluster_mul, - try_kv_prewrite_with_impl, + configure_for_lease_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, + must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with_impl, }; +use tikv_util::{config::ReadableDuration, HandyRwLock}; #[test] fn test_batch_get_memory_lock() { @@ -103,3 +104,55 @@ fn test_undetermined_write_err() { // The previous panic hasn't been captured. assert!(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| drop(cluster))).is_err()); } +#[test] +fn test_stale_read_on_local_leader() { + let mut cluster = new_server_cluster(0, 1); + // Increase the election tick to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); + let max_lease = Duration::from_secs(2); + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); + cluster.pd_client.disable_default_operator(); + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader.clone()); + ctx.set_region_epoch(epoch); + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + let v1 = b"value1".to_vec(); + + // Write record. + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 10); + must_kv_commit(&client, ctx.clone(), vec![k.clone()], 10, 30, 30); + + // Prewrite and leave a lock. 
+ let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v1); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 50); + + let mut req = GetRequest::default(); + req.set_context(ctx); + req.set_key(k); + req.version = 40; + req.mut_context().set_stale_read(true); + + // The stale read should fallback and succeed on the leader peer. + let resp = client.kv_get(&req).unwrap(); + assert!(resp.error.is_none()); + assert!(resp.region_error.is_none()); + assert_eq!(v, resp.get_value()); +} diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index b7d436d92d71..cb986250d82e 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -288,9 +288,11 @@ fn test_update_resoved_ts_before_apply_index() { sleep_ms(100); // The leader can't handle stale read with `commit_ts2` because its `safe_ts` - // can't update due to its `apply_index` not update + // can't update due to its `apply_index` not update. + // The request would be handled as a snapshot read on the valid leader peer + // after fallback. let resp = leader_client.kv_read(b"key1".to_vec(), commit_ts2); - assert!(resp.get_region_error().has_data_is_not_ready(),); + assert_eq!(resp.get_value(), b"value2"); // The follower can't handle stale read with `commit_ts2` because it don't // have enough data let resp = follower_client2.kv_read(b"key1".to_vec(), commit_ts2); @@ -667,10 +669,10 @@ fn test_stale_read_future_ts_not_update_max_ts() { b"key1".to_vec(), ); - // Perform stale read with a future ts should return error + // Perform stale read with a future ts, the stale read could be processed + // falling back to snapshot read on the leader peer. let read_ts = get_tso(&pd_client) + 10000000; - let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); - assert!(resp.get_region_error().has_data_is_not_ready()); + leader_client.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), read_ts); // The `max_ts` should not updated by the stale read request, so we can prewrite // and commit `async_commit` transaction with a ts that smaller than the @@ -687,10 +689,10 @@ fn test_stale_read_future_ts_not_update_max_ts() { leader_client.must_kv_commit(vec![b"key2".to_vec()], prewrite_ts, commit_ts); leader_client.must_kv_read_equal(b"key2".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); - // Perform stale read with a future ts should return error + // Perform stale read with a future ts, the stale read could be processed + // falling back to snapshot read on the leader peer. let read_ts = get_tso(&pd_client) + 10000000; - let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); - assert!(resp.get_region_error().has_data_is_not_ready()); + leader_client.must_kv_read_equal(b"key2".to_vec(), b"value1".to_vec(), read_ts); // The `max_ts` should not updated by the stale read request, so 1pc transaction // with a ts that smaller than the `read_ts` should not be fallbacked to 2pc From 9fb1ce63a079cd486f0fc4661ff28abb76d0e734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:18:29 +0800 Subject: [PATCH 091/203] snap_restore: Abort last recover region (#15685) close tikv/tikv#15684 This PR will make `recover_region` return `ABORTED` once there are new `recover_region` RPCs in. 
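For reference, the guard can be condensed into a standalone sketch (simplified signatures and free functions are assumed here purely for illustration; in the patch the state lives inside `RecoveryService` and also records the start time for logging):

    use std::{
        future::Future,
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Mutex,
        },
    };

    use futures::future::{abortable, AbortHandle, Aborted};

    struct RecoverRegionState {
        finished: Arc<AtomicBool>,
        abort: AbortHandle,
    }

    // Wrap one recover-region task so it can be aborted and observed.
    fn wrap_task<F: Future>(
        task: F,
    ) -> (RecoverRegionState, impl Future<Output = Result<F::Output, Aborted>>) {
        let finished = Arc::new(AtomicBool::new(false));
        let (cancelable, abort) = abortable(task);
        let state = RecoverRegionState {
            finished: finished.clone(),
            abort,
        };
        (state, async move {
            let res = cancelable.await;
            finished.store(true, Ordering::SeqCst);
            res
        })
    }

    // Each new RPC replaces the stored state; an unfinished predecessor is
    // aborted so its gRPC sink can be answered with the ABORTED status.
    fn replace_last(last: &Mutex<Option<RecoverRegionState>>, new_state: RecoverRegionState) {
        let mut guard = last.lock().unwrap();
        if let Some(prev) = guard.take() {
            if !prev.finished.load(Ordering::SeqCst) {
                prev.abort.abort();
            }
        }
        *guard = Some(new_state);
    }

The handler then awaits the wrapped task and maps `Err(Aborted)` to an `ABORTED` gRPC status, which is what the `recover_region` change below does.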
Signed-off-by: hillium Co-authored-by: qupeng --- Cargo.toml | 2 +- components/raftstore/src/store/transport.rs | 16 +- components/snap_recovery/Cargo.toml | 7 + components/snap_recovery/src/leader_keeper.rs | 16 +- components/snap_recovery/src/services.rs | 147 ++++++++++++++++-- 5 files changed, 165 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4d8cefa9fa46..bd2b49469509 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -334,7 +334,7 @@ resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } server = { path = "components/server" } service = { path = "components/service" } -snap_recovery = { path = "components/snap_recovery" } +snap_recovery = { path = "components/snap_recovery", default-features = false } sst_importer = { path = "components/sst_importer", default-features = false } test_backup = { path = "components/test_backup" } test_coprocessor = { path = "components/test_coprocessor", default-features = false } diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index 7f10e7cd2499..2ca19fbe5fe3 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::sync::mpsc; +use std::sync::{mpsc, Mutex}; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, Snapshot}; @@ -46,6 +46,13 @@ where fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()>; } +impl<'a, T: SignificantRouter, EK: KvEngine> SignificantRouter for &'a Mutex { + #[inline] + fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { + Mutex::lock(self).unwrap().significant_send(region_id, msg) + } +} + /// Routes proposal to target region. 
pub trait ProposalRouter where @@ -79,6 +86,13 @@ where } } +impl<'a, EK: KvEngine, T: CasualRouter> CasualRouter for &'a Mutex { + #[inline] + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::send(&*Mutex::lock(self).unwrap(), region_id, msg) + } +} + impl SignificantRouter for RaftRouter where EK: KvEngine, diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 8b0b0ec4c3a9..23cbdcfe0982 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -5,6 +5,13 @@ edition = "2021" publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] +test-engine-kv-rocksdb = ["tikv/test-engine-kv-rocksdb"] +test-engine-raft-raft-engine = ["tikv/test-engine-raft-raft-engine"] +test-engines-rocksdb = ["tikv/test-engines-rocksdb"] +test-engines-panic = ["tikv/test-engines-panic"] + [dependencies] chrono = "0.4" encryption = { workspace = true } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 417d5becca31..ca2623c82ca3 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -9,18 +9,17 @@ use std::{ use engine_traits::KvEngine; use futures::compat::Future01CompatExt; -use itertools::Itertools; use raftstore::{ errors::{Error, Result}, store::{Callback, CasualMessage, CasualRouter, SignificantMsg, SignificantRouter}, }; use tikv_util::{future::paired_future_callback, timer::GLOBAL_TIMER_HANDLE}; -pub struct LeaderKeeper { +pub struct LeaderKeeper<'a, EK, Router: 'a> { router: Router, not_leader: HashSet, - _ek: PhantomData, + _ek: PhantomData<&'a EK>, } #[derive(Default)] @@ -51,10 +50,10 @@ impl std::fmt::Debug for StepResult { } } -impl LeaderKeeper +impl<'a, EK, Router> LeaderKeeper<'a, EK, Router> where EK: KvEngine, - Router: CasualRouter + SignificantRouter + 'static, + Router: CasualRouter + SignificantRouter + 'a, { pub fn new(router: Router, to_keep: impl IntoIterator) -> Self { Self { @@ -85,8 +84,9 @@ where const CONCURRENCY: usize = 256; let r = Mutex::new(StepResult::default()); let success = Mutex::new(HashSet::new()); - for batch in &self.not_leader.iter().chunks(CONCURRENCY) { - let tasks = batch.map(|region_id| async { + let regions = self.not_leader.iter().copied().collect::>(); + for batch in regions.as_slice().chunks(CONCURRENCY) { + let tasks = batch.iter().map(|region_id| async { match self.check_leader(*region_id).await { Ok(_) => { success.lock().unwrap().insert(*region_id); @@ -150,7 +150,7 @@ mod test { leaders: RefCell>, } - impl LeaderKeeper { + impl<'a, EK, Router> LeaderKeeper<'a, EK, Router> { fn mut_router(&mut self) -> &mut Router { &mut self.router } diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index 10f82d64917d..daf6e7ed30ff 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -2,8 +2,14 @@ use std::{ error::Error as StdError, + fmt::Display, + future::Future, result, - sync::mpsc::{sync_channel, SyncSender}, + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc::{sync_channel, SyncSender}, + Arc, Mutex, + }, thread::Builder, time::Instant, }; @@ -17,10 +23,12 @@ use engine_traits::{CfNamesExt, CfOptionsExt, Engines, Peekable, RaftEngine}; use futures::{ channel::mpsc, executor::{ThreadPool, ThreadPoolBuilder}, + 
stream::{AbortHandle, Aborted}, FutureExt, SinkExt, StreamExt, }; use grpcio::{ - ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, + ClientStreamingSink, RequestStream, RpcContext, RpcStatus, RpcStatusCode, ServerStreamingSink, + UnarySink, WriteFlags, }; use kvproto::{raft_serverpb::StoreIdent, recoverdatapb::*}; use raftstore::{ @@ -65,6 +73,44 @@ pub struct RecoveryService { engines: Engines, router: RaftRouter, threads: ThreadPool, + + /// The handle to last call of recover region RPC. + /// + /// We need to make sure the execution of keeping leader exits before next + /// `RecoverRegion` rpc gets in. Or the previous call may stuck at keep + /// leader forever, once the second caller request the leader to be at + /// another store. + // NOTE: Perhaps it would be better to abort the procedure as soon as the client + // stream has been closed, but yet it seems there isn't such hook like + // `on_client_go` for us, and the current implementation only start + // work AFTER the client closes their sender part(!) + last_recovery_region_rpc: Arc>>, +} + +struct RecoverRegionState { + start_at: Instant, + finished: Arc, + abort: AbortHandle, +} + +impl RecoverRegionState { + /// Create the state by wrapping a execution of recover region. + fn wrap_task, T>( + task: F, + ) -> (Self, impl Future>) { + let finished = Arc::new(AtomicBool::new(false)); + let (cancelable_task, abort) = futures::future::abortable(task); + let state = Self { + start_at: Instant::now(), + finished: Arc::clone(&finished), + abort, + }; + (state, async move { + let res = cancelable_task.await; + finished.store(true, Ordering::SeqCst); + res + }) + } } impl RecoveryService { @@ -99,6 +145,7 @@ impl RecoveryService { engines, router, threads, + last_recovery_region_rpc: Arc::default(), } } @@ -140,6 +187,34 @@ impl RecoveryService { Ok(store_id) } + fn abort_last_recover_region(&self, place: impl Display) { + let mut last_state_lock = self.last_recovery_region_rpc.lock().unwrap(); + Self::abort_last_recover_region_of(place, &mut last_state_lock) + } + + fn replace_last_recover_region(&self, place: impl Display, new_state: RecoverRegionState) { + let mut last_state_lock = self.last_recovery_region_rpc.lock().unwrap(); + Self::abort_last_recover_region_of(place, &mut last_state_lock); + *last_state_lock = Some(new_state); + } + + fn abort_last_recover_region_of( + place: impl Display, + last_state_lock: &mut Option, + ) { + if let Some(last_state) = last_state_lock.take() { + info!("Another task enter, checking last task."; + "finished" => ?last_state.finished, + "start_before" => ?last_state.start_at.elapsed(), + "abort_by" => %place, + ); + if !last_state.finished.load(Ordering::SeqCst) { + last_state.abort.abort(); + warn!("Last task not finished, aborting it."); + } + } + } + // a new wait apply syncer share with all regions, // when all region reached the target index, share reference decreased to 0, // trigger closure to send finish info back. @@ -190,7 +265,7 @@ impl RecoverData for RecoveryService { // 1. br start to ready region meta fn read_region_meta( &mut self, - _ctx: RpcContext<'_>, + ctx: RpcContext<'_>, _req: ReadRegionMetaRequest, mut sink: ServerStreamingSink, ) { @@ -215,6 +290,11 @@ impl RecoverData for RecoveryService { } }); + // Hacking: Sometimes, the client may omit the RPC call to `recover_region` if + // no leader should be register to some (unfortunate) store. 
So we abort + // last recover region here too, anyway this RPC implies a consequent + // `recover_region` for now. + self.abort_last_recover_region(format_args!("read_region_meta by {}", ctx.peer())); self.threads.spawn_ok(send_task); } @@ -222,11 +302,11 @@ impl RecoverData for RecoveryService { // assign region leader and wait leader apply to last log fn recover_region( &mut self, - _ctx: RpcContext<'_>, + ctx: RpcContext<'_>, mut stream: RequestStream, sink: ClientStreamingSink, ) { - let raft_router = self.router.clone(); + let mut raft_router = Mutex::new(self.router.clone()); let store_id = self.get_store_id(); info!("start to recover the region"); let task = async move { @@ -241,17 +321,15 @@ impl RecoverData for RecoveryService { } } - let mut lk = LeaderKeeper::new(raft_router.clone(), leaders.clone()); + let mut lk = LeaderKeeper::new(&raft_router, leaders.clone()); // We must use the tokio runtime here because there isn't a `block_in_place` // like thing in the futures executor. It simply panics when block // on the block_on context. // It is also impossible to directly `await` here, because that will make // borrowing to the raft router crosses the await point. - tokio::runtime::Builder::new_current_thread() - .build() - .expect("failed to build temporary tokio runtime.") - .block_on(lk.elect_and_wait_all_ready()); + lk.elect_and_wait_all_ready().await; info!("all region leader assigned done"; "count" => %leaders.len()); + drop(lk); let now = Instant::now(); // wait apply to the last log @@ -260,7 +338,7 @@ impl RecoverData for RecoveryService { let (tx, rx) = sync_channel(1); REGION_EVENT_COUNTER.start_wait_leader_apply.inc(); let wait_apply = SnapshotRecoveryWaitApplySyncer::new(region_id, tx.clone()); - if let Err(e) = raft_router.significant_send( + if let Err(e) = raft_router.get_mut().unwrap().significant_send( region_id, SignificantMsg::SnapshotRecoveryWaitApply(wait_apply.clone()), ) { @@ -277,6 +355,10 @@ impl RecoverData for RecoveryService { for (rid, rx) in leaders.iter().zip(rx_apply) { if let Some(rx) = rx { CURRENT_WAIT_APPLY_LEADER.set(*rid as _); + // FIXME: we cannot the former RPC when we get stuck at here. + // Perhaps we need to make `SnapshotRecoveryWaitApplySyncer` be able to support + // asynchronous channels. But for now, waiting seems won't cause live lock, so + // we are keeping it unchanged. match rx.recv() { Ok(region_id) => { debug!("leader apply to last log"; "region_id" => region_id); @@ -301,10 +383,20 @@ impl RecoverData for RecoveryService { Err(e) => error!("failed to get store id"; "error" => ?e), }; - let _ = sink.success(resp).await; + resp }; - self.threads.spawn_ok(task); + let (state, task) = RecoverRegionState::wrap_task(task); + self.replace_last_recover_region(format!("recover_region by {}", ctx.peer()), state); + self.threads.spawn_ok(async move { + let res = match task.await { + Ok(resp) => sink.success(resp), + Err(Aborted) => sink.fail(RpcStatus::new(RpcStatusCode::ABORTED)), + }; + if let Err(err) = res.await { + warn!("failed to response recover region rpc"; "err" => %err); + } + }); } // 3. 
ensure all region peer/follower apply to last @@ -381,3 +473,32 @@ impl RecoverData for RecoveryService { self.threads.spawn_ok(send_task); } } + +#[cfg(test)] +mod test { + use std::{sync::atomic::Ordering, time::Duration}; + + use futures::never::Never; + + use super::RecoverRegionState; + + #[test] + fn test_state() { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_time() + .build() + .unwrap(); + let (state, task) = RecoverRegionState::wrap_task(futures::future::pending::()); + let hnd = rt.spawn(task); + state.abort.abort(); + rt.block_on(async { tokio::time::timeout(Duration::from_secs(10), hnd).await }) + .unwrap() + .unwrap() + .unwrap_err(); + + let (state, task) = RecoverRegionState::wrap_task(futures::future::ready(42)); + assert_eq!(state.finished.load(Ordering::SeqCst), false); + assert_eq!(rt.block_on(task), Ok(42)); + assert_eq!(state.finished.load(Ordering::SeqCst), true); + } +} From 8c7d9e3b7d71b012fdf2a7e50423b61af1bf6092 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 16 Oct 2023 21:00:29 +0800 Subject: [PATCH 092/203] config: adjust rocksdb background compaction threads (#15769) ref tikv/tikv#14470 Signed-off-by: glorv Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 74f25a22ef65..d1fb1e4f8d8f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -264,7 +264,7 @@ fn get_background_job_limits_impl( // v2: decrease the compaction threads to make the qps more stable. let max_compactions = match engine_type { EngineType::RaftKv => max_background_jobs - max_background_flushes, - EngineType::RaftKv2 => (max_background_jobs + 7) / 8, + EngineType::RaftKv2 => (max_background_jobs + 3) / 4, }; let max_sub_compactions: u32 = (max_compactions - 1).clamp(1, defaults.max_sub_compactions); max_background_jobs = max_background_flushes + max_compactions; @@ -6052,7 +6052,7 @@ mod tests { &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 3, + max_background_jobs: 4, max_background_flushes: 2, max_sub_compactions: 1, max_titan_background_gc: 4, @@ -6082,9 +6082,9 @@ mod tests { &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 5, + max_background_jobs: 6, max_background_flushes: 3, - max_sub_compactions: 1, + max_sub_compactions: 2, max_titan_background_gc: 4, } ); From d8756403ef730142d7eb5b3b79567b1576d5ed50 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Mon, 16 Oct 2023 12:56:00 -0500 Subject: [PATCH 093/203] import: write RPC will check region epoch before continue (#15013) close tikv/tikv#15003 Signed-off-by: lance6716 Co-authored-by: tonyxuqqi --- Makefile | 8 + .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 87 ++------ components/raftstore/src/store/msg.rs | 6 - .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +----------- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/sst_importer/src/import_file.rs | 49 +++-- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 8 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/import/sst_service.rs | 185 +++++++++++++++++- 14 files changed, 271 insertions(+), 228 deletions(-) diff 
--git a/Makefile b/Makefile index bb1d7316e1b6..ce8d4e8b793d 100644 --- a/Makefile +++ b/Makefile @@ -406,6 +406,14 @@ docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test +docker_shell: + docker build -f Dockerfile.test \ + -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + . + docker run -it -v $(shell pwd):/tikv \ + ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + /bin/bash + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index e963434fe837..3d39c9a73697 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,6 +43,11 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); + // filter old version SSTs + let ssts: Vec<_> = ssts + .into_iter() + .filter(|sst| sst.api_version >= sst_importer::API_VERSION_2) + .collect(); if ssts.is_empty() { return Ok(()); } @@ -50,9 +55,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.get_region_id()) + .entry(sst.meta.get_region_id()) .or_default() - .push(sst); + .push(sst.meta); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2434dfdd8e63..33010a993a24 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,14 +36,13 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ - import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; +use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -810,9 +809,6 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), - StoreMsg::ValidateSstResult { invalid_ssts } => { - self.on_validate_sst_result(invalid_ssts) - } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1652,12 +1648,7 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new( - meta.get_id(), - self.router.clone(), - Arc::clone(&importer), - Arc::clone(&pd_client), - ); + let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2755,44 +2746,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { - fn on_validate_sst_result(&mut self, ssts: Vec) { - if ssts.is_empty() || 
self.ctx.importer.get_mode() == SwitchMode::Import { - return; - } - // A stale peer can still ingest a stale Sst before it is - // destroyed. We need to make sure that no stale peer exists. - let mut delete_ssts = Vec::new(); - { - let meta = self.ctx.store_meta.lock().unwrap(); - for sst in ssts { - if !meta.regions.contains_key(&sst.get_region_id()) { - delete_ssts.push(sst); - } - } - } - if delete_ssts.is_empty() { - return; - } - - let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to delete ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); - let mut validate_ssts = Vec::new(); - let import_size = box_try!(self.ctx.importer.get_total_size()); - STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { @@ -2801,15 +2756,22 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if let Some(r) = meta.regions.get(&sst.get_region_id()) { + if sst.api_version < sst_importer::API_VERSION_2 { + // SST of old versions are created by old TiKV and have different prerequisite + // we can't delete them here. They can only be deleted manually + continue; + } + if let Some(r) = meta.regions.get(&sst.meta.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.meta.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. - delete_ssts.push(sst); + delete_ssts.push(sst.meta); } } else { - // If the peer doesn't exist, we need to validate the SST through PD. - validate_ssts.push(sst); + // The write RPC of import sst service have make sure the region do exist at the + // write time, and now the region is not found, sst can be + // deleted because it won't be used by ingest in future. + delete_ssts.push(sst.meta); } } } @@ -2829,27 +2791,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } - // When there is an import job running, the region which this sst belongs may - // has not been split from the origin region because the apply thread is so busy - // that it can not apply SplitRequest as soon as possible. So we can not - // delete this sst file. 
- if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { - let task = CleanupSstTask::ValidateSst { - ssts: validate_ssts, - }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to validate ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 64c5be6d7e15..f7bf7f6d2973 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,7 +10,6 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, - import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -824,10 +823,6 @@ where { RaftMessage(InspectedRaftMessage), - ValidateSstResult { - invalid_ssts: Vec, - }, - // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -884,7 +879,6 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), - StoreMsg::ValidateSstResult { .. } => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 632e85f40cc3..726b7abe5ceb 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,7 +3,6 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; -use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -11,7 +10,6 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; -use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -29,29 +27,26 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, - S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -60,12 +55,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 8174b872f4b7..44f188e6f8fb 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,62 +1,30 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; +use std::{fmt, sync::Arc}; -use engine_traits::KvEngine; -use kvproto::{import_sstpb::SstMeta, metapb::Region}; -use pd_client::PdClient; +use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; -use tikv_util::{error, worker::Runnable}; - -use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; - -type Result = std::result::Result>; +use tikv_util::worker::Runnable; pub enum Task { DeleteSst { ssts: Vec }, - ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), - Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner -where - EK: KvEngine, - S: StoreRouter, -{ - store_id: u64, - store_router: S, +pub struct Runner { importer: Arc, - pd_client: Arc, - _engine: PhantomData, } -impl Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ - pub fn new( - store_id: u64, - store_router: S, - importer: Arc, - pd_client: Arc, - ) -> Runner { - Runner { - store_id, - store_router, - importer, - pd_client, - _engine: PhantomData, - } +impl Runner { + pub fn new(importer: Arc) -> Runner { + Runner { importer } } /// Deletes SST files from the importer. @@ -65,78 +33,9 @@ where let _ = self.importer.delete(sst); } } - - fn get_region_by_meta(&self, sst: &SstMeta) -> Result { - // The SST meta has been delivered with a range, use it directly. - // For now, no case will reach this. But this still could be a guard for - // reducing the superise in the future... - if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { - return self - .pd_client - .get_region(sst.get_range().get_start()) - .map_err(Into::into); - } - // Once there isn't range provided. - let query_by_start_key_of_full_meta = || { - let start_key = self - .importer - .load_start_key_by_meta::(sst)? - .ok_or_else(|| -> Box { - "failed to load start key from sst, the sst might be empty".into() - })?; - let region = self.pd_client.get_region(&start_key)?; - Result::Ok(region) - }; - query_by_start_key_of_full_meta() - .map_err(|err| - format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() - ) - } - - /// Validates whether the SST is stale or not. - fn handle_validate_sst(&self, ssts: Vec) { - let store_id = self.store_id; - let mut invalid_ssts = Vec::new(); - for sst in ssts { - match self.get_region_by_meta(&sst) { - Ok(r) => { - // The region id may or may not be the same as the - // SST file, but it doesn't matter, because the - // epoch of a range will not decrease anyway. - if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { - // Region has not been updated. - continue; - } - if r.get_id() == sst.get_region_id() - && r.get_peers().iter().any(|p| p.get_store_id() == store_id) - { - // The SST still belongs to this store. - continue; - } - invalid_ssts.push(sst); - } - Err(e) => { - error!("get region failed"; "err" => %e); - } - } - } - - // We need to send back the result to check for the stale - // peer, which may ingest the stale SST before it is - // destroyed. 
- let msg = StoreMsg::ValidateSstResult { invalid_ssts }; - if let Err(e) = self.store_router.send(msg) { - error!(%e; "send validate sst result failed"); - } - } } -impl Runnable for Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { @@ -144,9 +43,6 @@ where Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } - Task::ValidateSst { ssts } => { - self.handle_validate_sst(ssts); - } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8d44890e5a63..a4b6276a5878 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,6 +366,7 @@ where router.clone(), config.coprocessor.clone(), )); + let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1080,6 +1081,7 @@ where servers.importer.clone(), None, self.resource_manager.clone(), + Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 2593035618da..65d02f58c088 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,6 +948,7 @@ where backup_worker.start(backup_endpoint); // Import SST service. + let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -956,6 +957,7 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), + Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b270d26a4111..ae81cf016462 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -440,7 +440,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { let e = e?; @@ -458,20 +458,33 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; - +// version 2: compared to version 1 which is the default version, we will check +// epoch of request and local region in write API. +pub const API_VERSION_2: i32 = 2; + +/// sst_meta_to_path will encode the filepath with default api version (current +/// is 2). So when the SstMeta is created in old version of TiKV and filepath +/// will not correspond to the real file, in the deletion logic we can't remove +/// these files. 
pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), + API_VERSION_2, SST_SUFFIX, ))) } -pub fn parse_meta_from_path>(path: P) -> Result { +pub struct SstMetaWithApiVersion { + pub meta: SstMeta, + pub api_version: i32, // in future we may move api_version into SstMeta +} + +pub fn parse_meta_from_path>(path: P) -> Result { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -500,7 +513,11 @@ pub fn parse_meta_from_path>(path: P) -> Result { // cf_name to path. meta.set_cf_name(elems[4].to_owned()); } - Ok(meta) + let mut api_version = 1; + if elems.len() > 5 { + api_version = elems[5].parse()?; + } + Ok(SstMetaWithApiVersion { meta, api_version }) } #[cfg(test)] @@ -520,11 +537,12 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default.sst", uuid); + let expected_path = format!("{}_1_2_3_default_2.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.meta); + assert_eq!(2, meta_with_ver.api_version); } #[test] @@ -543,8 +561,9 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.meta); + assert_eq!(1, meta_with_ver.api_version); } #[cfg(feature = "test-engines-rocksdb")] @@ -596,14 +615,20 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta| { + ssts.iter_mut().for_each(|meta_with_ver| { + let meta = &mut meta_with_ver.meta; let start = dir .load_start_key_by_meta::(meta, arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!(ssts, vec![meta]); + assert_eq!( + ssts.iter() + .map(|meta_with_ver| { meta_with_ver.meta.clone() }) + .collect(), + vec![meta] + ); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 0cfc3bab774c..ff137005b09b 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::sst_meta_to_path, + import_file::{sst_meta_to_path, API_VERSION_2}, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5530862e6a39..f36016eb3097 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -51,7 +51,7 @@ use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ caching::cache_map::{CacheMap, ShareOwned}, - import_file::{ImportDir, ImportFile}, + import_file::{ImportDir, ImportFile, SstMetaWithApiVersion}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, import_mode2::{HashRange, ImportModeSwitcherV2}, metrics::*, @@ -1387,7 +1387,7 @@ impl SstImporter { /// List the basic 
information of the current SST files. /// The information contains UUID, region ID, region Epoch. /// Other fields may be left blank. - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1587,9 +1587,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.get_uuid()) + .find(|s| s.get_uuid() == sst.meta.get_uuid()) .unwrap(); - dir.delete(sst, key_manager.as_deref()).unwrap(); + dir.delete(&sst.meta, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 299e93eb7461..5073304e17a6 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,6 +561,7 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0002f36d647e..f5c64fa86e91 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,6 +451,7 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 68403e226f8b..6f9f22c9cb45 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -15,6 +15,7 @@ use std::{ use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; +use futures_executor::block_on; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -27,7 +28,9 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, + metapb::RegionEpoch, }; +use raftstore::{coprocessor::RegionInfoProvider, store::util::is_epoch_stale, RegionInfoAccessor}; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ @@ -39,7 +42,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::create_stream_with_buffer, + future::{create_stream_with_buffer, paired_future_callback}, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -124,6 +127,7 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, + region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -318,6 +322,7 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, + region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -365,6 +370,7 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, + region_info_accessor, writer, store_meta, resource_manager, @@ -675,6 +681,59 @@ impl ImportSstService { } } +fn check_local_region_stale( + region_id: u64, + epoch: &RegionEpoch, + region_info_accessor: Arc, +) -> Result<()> { + let (cb, f) = paired_future_callback(); + region_info_accessor + .find_region_by_id(region_id, cb) + .map_err(|e| { + Error::Engine(format!("failed to find region {} err {:?}", region_id, e).into()) + })?; + match block_on(f)? 
{ + Some(local_region_info) => { + let local_region_epoch = local_region_info.region.region_epoch.unwrap(); + + // TODO(lance6717): we should only need to check conf_ver because we require all + // peers have SST on the disk, and does not care about which one is + // leader. But since check_sst_for_ingestion also checks epoch version, + // we just keep it here for now. + + // when local region epoch is stale, client can retry write later + if is_epoch_stale(&local_region_epoch, epoch) { + return Err(Error::Engine( + format!("request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", + region_id, local_region_epoch, epoch).into(), + )); + } + // when local region epoch is ahead, client need to rescan region from PD to get + // latest region later + if is_epoch_stale(epoch, &local_region_epoch) { + return Err(Error::Engine( + format!("request region {} is staler than local region, local epoch {:?}, request epoch {:?}, please rescan region later", + region_id, local_region_epoch, epoch).into(), + )); + } + + // not match means to rescan + Ok(()) + } + None => { + // when region not found, we can't tell whether it's stale or ahead, so we just + // return the safest case + Err(Error::Engine( + format!( + "region {} is not found, please rescan region later", + region_id + ) + .into(), + )) + } + } +} + #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -686,6 +745,7 @@ macro_rules! impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); + let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -713,7 +773,15 @@ macro_rules! impl_write { } _ => return Err(Error::InvalidChunk), }; + // wait the region epoch on this TiKV to catch up with the epoch + // in request, which comes from PD and represents the majority + // peers' status. 
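            // A minimal sketch (illustrative, not part of this patch) of the comparison
            // that `check_local_region_stale` builds on. The helper imported above is
            // raftstore's `util::is_epoch_stale`, which roughly treats an epoch as stale
            // when either of its fields lags behind the other side's:
            //
            //     fn is_epoch_stale(epoch: &RegionEpoch, other: &RegionEpoch) -> bool {
            //         epoch.get_version() < other.get_version()          // split / merge
            //             || epoch.get_conf_ver() < other.get_conf_ver() // membership change
            //     }
            //
            // Hence "request is ahead of local" means this peer has not yet applied the
            // latest split/merge or conf change and the client can simply retry the write
            // later, while "request is staler than local" means the client's region cache
            // is outdated and it must rescan the region from PD first.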
let region_id = meta.get_region_id(); + check_local_region_stale( + region_id, + meta.get_region_epoch(), + region_info_accessor, + )?; let tablet = match tablets.get(region_id) { Some(t) => t, None => { @@ -1387,19 +1455,30 @@ fn write_needs_restore(write: &[u8]) -> bool { #[cfg(test)] mod test { - use std::collections::HashMap; + use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + }; use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::RegionEpoch, + metapb::{Region, RegionEpoch}, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::Message; + use protobuf::{Message, SingularPtrField}; + use raft::StateRole::Follower; + use raftstore::{ + coprocessor::{region_info_accessor::Callback, RegionInfoProvider}, + RegionInfo, + }; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{import::sst_service::RequestCollector, server::raftkv}; + use crate::{ + import::sst_service::{check_local_region_stale, RequestCollector}, + server::raftkv, + }; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1683,4 +1762,100 @@ mod test { } assert_eq!(total, 100); } + + #[test] + fn test_write_rpc_check_region_epoch() { + struct MockRegionInfoProvider { + map: Mutex>, + } + impl RegionInfoProvider for MockRegionInfoProvider { + fn find_region_by_id( + &self, + region_id: u64, + callback: Callback>, + ) -> Result<(), raftstore::coprocessor::Error> { + callback(self.map.lock().unwrap().get(®ion_id).cloned()); + Ok(()) + } + } + + let mock_provider = Arc::new(MockRegionInfoProvider { + map: Mutex::new(HashMap::new()), + }); + + let mut req_epoch = RegionEpoch { + conf_ver: 10, + version: 10, + ..Default::default() + }; + // test for region not found + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + let mut local_region_info = RegionInfo { + region: Region { + id: 1, + region_epoch: SingularPtrField::some(req_epoch.clone()), + ..Default::default() + }, + role: Follower, + buckets: 1, + }; + mock_provider + .map + .lock() + .unwrap() + .insert(1, local_region_info.clone()); + // test the local region epoch is same as request + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + result.unwrap(); + + // test the local region epoch is ahead of request + local_region_info + .region + .region_epoch + .as_mut() + .unwrap() + .conf_ver = 11; + mock_provider + .map + .lock() + .unwrap() + .insert(1, local_region_info.clone()); + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + req_epoch.conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + result.unwrap(); + + // test the local region epoch is staler than request + req_epoch.version = 12; + let result = check_local_region_stale(1, &req_epoch, mock_provider); + assert!(result.is_err()); + // check error message contains "retry write later", client will match this + // string pattern + assert!( + result + 
.unwrap_err() + .to_string() + .contains("retry write later") + ); + } } From 6e826308b9ca246ee5572bcdd24e6b26fd19c156 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 16 Oct 2023 12:28:57 -0700 Subject: [PATCH 094/203] add more metrics for slow commit log diagnostics (#15716) ref tikv/tikv#15175 Add more metrics for slow commit log duration investigation. In this PR, it adds raft message process wait duration and exposes raft message recv by store counter. Together with raft-engine write duration, we can further narrow reason of the commit log duration. With this PR, we still cannot tell if the slowness comes from network or raft-client's (grpc client). Signed-off-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/peer.rs | 6 +- components/raftstore-v2/src/operation/life.rs | 6 +- .../raftstore-v2/src/operation/ready/mod.rs | 10 +- components/raftstore-v2/src/router/message.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 15 +- components/raftstore/src/store/fsm/store.rs | 24 ++- .../raftstore/src/store/local_metrics.rs | 5 + components/raftstore/src/store/metrics.rs | 7 + components/raftstore/src/store/msg.rs | 4 +- metrics/grafana/tikv_details.json | 194 +++++++++++++++++- src/server/server.rs | 11 +- tests/failpoints/cases/test_merge.rs | 5 +- 13 files changed, 261 insertions(+), 36 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 5ed84c709371..23e419140123 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -990,16 +990,16 @@ impl StoreRouter { msg: Box, ) -> std::result::Result<(), TrySendError>> { let id = msg.get_region_id(); - let peer_msg = PeerMsg::RaftMessage(msg); + let peer_msg = PeerMsg::RaftMessage(msg, Some(TiInstant::now())); let store_msg = match self.router.try_send(id, peer_msg) { Either::Left(Ok(())) => return Ok(()), - Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m)))) => { + Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m, _)))) => { return Err(TrySendError::Full(m)); } - Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m)))) => { + Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m, _)))) => { return Err(TrySendError::Disconnected(m)); } - Either::Right(PeerMsg::RaftMessage(m)) => StoreMsg::RaftMessage(m), + Either::Right(PeerMsg::RaftMessage(m, _)) => StoreMsg::RaftMessage(m), _ => unreachable!(), }; match self.router.send_control(store_msg) { diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 94506a8a19f3..47a1aee1ef4f 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -247,8 +247,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec) { for msg in peer_msgs_buf.drain(..) 
{ match msg { - PeerMsg::RaftMessage(msg) => { - self.fsm.peer.on_raft_message(self.store_ctx, msg); + PeerMsg::RaftMessage(msg, send_time) => { + self.fsm + .peer + .on_raft_message(self.store_ctx, msg, send_time); } PeerMsg::RaftQuery(cmd) => { self.on_receive_command(cmd.send_time, cmd.ch.read_tracker()); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 5828a7bb661b..00df317f73a8 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -416,8 +416,8 @@ impl Store { ); let region_id = msg.get_region_id(); // The message can be sent when the peer is being created, so try send it first. - let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = - ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) + let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m, _))) = + ctx.router.send(region_id, PeerMsg::RaftMessage(msg, None)) { m } else { @@ -562,7 +562,7 @@ impl Store { if from_peer.id != raft::INVALID_ID { // For now the peer only exists in memory. It will persist its states when // handling its first readiness. - let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)); + let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg, None)); } true } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3ceb8693c0bc..a2697f29f027 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -55,7 +55,7 @@ use tikv_util::{ slog_panic, store::find_peer, sys::disk::DiskUsage, - time::{duration_to_sec, monotonic_raw_now, Duration}, + time::{duration_to_sec, monotonic_raw_now, Duration, Instant as TiInstant}, }; pub use self::{ @@ -259,6 +259,7 @@ impl Peer { &mut self, ctx: &mut StoreContext, mut msg: Box, + send_time: Option, ) { debug!( self.logger, @@ -268,6 +269,13 @@ impl Peer { "to_peer_id" => msg.get_to_peer().get_id(), "disk_usage" => ?msg.disk_usage, ); + if let Some(send_time) = send_time { + let process_wait_time = send_time.saturating_elapsed(); + ctx.raft_metrics + .process_wait_time + .observe(duration_to_sec(process_wait_time)); + } + if self.pause_for_replay() && msg.get_message().get_msg_type() == MessageType::MsgAppend { ctx.raft_metrics.message_dropped.recovery.inc(); return; diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index c9da5241fa88..59d1edd81980 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -157,7 +157,7 @@ pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. - RaftMessage(Box), + RaftMessage(Box, Option), /// Query won't change any state. A typical query is KV read. In most cases, /// it will be processed using lease or read index. RaftQuery(RaftRequest), diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 584db92e8be1..7504f746abef 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -206,7 +206,7 @@ where let callback = match msg { PeerMsg::RaftCommand(cmd) => cmd.callback, PeerMsg::CasualMessage(CasualMessage::SplitRegion { callback, .. 
}) => callback, - PeerMsg::RaftMessage(im) => { + PeerMsg::RaftMessage(im, _) => { raft_messages_size += im.heap_size; continue; } @@ -617,10 +617,16 @@ where let count = msgs.len(); for m in msgs.drain(..) { match m { - PeerMsg::RaftMessage(msg) => { + PeerMsg::RaftMessage(msg, sent_time) => { + if let Some(sent_time) = sent_time { + let wait_time = sent_time.saturating_elapsed().as_secs_f64(); + self.ctx.raft_metrics.process_wait_time.observe(wait_time); + } + if !self.ctx.coprocessor_host.on_raft_message(&msg.msg) { continue; } + if let Err(e) = self.on_raft_message(msg) { error!(%e; "handle raft message err"; @@ -4298,7 +4304,10 @@ where .pending_msgs .swap_remove_front(|m| m.get_to_peer() == &meta_peer) { - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size: 0, msg }); + let peer_msg = PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size: 0, msg }, + Some(TiInstant::now()), + ); if let Err(e) = self.ctx.router.force_send(new_region_id, peer_msg) { warn!("handle first requset failed"; "region_id" => region_id, "error" => ?e); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 33010a993a24..3a22ef8434d2 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -382,7 +382,10 @@ where for e in msg.get_message().get_entries() { heap_size += bytes_capacity(&e.data) + bytes_capacity(&e.context); } - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size, msg }, + Some(TiInstant::now()), + ); let event = TraceEvent::Add(heap_size); let send_failed = Cell::new(true); @@ -397,13 +400,13 @@ where send_failed.set(false); return Ok(()); } - Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(im)))) => { + Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(im, _)))) => { return Err(TrySendError::Full(im.msg)); } - Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im)))) => { + Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im, _)))) => { return Err(TrySendError::Disconnected(im.msg)); } - Either::Right(PeerMsg::RaftMessage(im)) => StoreMsg::RaftMessage(im), + Either::Right(PeerMsg::RaftMessage(im, _)) => StoreMsg::RaftMessage(im), _ => unreachable!(), }; match self.send_control(store_msg) { @@ -2067,14 +2070,18 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER }); let region_id = msg.msg.get_region_id(); - let msg = match self.ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) { + let msg = match self + .ctx + .router + .send(region_id, PeerMsg::RaftMessage(msg, None)) + { Ok(()) => { forwarded.set(true); return Ok(()); } Err(TrySendError::Full(_)) => return Ok(()), Err(TrySendError::Disconnected(_)) if self.ctx.router.is_shutdown() => return Ok(()), - Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im))) => im.msg, + Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im, None))) => im.msg, Err(_) => unreachable!(), }; @@ -2146,7 +2153,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER check_msg_status == CheckMsgStatus::NewPeerFirst, )? { // Peer created, send the message again. 
- let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = + PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }, None); if self.ctx.router.send(region_id, peer_msg).is_ok() { forwarded.set(true); } @@ -2169,7 +2177,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER store_meta.pending_msgs.push(msg); } else { drop(store_meta); - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }, None); if let Err(e) = self.ctx.router.force_send(region_id, peer_msg) { warn!("handle first request failed"; "region_id" => region_id, "error" => ?e); } else { diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 5460a57ae0f8..aceacdb81ee6 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -112,7 +112,10 @@ pub struct RaftMetrics { // local histogram pub store_time: LocalHistogram, + // the wait time for processing a raft command pub propose_wait_time: LocalHistogram, + // the wait time for processing a raft message + pub process_wait_time: LocalHistogram, pub process_ready: LocalHistogram, pub event_time: RaftEventDurationVec, pub peer_msg_len: LocalHistogram, @@ -152,6 +155,7 @@ impl RaftMetrics { raft_log_gc_skipped: RaftLogGcSkippedCounterVec::from(&RAFT_LOG_GC_SKIPPED_VEC), store_time: STORE_TIME_HISTOGRAM.local(), propose_wait_time: REQUEST_WAIT_TIME_HISTOGRAM.local(), + process_wait_time: RAFT_MESSAGE_WAIT_TIME_HISTOGRAM.local(), process_ready: PEER_RAFT_PROCESS_DURATION .with_label_values(&["ready"]) .local(), @@ -190,6 +194,7 @@ impl RaftMetrics { self.store_time.flush(); self.propose_wait_time.flush(); + self.process_wait_time.flush(); self.process_ready.flush(); self.event_time.flush(); self.peer_msg_len.flush(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index a5aa164e63ea..a4f2b7820cb0 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -551,6 +551,13 @@ lazy_static! { exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); + pub static ref RAFT_MESSAGE_WAIT_TIME_HISTOGRAM: Histogram = + register_histogram!( + "tikv_raftstore_raft_msg_wait_time_duration_secs", + "Bucketed histogram of raft message wait time duration.", + exponential_buckets(0.00001, 2.0, 26).unwrap() + ).unwrap(); + pub static ref PEER_GC_RAFT_LOG_COUNTER: IntCounter = register_int_counter!( "tikv_raftstore_gc_raft_log_total", diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index f7bf7f6d2973..a92e5169549d 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -740,7 +740,7 @@ pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. - RaftMessage(InspectedRaftMessage), + RaftMessage(InspectedRaftMessage, Option), /// Raft command is the command that is expected to be proposed by the /// leader of the target raft group. If it's failed to be sent, callback /// usually needs to be called before dropping in case of resource leak. 
@@ -778,7 +778,7 @@ impl ResourceMetered for PeerMsg {} impl fmt::Debug for PeerMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - PeerMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + PeerMsg::RaftMessage(..) => write!(fmt, "Raft Message"), PeerMsg::RaftCommand(_) => write!(fmt, "Raft Command"), PeerMsg::Tick(tick) => write! { fmt, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 57c887820316..f2654ba3da13 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -179,6 +179,14 @@ "interval": "", "legendFormat": "Apply Duration .99", "refId": "E" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "Raft Message Wait .99", + "refId": "F" } ], "thresholds": [], @@ -5819,7 +5827,7 @@ "fillGradient": 0, "gridPos": { "h": 9, - "w": 24, + "w": 12, "x": 0, "y": 37 }, @@ -5908,6 +5916,111 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The count of gRPC raft message", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 37 + }, + "hiddenSeries": false, + "id": 24763573092, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_raftstore_message_recv_by_store{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, store)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - {{store}}", + "metric": "tikv_raftstore_message_recv_by_store", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "gRPC message count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, @@ -13892,7 +14005,7 @@ "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_apply_wait_time_duration_secs_bucket", "refId": "A", "step": 4 } @@ -14070,7 +14183,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": 
"tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_write_handle_msg_duration_secs_bucket", "refId": "A", "step": 4 } @@ -14144,7 +14257,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_write_trigger_wb_bytes_bucket", "refId": "A", "step": 4 } @@ -14333,7 +14446,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "store-{{type}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_perf_context_time_duration_secs_bucket", "refId": "A", "step": 4 }, @@ -14387,6 +14500,77 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each raft message", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 62 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 1977, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "tikv_raftstore_raft_msg_wait_time_duration_secs_bucket", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft message wait duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null } ], "repeat": null, diff --git a/src/server/server.rs b/src/server/server.rs index a886f1232f44..09782be4e16d 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -437,6 +437,7 @@ pub mod test_router { use engine_rocks::{RocksEngine, RocksSnapshot}; use kvproto::raft_serverpb::RaftMessage; use raftstore::{router::RaftStoreRouter, store::*, Result as RaftStoreResult}; + use tikv_util::time::Instant as TiInstant; use super::*; @@ -496,12 +497,10 @@ pub mod test_router { impl RaftStoreRouter for TestRaftStoreRouter { fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { - let _ = self - .tx - .send(Either::Left(PeerMsg::RaftMessage(InspectedRaftMessage { - heap_size: 0, - msg, - }))); + let _ = self.tx.send(Either::Left(PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size: 0, msg }, + Some(TiInstant::now()), + ))); Ok(()) } diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index ffbd69dc05eb..eb15c7e16fad 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1831,7 +1831,10 @@ fn test_concurrent_between_transfer_leader_and_merge() { // Actually, store 1 should not reach the line of propose_commit_merge_1 let _ = rx.recv_timeout(Duration::from_secs(2)); router - .force_send(msg.get_region_id(), 
PeerMsg::RaftMessage(Box::new(msg))) + .force_send( + msg.get_region_id(), + PeerMsg::RaftMessage(Box::new(msg), None), + ) .unwrap(); // Wait region 1 of node 2 to become leader From 356ae2416bb53b1e104bc82ba536a56fad3fc47c Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 17 Oct 2023 10:52:58 +0800 Subject: [PATCH 095/203] s3: support backup with session token and assume role (#15722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#15781, close pingcap/tidb#39832 Signed-off-by: 3pointer Signed-off-by: 3pointer Co-authored-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> --- components/cloud/aws/src/s3.rs | 85 +++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index a7ea47ec9d27..96031c91f063 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -1,5 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{error::Error as StdError, io, time::Duration}; +use std::{ + error::Error as StdError, + io, + time::{Duration, SystemTime}, +}; use async_trait::async_trait; use cloud::{ @@ -16,6 +20,7 @@ pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, S3 as InputConfig}; use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; +use rusoto_sts::{StsAssumeRoleSessionCredentialsProvider, StsClient}; use thiserror::Error; use tikv_util::{debug, stream::error_stream, time::Instant}; use tokio::time::{sleep, timeout}; @@ -29,6 +34,7 @@ pub const STORAGE_VENDOR_NAME_AWS: &str = "aws"; pub struct AccessKeyPair { pub access_key: StringNonEmpty, pub secret_access_key: StringNonEmpty, + pub session_token: Option, } impl std::fmt::Debug for AccessKeyPair { @@ -36,6 +42,7 @@ impl std::fmt::Debug for AccessKeyPair { f.debug_struct("AccessKeyPair") .field("access_key", &self.access_key) .field("secret_access_key", &"?") + .field("session_token", &self.session_token) .finish() } } @@ -51,6 +58,8 @@ pub struct Config { storage_class: Option, multi_part_size: usize, object_lock_enabled: bool, + role_arn: Option, + external_id: Option, } impl Config { @@ -66,6 +75,8 @@ impl Config { storage_class: None, multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: false, + role_arn: None, + external_id: None, } } @@ -78,12 +89,16 @@ impl Config { let access_key_opt = attrs.get("access_key"); let access_key_pair = if let Some(access_key) = access_key_opt { let secret_access_key = attrs.get("secret_access_key").unwrap_or(def).clone(); + let session_token = attrs + .get("session_token") + .and_then(|x| StringNonEmpty::opt(x.to_string())); Some(AccessKeyPair { access_key: StringNonEmpty::required_field(access_key.clone(), "access_key")?, secret_access_key: StringNonEmpty::required_field( secret_access_key, "secret_access_key", )?, + session_token, }) } else { None @@ -99,6 +114,8 @@ impl Config { sse_kms_key_id: StringNonEmpty::opt(attrs.get("sse_kms_key_id").unwrap_or(def).clone()), multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: false, + role_arn: StringNonEmpty::opt(attrs.get("role_arn").unwrap_or(def).clone()), + external_id: StringNonEmpty::opt(attrs.get("external_id").unwrap_or(def).clone()), }) } @@ -114,13 +131,17 @@ impl Config { }; let access_key_pair = match StringNonEmpty::opt(input.access_key) { None => None, - Some(ak) => Some(AccessKeyPair 
{ - access_key: ak, - secret_access_key: StringNonEmpty::required_field( - input.secret_access_key, - "secret_access_key", - )?, - }), + Some(ak) => { + let session_token = StringNonEmpty::opt(input.session_token); + Some(AccessKeyPair { + access_key: ak, + secret_access_key: StringNonEmpty::required_field( + input.secret_access_key, + "secret_access_key", + )?, + session_token, + }) + } }; Ok(Config { storage_class, @@ -132,6 +153,8 @@ impl Config { sse_kms_key_id: StringNonEmpty::opt(input.sse_kms_key_id), multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: input.object_lock_enabled, + role_arn: StringNonEmpty::opt(input.role_arn), + external_id: StringNonEmpty::opt(input.external_id), }) } } @@ -198,20 +221,59 @@ impl S3Storage { Ok(S3Storage { config, client }) } + fn maybe_assume_role( + config: Config, + cred_provider: P, + dispatcher: D, + ) -> io::Result + where + P: ProvideAwsCredentials + Send + Sync + 'static, + D: DispatchSignedRequest + Send + Sync + 'static, + { + if config.role_arn.is_some() { + // try use role arn anyway with current creds when it's not nil. + let bucket_region = none_to_empty(config.bucket.region.clone()); + let bucket_endpoint = config.bucket.endpoint.clone(); + let region = util::get_region(&bucket_region, &none_to_empty(bucket_endpoint))?; + // cannot use the same dispatcher because of move, so use another http client. + let sts = StsClient::new_with(util::new_http_client()?, cred_provider, region); + let duration_since_epoch = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); + let timestamp_secs = duration_since_epoch.as_secs(); + let cred_provider = StsAssumeRoleSessionCredentialsProvider::new( + sts, + String::clone(config.role_arn.as_deref().unwrap()), + format!("{}", timestamp_secs), + config.external_id.as_deref().map(String::clone), + // default duration is 15min + None, + None, + None, + ); + Self::new_creds_dispatcher(config, dispatcher, cred_provider) + } else { + // or just use original cred_provider to access s3. 
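+            // Without `role_arn`, keep using the caller-provided credentials (the
+            // static access key pair or the default credential provider) directly.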
+ Self::new_creds_dispatcher(config, dispatcher, cred_provider) + } + } + pub fn with_request_dispatcher(config: Config, dispatcher: D) -> io::Result where D: DispatchSignedRequest + Send + Sync + 'static, { // static credentials are used with minio if let Some(access_key_pair) = &config.access_key_pair { - let cred_provider = StaticProvider::new_minimal( + let cred_provider = StaticProvider::new( (*access_key_pair.access_key).to_owned(), (*access_key_pair.secret_access_key).to_owned(), + access_key_pair.session_token.as_deref().map(String::clone), + None, ); - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + Self::maybe_assume_role(config, cred_provider, dispatcher) } else { let cred_provider = util::CredentialsProvider::new()?; - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + Self::maybe_assume_role(config, cred_provider, dispatcher) } } @@ -637,6 +699,7 @@ mod tests { config.access_key_pair = Some(AccessKeyPair { access_key: StringNonEmpty::required("abc".to_string()).unwrap(), secret_access_key: StringNonEmpty::required("xyz".to_string()).unwrap(), + session_token: Some(StringNonEmpty::required("token".to_string()).unwrap()), }); let mut s = S3Storage::new(config.clone()).unwrap(); // set a less than 5M value not work From f5d269496dba61827fd25dbfeec975b2c3f1af5a Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:33:29 +0800 Subject: [PATCH 096/203] tikv_util: Support customizing evict policy and operations without promoting for LruCache (#15747) ref tikv/tikv#11187 This PR makes the `LruCache` in `tikv_util` to support customizing how to determine an entries in the cache should be evicted. This is part of solving the issue #11187, which needs a `TxnStatusCache`. The `TxnStatusCache` is desinged to use `LruCache` internally, with ability to get or insert items without promoting items to the head (most-recently-used) position. This PR adds `get_no_promote` and `insert_if_not_exist` functions to `LruCache`. Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/lru.rs | 198 +++++++++++++++++++++++++++++--- 1 file changed, 183 insertions(+), 15 deletions(-) diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 76fad6e8a34c..b5bfdfbf7d56 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -135,6 +135,10 @@ impl Trace { r.key.as_ptr().read() } } + + fn get_tail(&self) -> &K { + unsafe { self.tail.as_ref().prev.as_ref().key.assume_init_ref() } + } } impl Drop for Trace { @@ -174,14 +178,51 @@ impl SizePolicy for CountTracker { } } -pub struct LruCache +/// Some [`EvictPolicy`] may need to know what the entry bing popped out is to +/// determine if it really can be popped. But there is performance cost to +/// always get the tail entry. So we pass this interface to the `should_evict` +/// function. An implementation of `EvictPolicy` can read the tail entry only +/// when it really needs. +pub trait GetTailEntry { + fn get_tail_entry(&self) -> Option<(&K, &V)>; +} + +/// An [`EvictPolicy`] defines how the [`LruCache`] should determine an entry +/// at the tail should be popped out. 
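+///
+/// As a rough illustration, a hypothetical policy (not defined in this module)
+/// that starts evicting once the cache is more than half full could look like:
+///
+/// ```ignore
+/// struct EvictOnHalfFull;
+///
+/// impl<K, V> EvictPolicy<K, V> for EvictOnHalfFull {
+///     fn should_evict(
+///         &self,
+///         current_size: usize,
+///         capacity: usize,
+///         _: &impl GetTailEntry<K, V>,
+///     ) -> bool {
+///         // Start dropping tail entries once more than half of the capacity
+///         // is occupied.
+///         current_size * 2 > capacity
+///     }
+/// }
+///
+/// // Entry count is used as the size; eviction follows the custom policy.
+/// let mut cache = LruCache::new(8, 0, CountTracker::default(), EvictOnHalfFull);
+/// cache.insert(1, 1);
+/// ```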
+pub trait EvictPolicy { + fn should_evict( + &self, + current_size: usize, + capacity: usize, + get_tail_entry: &impl GetTailEntry, + ) -> bool; +} + +/// The default [`EvictPolicy`] of [`LruCache`], which pops out entries at the +/// tail when the limit specified by `capacity` is exceeded. +pub struct EvictOnFull; + +impl EvictPolicy for EvictOnFull { + fn should_evict( + &self, + current_size: usize, + capacity: usize, + _: &impl GetTailEntry, + ) -> bool { + capacity < current_size + } +} + +pub struct LruCache where T: SizePolicy, + E: EvictPolicy, { map: HashMap>, trace: Trace, capacity: usize, size_policy: T, + evict_policy: E, } impl LruCache @@ -189,18 +230,30 @@ where T: SizePolicy, { pub fn with_capacity_sample_and_trace( - mut capacity: usize, + capacity: usize, sample_mask: usize, size_policy: T, ) -> LruCache { + Self::new(capacity, sample_mask, size_policy, EvictOnFull) + } +} + +impl LruCache +where + T: SizePolicy, + E: EvictPolicy, +{ + pub fn new(mut capacity: usize, sample_mask: usize, size_policy: T, evict_policy: E) -> Self { + // The capacity is at least 1. if capacity == 0 { capacity = 1; } - LruCache { + Self { map: HashMap::default(), trace: Trace::new(sample_mask), capacity, size_policy, + evict_policy, } } @@ -215,10 +268,18 @@ where self.trace.clear(); self.size_policy.on_reset(0); } + + /// Get the capacity limited on the `LruCache`. #[inline] pub fn capacity(&self) -> usize { self.capacity } + + /// Get the capacity actually allocated by the internal data structure. + #[inline] + pub fn internal_allocated_capacity(&self) -> usize { + self.map.capacity() + } } impl LruCache @@ -234,25 +295,36 @@ where } } -impl LruCache +impl LruCache where K: Eq + Hash + Clone + std::fmt::Debug, T: SizePolicy, + E: EvictPolicy, { #[inline] - pub fn insert(&mut self, key: K, value: V) { + fn insert_impl(&mut self, key: K, value: V, replace: bool) -> bool { + let mut inserted = true; let mut old_key = None; let current_size = SizePolicy::::current(&self.size_policy); + // In case the current size exactly equals to capacity, we also expect to reuse + // tail when inserting. Use `current_size + 1` to include the case. + let should_evict_on_insert = + self.evict_policy + .should_evict(current_size + 1, self.capacity, self); match self.map.entry(key) { HashMapEntry::Occupied(mut e) => { - self.size_policy.on_remove(e.key(), &e.get().value); - self.size_policy.on_insert(e.key(), &value); - let mut entry = e.get_mut(); - self.trace.promote(entry.record); - entry.value = value; + if replace { + self.size_policy.on_remove(e.key(), &e.get().value); + self.size_policy.on_insert(e.key(), &value); + let mut entry = e.get_mut(); + self.trace.promote(entry.record); + entry.value = value; + } else { + inserted = false; + } } HashMapEntry::Vacant(v) => { - let record = if self.capacity <= current_size { + let record = if should_evict_on_insert { let res = self.trace.reuse_tail(v.key().clone()); old_key = Some(res.0); res.1 @@ -274,7 +346,8 @@ where // Perhaps we can reject entries larger than capacity goes in the LRU cache, but // that is impossible for now: the `SizePolicy` trait doesn't provide the // interface of querying the actual size of an item. - self.evict_until_fit() + self.evict_until_fit(); + inserted } fn evict_until_fit(&mut self) { @@ -283,7 +356,7 @@ where let current_size = self.size_policy.current(); // Should we keep at least one entry? So our users won't lose their fresh record // once it exceeds the capacity. 
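+            // The decision is delegated to the `EvictPolicy`: the default `EvictOnFull`
+            // only checks the capacity, while a custom policy may also look at the tail
+            // entry (e.g. its age) before agreeing to evict it.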
- if current_size <= cap || self.map.is_empty() { + if !self.evict_policy.should_evict(current_size, cap, self) || self.map.is_empty() { break; } let key = self.trace.remove_tail(); @@ -292,6 +365,18 @@ where } } + #[inline] + pub fn insert(&mut self, key: K, value: V) { + self.insert_impl(key, value, true); + } + + /// Insert an entry if the key doesn't exist before. The existing entry + /// won't be replaced and won't be promoted to the most-recent place. + #[inline] + pub fn insert_if_not_exist(&mut self, key: K, value: V) -> bool { + self.insert_impl(key, value, false) + } + #[inline] pub fn remove(&mut self, key: &K) -> Option { if let Some(v) = self.map.remove(key) { @@ -313,6 +398,12 @@ where } } + /// Get an item by key without promoting the item. + #[inline] + pub fn get_no_promote(&self, key: &K) -> Option<&V> { + self.map.get(key).map(|v| &v.value) + } + #[inline] pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { match self.map.get_mut(key) { @@ -355,17 +446,37 @@ where } } -unsafe impl Send for LruCache +impl GetTailEntry for LruCache +where + K: Eq + Hash + Clone + std::fmt::Debug, + T: SizePolicy, + E: EvictPolicy, +{ + fn get_tail_entry(&self) -> Option<(&K, &V)> { + if self.is_empty() { + return None; + } + + let k = self.trace.get_tail(); + self.map + .get_key_value(k) + .map(|(k, entry)| (k, &entry.value)) + } +} + +unsafe impl Send for LruCache where K: Send, V: Send, T: Send + SizePolicy, + E: Send + EvictPolicy, { } -impl Drop for LruCache +impl Drop for LruCache where T: SizePolicy, + E: EvictPolicy, { fn drop(&mut self) { self.clear(); @@ -626,4 +737,61 @@ mod tests { assert!(cache.size() <= 42); } } + + #[test] + fn test_get_no_promote() { + let mut cache = LruCache::with_capacity_sample_and_trace(3, 0, CountTracker::default()); + cache.insert(1, 1); + cache.insert(2, 2); + cache.insert(3, 3); + assert_eq!(cache.size(), 3); + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + cache.insert(4, 4); + assert_eq!(cache.size(), 3); + // Key 1 is not promoted, so it's popped out first. + assert!(cache.get_no_promote(&1).is_none()); + // Other entries are not affected. + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + assert_eq!(*cache.get_no_promote(&4).unwrap(), 4); + } + + #[test] + fn test_insert_if_not_exist() { + let mut cache = LruCache::with_capacity_sample_and_trace(4, 0, CountTracker::default()); + assert!(cache.insert_if_not_exist(1, 1)); + assert!(cache.insert_if_not_exist(2, 2)); + assert!(cache.insert_if_not_exist(3, 3)); + assert_eq!(cache.size(), 3); + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + + assert!(!cache.insert_if_not_exist(1, 11)); + // Not updated. + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + + assert!(cache.insert_if_not_exist(4, 4)); + assert!(!cache.insert_if_not_exist(2, 22)); + // Not updated. + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + + assert_eq!(cache.size(), 4); + assert!(cache.insert_if_not_exist(5, 5)); + assert_eq!(cache.size(), 4); + // key 1 is not promoted, so it's first popped out. + assert!(cache.get_no_promote(&1).is_none()); + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + + assert!(cache.insert_if_not_exist(6, 6)); + assert_eq!(cache.size(), 4); + // key 2 is not promoted either, so it's first popped out. 
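+        // `get_no_promote` reads an entry without touching the LRU order, so these
+        // checks do not affect which key is evicted next.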
+ assert!(cache.get_no_promote(&2).is_none()); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + + assert!(cache.insert_if_not_exist(7, 7)); + assert_eq!(cache.size(), 4); + assert!(cache.get_no_promote(&3).is_none()); + assert_eq!(*cache.get_no_promote(&4).unwrap(), 4); + } } From 8f8da90e0fca0a9adacc77f5a1edc11e59872573 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 18 Oct 2023 00:12:28 -0500 Subject: [PATCH 097/203] Revert "import: write RPC will check region epoch before continue" (#15787) close tikv/tikv#15791 Signed-off-by: lance6716 --- Makefile | 8 - .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 87 ++++++-- components/raftstore/src/store/msg.rs | 6 + .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +++++++++++- components/server/src/server.rs | 2 - components/server/src/server2.rs | 2 - components/sst_importer/src/import_file.rs | 49 ++--- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 8 +- components/test_raftstore-v2/src/server.rs | 1 - components/test_raftstore/src/server.rs | 1 - src/import/sst_service.rs | 185 +----------------- 14 files changed, 228 insertions(+), 271 deletions(-) diff --git a/Makefile b/Makefile index ce8d4e8b793d..bb1d7316e1b6 100644 --- a/Makefile +++ b/Makefile @@ -406,14 +406,6 @@ docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test -docker_shell: - docker build -f Dockerfile.test \ - -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - . - docker run -it -v $(shell pwd):/tikv \ - ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - /bin/bash - ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 3d39c9a73697..e963434fe837 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,11 +43,6 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); - // filter old version SSTs - let ssts: Vec<_> = ssts - .into_iter() - .filter(|sst| sst.api_version >= sst_importer::API_VERSION_2) - .collect(); if ssts.is_empty() { return Ok(()); } @@ -55,9 +50,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.meta.get_region_id()) + .entry(sst.get_region_id()) .or_default() - .push(sst.meta); + .push(sst); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 3a22ef8434d2..aa8fa7c318e6 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,13 +36,14 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ + import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{Feature, FeatureGate, PdClient}; +use 
pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -812,6 +813,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), + StoreMsg::ValidateSstResult { invalid_ssts } => { + self.on_validate_sst_result(invalid_ssts) + } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1651,7 +1655,12 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); + let cleanup_sst_runner = CleanupSstRunner::new( + meta.get_id(), + self.router.clone(), + Arc::clone(&importer), + Arc::clone(&pd_client), + ); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2754,8 +2763,44 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { + fn on_validate_sst_result(&mut self, ssts: Vec) { + if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import { + return; + } + // A stale peer can still ingest a stale Sst before it is + // destroyed. We need to make sure that no stale peer exists. + let mut delete_ssts = Vec::new(); + { + let meta = self.ctx.store_meta.lock().unwrap(); + for sst in ssts { + if !meta.regions.contains_key(&sst.get_region_id()) { + delete_ssts.push(sst); + } + } + } + if delete_ssts.is_empty() { + return; + } + + let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::CleanupSst(task)) + { + error!( + "schedule to delete ssts failed"; + "store_id" => self.fsm.store.id, + "err" => ?e, + ); + } + } + fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); + let mut validate_ssts = Vec::new(); + let import_size = box_try!(self.ctx.importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { @@ -2764,22 +2809,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if sst.api_version < sst_importer::API_VERSION_2 { - // SST of old versions are created by old TiKV and have different prerequisite - // we can't delete them here. They can only be deleted manually - continue; - } - if let Some(r) = meta.regions.get(&sst.meta.get_region_id()) { + if let Some(r) = meta.regions.get(&sst.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.meta.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. - delete_ssts.push(sst.meta); + delete_ssts.push(sst); } } else { - // The write RPC of import sst service have make sure the region do exist at the - // write time, and now the region is not found, sst can be - // deleted because it won't be used by ingest in future. - delete_ssts.push(sst.meta); + // If the peer doesn't exist, we need to validate the SST through PD. 
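+                    // The validation is performed asynchronously by the cleanup-SST
+                    // worker, which asks PD and reports invalid SSTs back via
+                    // `StoreMsg::ValidateSstResult`.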
+ validate_ssts.push(sst); } } } @@ -2799,6 +2837,27 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + // When there is an import job running, the region which this sst belongs may + // has not been split from the origin region because the apply thread is so busy + // that it can not apply SplitRequest as soon as possible. So we can not + // delete this sst file. + if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { + let task = CleanupSstTask::ValidateSst { + ssts: validate_ssts, + }; + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::CleanupSst(task)) + { + error!( + "schedule to validate ssts failed"; + "store_id" => self.fsm.store.id, + "err" => ?e, + ); + } + } + Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a92e5169549d..a33ca0e476ea 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,6 +10,7 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, + import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -823,6 +824,10 @@ where { RaftMessage(InspectedRaftMessage), + ValidateSstResult { + invalid_ssts: Vec, + }, + // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -879,6 +884,7 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), + StoreMsg::ValidateSstResult { .. } => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 726b7abe5ceb..632e85f40cc3 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,6 +3,7 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; +use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -10,6 +11,7 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; +use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -27,26 +29,29 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, + S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, + C: PdClient, + S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -55,10 +60,12 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, + C: PdClient, + S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 44f188e6f8fb..8174b872f4b7 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,30 +1,62 @@ // Copyright 2018 TiKV Project Authors. 
Licensed under Apache-2.0. -use std::{fmt, sync::Arc}; +use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; -use kvproto::import_sstpb::SstMeta; +use engine_traits::KvEngine; +use kvproto::{import_sstpb::SstMeta, metapb::Region}; +use pd_client::PdClient; use sst_importer::SstImporter; -use tikv_util::worker::Runnable; +use tikv_util::{error, worker::Runnable}; + +use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; + +type Result = std::result::Result>; pub enum Task { DeleteSst { ssts: Vec }, + ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), + Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner { +pub struct Runner +where + EK: KvEngine, + S: StoreRouter, +{ + store_id: u64, + store_router: S, importer: Arc, + pd_client: Arc, + _engine: PhantomData, } -impl Runner { - pub fn new(importer: Arc) -> Runner { - Runner { importer } +impl Runner +where + EK: KvEngine, + C: PdClient, + S: StoreRouter, +{ + pub fn new( + store_id: u64, + store_router: S, + importer: Arc, + pd_client: Arc, + ) -> Runner { + Runner { + store_id, + store_router, + importer, + pd_client, + _engine: PhantomData, + } } /// Deletes SST files from the importer. @@ -33,9 +65,78 @@ impl Runner { let _ = self.importer.delete(sst); } } + + fn get_region_by_meta(&self, sst: &SstMeta) -> Result { + // The SST meta has been delivered with a range, use it directly. + // For now, no case will reach this. But this still could be a guard for + // reducing the superise in the future... + if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { + return self + .pd_client + .get_region(sst.get_range().get_start()) + .map_err(Into::into); + } + // Once there isn't range provided. + let query_by_start_key_of_full_meta = || { + let start_key = self + .importer + .load_start_key_by_meta::(sst)? + .ok_or_else(|| -> Box { + "failed to load start key from sst, the sst might be empty".into() + })?; + let region = self.pd_client.get_region(&start_key)?; + Result::Ok(region) + }; + query_by_start_key_of_full_meta() + .map_err(|err| + format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() + ) + } + + /// Validates whether the SST is stale or not. + fn handle_validate_sst(&self, ssts: Vec) { + let store_id = self.store_id; + let mut invalid_ssts = Vec::new(); + for sst in ssts { + match self.get_region_by_meta(&sst) { + Ok(r) => { + // The region id may or may not be the same as the + // SST file, but it doesn't matter, because the + // epoch of a range will not decrease anyway. + if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { + // Region has not been updated. + continue; + } + if r.get_id() == sst.get_region_id() + && r.get_peers().iter().any(|p| p.get_store_id() == store_id) + { + // The SST still belongs to this store. + continue; + } + invalid_ssts.push(sst); + } + Err(e) => { + error!("get region failed"; "err" => %e); + } + } + } + + // We need to send back the result to check for the stale + // peer, which may ingest the stale SST before it is + // destroyed. 
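+        // On receiving `ValidateSstResult`, the store FSM schedules a `DeleteSst`
+        // task for every reported SST whose region is no longer found on this store.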
+ let msg = StoreMsg::ValidateSstResult { invalid_ssts }; + if let Err(e) = self.store_router.send(msg) { + error!(%e; "send validate sst result failed"); + } + } } -impl Runnable for Runner { +impl Runnable for Runner +where + EK: KvEngine, + C: PdClient, + S: StoreRouter, +{ type Task = Task; fn run(&mut self, task: Task) { @@ -43,6 +144,9 @@ impl Runnable for Runner { Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } + Task::ValidateSst { ssts } => { + self.handle_validate_sst(ssts); + } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a4b6276a5878..8d44890e5a63 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,7 +366,6 @@ where router.clone(), config.coprocessor.clone(), )); - let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1081,7 +1080,6 @@ where servers.importer.clone(), None, self.resource_manager.clone(), - Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 65d02f58c088..2593035618da 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,7 +948,6 @@ where backup_worker.start(backup_endpoint); // Import SST service. - let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -957,7 +956,6 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), - Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index ae81cf016462..b270d26a4111 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -440,7 +440,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { let e = e?; @@ -458,33 +458,20 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; -// version 2: compared to version 1 which is the default version, we will check -// epoch of request and local region in write API. -pub const API_VERSION_2: i32 = 2; - -/// sst_meta_to_path will encode the filepath with default api version (current -/// is 2). So when the SstMeta is created in old version of TiKV and filepath -/// will not correspond to the real file, in the deletion logic we can't remove -/// these files. 
+ pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), - API_VERSION_2, SST_SUFFIX, ))) } -pub struct SstMetaWithApiVersion { - pub meta: SstMeta, - pub api_version: i32, // in future we may move api_version into SstMeta -} - -pub fn parse_meta_from_path>(path: P) -> Result { +pub fn parse_meta_from_path>(path: P) -> Result { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -513,11 +500,7 @@ pub fn parse_meta_from_path>(path: P) -> Result 5 { - api_version = elems[5].parse()?; - } - Ok(SstMetaWithApiVersion { meta, api_version }) + Ok(meta) } #[cfg(test)] @@ -537,12 +520,11 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default_2.sst", uuid); + let expected_path = format!("{}_1_2_3_default.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let meta_with_ver = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, meta_with_ver.meta); - assert_eq!(2, meta_with_ver.api_version); + let new_meta = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, new_meta); } #[test] @@ -561,9 +543,8 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let meta_with_ver = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, meta_with_ver.meta); - assert_eq!(1, meta_with_ver.api_version); + let new_meta = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, new_meta); } #[cfg(feature = "test-engines-rocksdb")] @@ -615,20 +596,14 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta_with_ver| { - let meta = &mut meta_with_ver.meta; + ssts.iter_mut().for_each(|meta| { let start = dir .load_start_key_by_meta::(meta, arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!( - ssts.iter() - .map(|meta_with_ver| { meta_with_ver.meta.clone() }) - .collect(), - vec![meta] - ); + assert_eq!(ssts, vec![meta]); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index ff137005b09b..0cfc3bab774c 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::{sst_meta_to_path, API_VERSION_2}, + import_file::sst_meta_to_path, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index f36016eb3097..5530862e6a39 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -51,7 +51,7 @@ use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ caching::cache_map::{CacheMap, ShareOwned}, - import_file::{ImportDir, ImportFile, SstMetaWithApiVersion}, + import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, import_mode2::{HashRange, ImportModeSwitcherV2}, metrics::*, @@ -1387,7 +1387,7 @@ impl SstImporter { /// List the basic information of the current SST files. /// The information contains UUID, region ID, region Epoch. 
/// Other fields may be left blank. - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1587,9 +1587,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.meta.get_uuid()) + .find(|s| s.get_uuid() == sst.get_uuid()) .unwrap(); - dir.delete(&sst.meta, key_manager.as_deref()).unwrap(); + dir.delete(sst, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 5073304e17a6..299e93eb7461 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,7 +561,6 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), - Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f5c64fa86e91..0002f36d647e 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,7 +451,6 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), - Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6f9f22c9cb45..68403e226f8b 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -15,7 +15,6 @@ use std::{ use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; -use futures_executor::block_on; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -28,9 +27,7 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, - metapb::RegionEpoch, }; -use raftstore::{coprocessor::RegionInfoProvider, store::util::is_epoch_stale, RegionInfoAccessor}; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ @@ -42,7 +39,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::{create_stream_with_buffer, paired_future_callback}, + future::create_stream_with_buffer, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -127,7 +124,6 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, - region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -322,7 +318,6 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, - region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -370,7 +365,6 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, - region_info_accessor, writer, store_meta, resource_manager, @@ -681,59 +675,6 @@ impl ImportSstService { } } -fn check_local_region_stale( - region_id: u64, - epoch: &RegionEpoch, - region_info_accessor: Arc, -) -> Result<()> { - let (cb, f) = paired_future_callback(); - region_info_accessor - .find_region_by_id(region_id, cb) - .map_err(|e| { - Error::Engine(format!("failed to find region {} err {:?}", region_id, e).into()) - })?; - match block_on(f)? 
{ - Some(local_region_info) => { - let local_region_epoch = local_region_info.region.region_epoch.unwrap(); - - // TODO(lance6717): we should only need to check conf_ver because we require all - // peers have SST on the disk, and does not care about which one is - // leader. But since check_sst_for_ingestion also checks epoch version, - // we just keep it here for now. - - // when local region epoch is stale, client can retry write later - if is_epoch_stale(&local_region_epoch, epoch) { - return Err(Error::Engine( - format!("request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", - region_id, local_region_epoch, epoch).into(), - )); - } - // when local region epoch is ahead, client need to rescan region from PD to get - // latest region later - if is_epoch_stale(epoch, &local_region_epoch) { - return Err(Error::Engine( - format!("request region {} is staler than local region, local epoch {:?}, request epoch {:?}, please rescan region later", - region_id, local_region_epoch, epoch).into(), - )); - } - - // not match means to rescan - Ok(()) - } - None => { - // when region not found, we can't tell whether it's stale or ahead, so we just - // return the safest case - Err(Error::Engine( - format!( - "region {} is not found, please rescan region later", - region_id - ) - .into(), - )) - } - } -} - #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -745,7 +686,6 @@ macro_rules! impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); - let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -773,15 +713,7 @@ macro_rules! impl_write { } _ => return Err(Error::InvalidChunk), }; - // wait the region epoch on this TiKV to catch up with the epoch - // in request, which comes from PD and represents the majority - // peers' status. 
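+                        // `region_id` is only used to look up the target tablet below;
+                        // the request epoch is no longer checked against the local region.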
let region_id = meta.get_region_id(); - check_local_region_stale( - region_id, - meta.get_region_epoch(), - region_info_accessor, - )?; let tablet = match tablets.get(region_id) { Some(t) => t, None => { @@ -1455,30 +1387,19 @@ fn write_needs_restore(write: &[u8]) -> bool { #[cfg(test)] mod test { - use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - }; + use std::collections::HashMap; use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::{Region, RegionEpoch}, + metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::{Message, SingularPtrField}; - use raft::StateRole::Follower; - use raftstore::{ - coprocessor::{region_info_accessor::Callback, RegionInfoProvider}, - RegionInfo, - }; + use protobuf::Message; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{ - import::sst_service::{check_local_region_stale, RequestCollector}, - server::raftkv, - }; + use crate::{import::sst_service::RequestCollector, server::raftkv}; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1762,100 +1683,4 @@ mod test { } assert_eq!(total, 100); } - - #[test] - fn test_write_rpc_check_region_epoch() { - struct MockRegionInfoProvider { - map: Mutex>, - } - impl RegionInfoProvider for MockRegionInfoProvider { - fn find_region_by_id( - &self, - region_id: u64, - callback: Callback>, - ) -> Result<(), raftstore::coprocessor::Error> { - callback(self.map.lock().unwrap().get(®ion_id).cloned()); - Ok(()) - } - } - - let mock_provider = Arc::new(MockRegionInfoProvider { - map: Mutex::new(HashMap::new()), - }); - - let mut req_epoch = RegionEpoch { - conf_ver: 10, - version: 10, - ..Default::default() - }; - // test for region not found - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - assert!(result.is_err()); - // check error message contains "rescan region later", client will match this - // string pattern - assert!( - result - .unwrap_err() - .to_string() - .contains("rescan region later") - ); - - let mut local_region_info = RegionInfo { - region: Region { - id: 1, - region_epoch: SingularPtrField::some(req_epoch.clone()), - ..Default::default() - }, - role: Follower, - buckets: 1, - }; - mock_provider - .map - .lock() - .unwrap() - .insert(1, local_region_info.clone()); - // test the local region epoch is same as request - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - result.unwrap(); - - // test the local region epoch is ahead of request - local_region_info - .region - .region_epoch - .as_mut() - .unwrap() - .conf_ver = 11; - mock_provider - .map - .lock() - .unwrap() - .insert(1, local_region_info.clone()); - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - assert!(result.is_err()); - // check error message contains "rescan region later", client will match this - // string pattern - assert!( - result - .unwrap_err() - .to_string() - .contains("rescan region later") - ); - - req_epoch.conf_ver = 11; - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - result.unwrap(); - - // test the local region epoch is staler than request - req_epoch.version = 12; - let result = check_local_region_stale(1, &req_epoch, mock_provider); - assert!(result.is_err()); - // check error message contains "retry write later", client will match this - // string pattern - assert!( - result - 
.unwrap_err() - .to_string() - .contains("retry write later") - ); - } } From 7953ea518ca2768cc22c847bf10e7890063c6549 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 17 Oct 2023 22:25:58 -0700 Subject: [PATCH 098/203] raftstore-v2: Allow rollback merge during unsafe recovery for raftstore v2 (#15780) ref tikv/tikv#15580 Allow rollback merge during unsafe recovery for raftstore v2 Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../operation/command/admin/merge/commit.rs | 2 +- .../operation/command/admin/merge/prepare.rs | 2 +- .../src/operation/command/admin/mod.rs | 8 ++- .../raftstore-v2/src/operation/command/mod.rs | 13 ++-- components/raftstore/src/store/peer.rs | 1 + .../failpoints/cases/test_unsafe_recovery.rs | 63 +++---------------- 6 files changed, 25 insertions(+), 64 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index b12ba9eaf9df..da26a423a97d 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -371,7 +371,7 @@ impl Peer { let mut proposal_ctx = ProposalContext::empty(); proposal_ctx.insert(ProposalContext::COMMIT_MERGE); let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + self.propose_with_ctx(store_ctx, data, proposal_ctx) } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 4a5875f70975..5de1c4cfe015 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -214,7 +214,7 @@ impl Peer { let mut proposal_ctx = ProposalContext::empty(); proposal_ctx.insert(ProposalContext::PREPARE_MERGE); let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + self.propose_with_ctx(store_ctx, data, proposal_ctx) }); if r.is_ok() { self.proposal_control_mut().set_pending_prepare_merge(false); diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9d7fee55ae4e..b861f86f8591 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -30,7 +30,7 @@ use raftstore::{ cmd_resp, fsm::{apply, apply::validate_batch_split}, msg::ErrorCallback, - Transport, + ProposalContext, Transport, }, Error, }; @@ -237,10 +237,14 @@ impl Peer { } } AdminCmdType::CompactLog => self.propose_compact_log(ctx, req), - AdminCmdType::UpdateGcPeer | AdminCmdType::RollbackMerge => { + AdminCmdType::UpdateGcPeer => { let data = req.write_to_bytes().unwrap(); self.propose(ctx, data) } + AdminCmdType::RollbackMerge => { + let data = req.write_to_bytes().unwrap(); + self.propose_with_ctx(ctx, data, ProposalContext::ROLLBACK_MERGE) + } AdminCmdType::PrepareMerge => self.propose_prepare_merge(ctx, req), AdminCmdType::CommitMerge => self.propose_commit_merge(ctx, req), AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 70cdbfda237d..b93ea700f801 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ 
b/components/raftstore-v2/src/operation/command/mod.rs @@ -43,7 +43,7 @@ use raftstore::{ }, msg::ErrorCallback, util::{self, check_flashback_state}, - Config, Transport, WriteCallback, + Config, ProposalContext, Transport, WriteCallback, }, Error, Result, }; @@ -202,7 +202,8 @@ impl Peer { // progress less error-prone. if !(admin_type.is_some() && (admin_type.unwrap() == AdminCmdType::ChangePeer - || admin_type.unwrap() == AdminCmdType::ChangePeerV2)) + || admin_type.unwrap() == AdminCmdType::ChangePeerV2 + || admin_type.unwrap() == AdminCmdType::RollbackMerge)) { return Err(Error::RecoveryInProgress(self.region_id())); } @@ -239,7 +240,7 @@ impl Peer { store_ctx: &mut StoreContext, data: Vec, ) -> Result { - self.propose_with_ctx(store_ctx, data, vec![]) + self.propose_with_ctx(store_ctx, data, ProposalContext::empty()) } #[inline] @@ -247,12 +248,12 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, data: Vec, - proposal_ctx: Vec, + proposal_ctx: ProposalContext, ) -> Result { // Should not propose normal in force leader state. // In `pre_propose_raft_command`, it rejects all the requests expect // conf-change if in force leader state. - if self.has_force_leader() { + if self.has_force_leader() && proposal_ctx != ProposalContext::ROLLBACK_MERGE { store_ctx.raft_metrics.invalid_proposal.force_leader.inc(); panic!( "[{}] {} propose normal in force leader state {:?}", @@ -274,7 +275,7 @@ impl Peer { }); } let last_index = self.raft_group().raft.raft_log.last_index(); - self.raft_group_mut().propose(proposal_ctx, data)?; + self.raft_group_mut().propose(proposal_ctx.to_vec(), data)?; if self.raft_group().raft.raft_log.last_index() == last_index { // The message is dropped silently, this usually due to leader absence // or transferring leader. Both cases can be considered as NotLeader error. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 85b8798bfb17..e72d32f8e914 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -242,6 +242,7 @@ bitflags! { const SPLIT = 0b0000_0010; const PREPARE_MERGE = 0b0000_0100; const COMMIT_MERGE = 0b0000_1000; + const ROLLBACK_MERGE = 0b0001_0000; } } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 9e5a5dffcd94..95d45c8e99c9 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -442,6 +442,7 @@ fn test_unsafe_recovery_demotion_reentrancy() { } #[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_rollback_merge() { let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); @@ -471,11 +472,15 @@ fn test_unsafe_recovery_rollback_merge() { let right_peer_2 = find_peer(&right, nodes[2]).unwrap().to_owned(); cluster.must_transfer_leader(left.get_id(), left_peer_2); cluster.must_transfer_leader(right.get_id(), right_peer_2); - cluster.must_try_merge(left.get_id(), right.get_id()); + cluster.try_merge(left.get_id(), right.get_id()); + let right_peer_0 = find_peer(&right, nodes[0]).unwrap().to_owned(); + pd_client.must_remove_peer(right.get_id(), right_peer_0); + cluster.must_remove_region(nodes[0], right.get_id()); // Makes the group lose its quorum. 
cluster.stop_node(nodes[1]); cluster.stop_node(nodes[2]); + fail::remove("on_schedule_merge"); { let put = new_put_cmd(b"k2", b"v2"); let req = new_request( @@ -491,7 +496,8 @@ fn test_unsafe_recovery_rollback_merge() { } cluster.must_enter_force_leader(left.get_id(), nodes[0], vec![nodes[1], nodes[2]]); - cluster.must_enter_force_leader(right.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + // Allow rollback merge to finish. + sleep_ms(100); // Construct recovery plan. let mut plan = pdpb::RecoveryPlan::default(); @@ -505,23 +511,12 @@ fn test_unsafe_recovery_rollback_merge() { let mut left_demote = pdpb::DemoteFailedVoters::default(); left_demote.set_region_id(left.get_id()); left_demote.set_failed_voters(left_demote_peers.into()); - let right_demote_peers: Vec = right - .get_peers() - .iter() - .filter(|&peer| peer.get_store_id() != nodes[0]) - .cloned() - .collect(); - let mut right_demote = pdpb::DemoteFailedVoters::default(); - right_demote.set_region_id(right.get_id()); - right_demote.set_failed_voters(right_demote_peers.into()); plan.mut_demotes().push(left_demote); - plan.mut_demotes().push(right_demote); // Triggers the unsafe recovery plan execution. pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); cluster.must_send_store_heartbeat(nodes[0]); - // Can't propose demotion as it's in merging mode let mut store_report = None; for _ in 0..20 { store_report = pd_client.must_get_store_report(nodes[0]); @@ -531,58 +526,18 @@ fn test_unsafe_recovery_rollback_merge() { sleep_ms(100); } assert_ne!(store_report, None); - let has_force_leader = store_report - .unwrap() - .get_peer_reports() - .iter() - .any(|p| p.get_is_force_leader()); - // Force leader is not exited due to demotion failure - assert!(has_force_leader); - - fail::remove("on_schedule_merge"); - fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); - - // Make sure merge check is scheduled, and rollback merge is triggered - sleep_ms(50); - - // Re-triggers the unsafe recovery plan execution. - pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); - cluster.must_send_store_heartbeat(nodes[0]); - let mut store_report = None; - for _ in 0..20 { - store_report = pd_client.must_get_store_report(nodes[0]); - if store_report.is_some() { - break; - } - sleep_ms(100); - } - assert_ne!(store_report, None); - // No force leader - for peer_report in store_report.unwrap().get_peer_reports() { - assert!(!peer_report.get_is_force_leader()); - } - // Demotion is done let mut demoted = false; for _ in 0..10 { let new_left = block_on(pd_client.get_region_by_id(left.get_id())) .unwrap() .unwrap(); - let new_right = block_on(pd_client.get_region_by_id(right.get_id())) - .unwrap() - .unwrap(); assert_eq!(new_left.get_peers().len(), 3); - assert_eq!(new_right.get_peers().len(), 3); demoted = new_left .get_peers() .iter() .filter(|peer| peer.get_store_id() != nodes[0]) - .all(|peer| peer.get_role() == metapb::PeerRole::Learner) - && new_right - .get_peers() - .iter() - .filter(|peer| peer.get_store_id() != nodes[0]) - .all(|peer| peer.get_role() == metapb::PeerRole::Learner); + .all(|peer| peer.get_role() == metapb::PeerRole::Learner); if demoted { break; } From 3f53e5976c2c1e8578897b73f46424fdf700bfe6 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:10:59 +0800 Subject: [PATCH 099/203] storage/txn: Add txn_status_cache module (#15749) ref tikv/tikv#11187 This PR adds a module named txn_status_cache. 
It provides the type `TxnStatusCache`, which will be then put into scheduler to cache start_ts->commit_ts information of recently-committed transactions. This is part of fixing issue #11187 and will also be used for possible future optimizations such as checking txn status locally. Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/lru.rs | 11 +- src/storage/metrics.rs | 15 + src/storage/txn/mod.rs | 1 + src/storage/txn/txn_status_cache.rs | 961 ++++++++++++++++++++++++++++ 4 files changed, 983 insertions(+), 5 deletions(-) create mode 100644 src/storage/txn/txn_status_cache.rs diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index b5bfdfbf7d56..302bfc9264b1 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -178,11 +178,12 @@ impl SizePolicy for CountTracker { } } -/// Some [`EvictPolicy`] may need to know what the entry bing popped out is to -/// determine if it really can be popped. But there is performance cost to -/// always get the tail entry. So we pass this interface to the `should_evict` -/// function. An implementation of `EvictPolicy` can read the tail entry only -/// when it really needs. +/// Some [`EvictPolicy`] (e.g. the `TxnStatusCache` in +/// `tikv::storage::txn::txn_status_cache` module) may need to know what the +/// entry bing popped out is to determine if it really can be popped. But there +/// is performance cost to always get the tail entry. So we pass this interface +/// to the `should_evict` function. An implementation of `EvictPolicy` can read +/// the tail entry only when it really needs. pub trait GetTailEntry { fn get_tail_entry(&self) -> Option<(&K, &V)>; } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e9477b56b0ff..cf7956d76b78 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -370,6 +370,13 @@ make_static_metric! { keys, }, } + + pub struct TxnStatusCacheSizeGauge: IntGauge { + "type" => { + used, + allocated, + } + } } lazy_static! { @@ -601,4 +608,12 @@ lazy_static! { exponential_buckets(1.0, 2.0, 16).unwrap() ) .unwrap(); + + pub static ref SCHED_TXN_STATUS_CACHE_SIZE: TxnStatusCacheSizeGauge = register_static_int_gauge_vec!( + TxnStatusCacheSizeGauge, + "tikv_scheduler_txn_status_cache_size", + "Statistics of size and capacity of txn status cache (represented in count of entries)", + &["type"] + ) + .unwrap(); } diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 640c534fc861..8c30ae0a068a 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -6,6 +6,7 @@ pub mod commands; pub mod flow_controller; pub mod sched_pool; pub mod scheduler; +pub mod txn_status_cache; mod actions; mod latch; diff --git a/src/storage/txn/txn_status_cache.rs b/src/storage/txn/txn_status_cache.rs new file mode 100644 index 000000000000..2428bbb99c58 --- /dev/null +++ b/src/storage/txn/txn_status_cache.rs @@ -0,0 +1,961 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module implements a cache for the status of recent finished +//! transactions. When a transaction is committed or rolled back, we store the +//! information in the cache for a while. Later, in some cases, one can find +//! the transaction status without accessing the physical storage. This helps +//! to quickly find out the transaction status in some cases. +//! +//! > **Note:** +//! > * Currently, only committed transactions are cached. We may also cache +//! 
> rolled-back transactions in the future. +//! > * Currently, the cache is only used to filter unnecessary stale prewrite +//! > requests. We will also consider use the cache for other purposes in the +//! > future. +//! +//! ## Why we need this? +//! +//! ### For filtering out unwanted late-arrived stale prewrite requests +//! +//! This solves a problem which has a complicated background. +//! +//! There's such an optimization in pessimistic transactions when TiKV runs +//! accompanied with TiDB: non-unique index keys don't need to be pessimistic- +//! locked, and WRITE CF don't need to be checked either when prewriting. The +//! correctness in case there's any kinds of conflicts will be protected by +//! the corresponding row key, as the index key is never written without +//! writing the corresponding row key. +//! +//! However, it's later found to be problematic, especially with async commit +//! and 1PC, as the prewrite requests on these index keys lost its idempotency. +//! You can see [this issue](https://github.com/tikv/tikv/issues/11187) to see +//! how it causes problems, including those that affects transaction +//! correctness. +//! +//! The problem happens when the prewrite request to the same index key is +//! sent more than once. Our first solution is to add a `is_retry_request` flag +//! to the second (or even more) requests, which is sent due to retrying from +//! the client side. But it's still imperfect, considering that it's +//! theoretically possible that the original request arrives to TiKV later than +//! the retried one. In fact, we once observed this happens in an environment +//! where the network is terribly unstable. +//! +//! Our second solution, additional to the previous one, is to use this cache. +//! Each committed transaction should be guaranteed to be kept in the cache for +//! [a long-enough time](CACHE_ITEMS_REQUIRED_KEEP_TIME). When a prewrite +//! request is received, it should check the cache before executing. If it finds +//! its belonging transaction is already committed, it won't skip constraint +//! check in WRITE CF. Note that if the index key is already committed but the +//! transaction info is not cached, then a late-arrived prewrite request cannot +//! be protected by this mechanism. This means we shouldn't miss any cacheable +//! transactions, and it is the reason why committed transactions should be +//! cached for *a long-enough time*. +//! +//! Unfortunately, the solution is still imperfect. As it's already known, it +//! may still be problematic due to the following reasons: +//! +//! 1. We don't have mechanism to refuse requests that have +//! past more than [CACHE_ITEMS_REQUIRED_KEEP_TIME] since they were sent. +//! 2. To prevent the cache from consuming too much more memory than expected, +//! we have a limit to the capacity (though the limit is very large), and it's +//! configurable (so the cache can be disabled, see how the `capacity` parameter +//! of function [TxnStatusCache::new] is used) as a way to escape from potential +//! faults. +//! 3. The cache can't be synced across different TiKV instances. +//! +//! The third case above needs detailed explanation to be clarified. This is +//! an example of the problem: +//! +//! 1. Client try to send prewrite request to TiKV A, who has the leader of the +//! region containing a index key. The request is not received by TiKV and the +//! client retries. +//! 2. The leader is transferred to TiKV B, and the retries prewrite request +//! is sent to it and processed successfully. +//! 3. 
+//! 4. The leader is transferred back to TiKV A.
+//! 5. The original request arrives at TiKV A and is executed. As the status
+//!    of the transaction is not in the cache on TiKV A, the prewrite request
+//!    will be handled in the normal way, skipping constraint checks.
+//!
+//! As of the time this module was written, the remaining cases above have not
+//! yet been handled, considering their extremely low probability and the high
+//! complexity of fixing them.
+//!
+//! The perfect and most elegant way to fix all of these problems is to never
+//! skip constraint checks or pessimistic locks for index keys; in other words,
+//! to totally remove the optimization on index keys mentioned above. But for
+//! historical reasons, this may lead to significant performance regression in
+//! existing clusters.
+//!
+//! ### For reading data locked by large transactions more efficiently
+//!
+//! * Note: the `TxnStatusCache` is designed with this usage in mind, but it is
+//!   not used for it yet.
+//!
+//! Consider the case where a very large transaction has locked a lot of keys
+//! after prewriting, while many simple reads and writes execute frequently, so
+//! these simple transactions frequently meet the locks left by the large
+//! transaction. It would be very inefficient for these small transactions to
+//! go back to the client and start the resolve-lock procedure. Even if the
+//! client side has a cache of that transaction's status, it still wastes an
+//! RTT.
+//!
+//! There would be more possibilities if we had such a cache on the TiKV side:
+//! for read requests, TiKV can check the cache to know whether it can read
+//! from the lock; and for write requests, if it finds that the lock's
+//! transaction is already committed, it can merge the resolve-lock commit and
+//! the write operation that the request needs to perform.
+
+use std::{
+    sync::{atomic::AtomicU64, Arc},
+    time::{Duration, SystemTime, UNIX_EPOCH},
+};
+
+use crossbeam::utils::CachePadded;
+use parking_lot::Mutex;
+use tikv_util::{
+    lru,
+    lru::{GetTailEntry, LruCache},
+};
+use txn_types::TimeStamp;
+
+use crate::storage::metrics::*;
+
+const TXN_STATUS_CACHE_SLOTS: usize = 128;
+
+/// A cache item should be kept for at least this time.
+/// Actually this should be guaranteed only for committed transactions. See
+/// [this section](#
+/// for-filtering-out-unwanted-late-arrived-stale-prewrite-requests) for details
+/// about why this is needed.
+const CACHE_ITEMS_REQUIRED_KEEP_TIME: Duration = Duration::from_secs(30);
+
+struct CacheEntry {
+    commit_ts: TimeStamp,
+    /// The system timestamp in milliseconds when the entry is inserted into
+    /// the cache.
+    insert_time: u64,
+}
+
+/// Defines the policy to evict expired entries from the cache.
+/// [`TxnStatusCache`] needs to keep entries for a while, so the common
+/// policy that only limits the capacity is not suitable here.
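+///
+/// Roughly, the decision it makes can be sketched as follows (pseudo-code
+/// only, written in terms of the fields and entry layout defined below):
+///
+/// ```ignore
+/// // Evict the tail entry if it has outlived the required keep time, or if
+/// // the cache has grown beyond its capacity.
+/// evict = now_millis > required_keep_time_millis + tail.insert_time
+///     || current_size > capacity;
+/// ```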
+struct TxnStatusCacheEvictPolicy {
+    required_keep_time_millis: u64,
+    #[cfg(test)]
+    simulated_system_time: Option<Arc<AtomicU64>>,
+}
+
+impl TxnStatusCacheEvictPolicy {
+    fn new(
+        required_keep_time: Duration,
+        #[allow(unused_variables)] simulated_system_time: Option<Arc<AtomicU64>>,
+    ) -> Self {
+        Self {
+            required_keep_time_millis: required_keep_time.as_millis() as u64,
+            #[cfg(test)]
+            simulated_system_time,
+        }
+    }
+
+    #[inline]
+    #[cfg(not(test))]
+    fn now(&self) -> SystemTime {
+        SystemTime::now()
+    }
+
+    /// When used in tests, the system time can be simulated by controlling the
+    /// field `simulated_system_time`.
+    #[inline]
+    #[cfg(test)]
+    fn now(&self) -> SystemTime {
+        // Always get the system time to simulate the latency.
+        let now = SystemTime::now();
+        if let Some(pseudo_system_time) = &self.simulated_system_time {
+            UNIX_EPOCH
+                + std::time::Duration::from_millis(
+                    pseudo_system_time.load(std::sync::atomic::Ordering::Acquire),
+                )
+        } else {
+            now
+        }
+    }
+}
+
+impl lru::EvictPolicy<TimeStamp, CacheEntry> for TxnStatusCacheEvictPolicy {
+    fn should_evict(
+        &self,
+        current_size: usize,
+        capacity: usize,
+        get_tail_entry: &impl GetTailEntry<TimeStamp, CacheEntry>,
+    ) -> bool {
+        // See how much time has elapsed since the tail entry was inserted.
+        // If it's long enough, remove it.
+        if let Some((_, v)) = get_tail_entry.get_tail_entry() {
+            if self.now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64
+                > self.required_keep_time_millis + v.insert_time
+            {
+                return true;
+            }
+        }
+
+        // If the capacity limit is exceeded, remove it.
+        current_size > capacity
+    }
+}
+
+type TxnStatusCacheSlot =
+    LruCache<TimeStamp, CacheEntry, lru::CountTracker, TxnStatusCacheEvictPolicy>;
+
+/// The cache for storing transaction status. It holds recent
+/// `start_ts` -> `commit_ts` pairs for a while, which can be useful for
+/// quickly but not strictly determining transaction status.
+///
+/// `TxnStatusCache` is divided into several slots
+/// to make the locking more fine-grained. Each slot uses an [`LruCache`] as
+/// the internal implementation, with a customized evict policy. However, we do
+/// not always adopt the LRU behavior: some operations on an existing entry in
+/// the cache won't promote it to the most-recent place.
+///
+/// Note that the `TxnStatusCache` updates metrics in some operations assuming
+/// there's at most one instance of `TxnStatusCache` in a process.
+pub struct TxnStatusCache {
+    slots: Vec<CachePadded<Mutex<TxnStatusCacheSlot>>>,
+    is_enabled: bool,
+}
+
+unsafe impl Sync for TxnStatusCache {}
+
+impl TxnStatusCache {
+    fn new_impl(
+        slots: usize,
+        required_keep_time: Duration,
+        capacity: usize,
+        simulated_system_time: Option<Arc<AtomicU64>>,
+    ) -> Self {
+        if capacity == 0 {
+            return Self {
+                slots: vec![],
+                is_enabled: false,
+            };
+        }
+
+        // The limit of the LruCache of each slot.
+        let allowed_capacity_per_slot = capacity / slots;
+        // The total memory allocated initially by the LruCache's internal data
+        // structures for all slots.
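+        // This initial footprint is reported to the `allocated` gauge below,
+        // so the metric is meaningful even before any entry is inserted.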
+
+        let mut initial_allocated_capacity_total = 0;
+        let res = Self {
+            slots: (0..slots)
+                .map(|_| {
+                    let cache = LruCache::new(
+                        allowed_capacity_per_slot,
+                        0,
+                        lru::CountTracker::default(),
+                        TxnStatusCacheEvictPolicy::new(
+                            required_keep_time,
+                            simulated_system_time.clone(),
+                        ),
+                    );
+                    let allocated_capacity = cache.internal_allocated_capacity();
+                    initial_allocated_capacity_total += allocated_capacity;
+                    Mutex::new(cache).into()
+                })
+                .collect(),
+            is_enabled: true,
+        };
+        SCHED_TXN_STATUS_CACHE_SIZE
+            .allocated
+            .set(initial_allocated_capacity_total as i64);
+        res
+    }
+
+    pub fn new(capacity: usize) -> Self {
+        Self::with_slots_and_time_limit(
+            TXN_STATUS_CACHE_SLOTS,
+            CACHE_ITEMS_REQUIRED_KEEP_TIME,
+            capacity,
+        )
+    }
+
+    #[cfg(test)]
+    pub fn new_for_test() -> Self {
+        // 1M capacity should be enough for tests.
+        Self::with_slots_and_time_limit(16, CACHE_ITEMS_REQUIRED_KEEP_TIME, 1 << 20)
+    }
+
+    pub fn with_slots_and_time_limit(
+        slots: usize,
+        required_keep_time: Duration,
+        capacity: usize,
+    ) -> Self {
+        Self::new_impl(slots, required_keep_time, capacity, None)
+    }
+
+    /// Create a `TxnStatusCache` instance for test purposes, with simulated
+    /// system time enabled. This helps when testing functionalities that are
+    /// related to system time.
+    ///
+    /// An `AtomicU64` will be returned. Store timestamps
+    /// in milliseconds in it to control the time.
+    #[cfg(test)]
+    fn with_simulated_system_time(
+        slots: usize,
+        required_keep_time: Duration,
+        capacity: usize,
+    ) -> (Self, Arc<AtomicU64>) {
+        let system_time = Arc::new(AtomicU64::new(0));
+        let res = Self::new_impl(
+            slots,
+            required_keep_time,
+            capacity,
+            Some(system_time.clone()),
+        );
+        (res, system_time)
+    }
+
+    fn slot_index(&self, start_ts: TimeStamp) -> usize {
+        fxhash::hash(&start_ts) % self.slots.len()
+    }
+
+    /// Insert a transaction status into the cache. The current system time
+    /// should be passed from outside to avoid getting the system time
+    /// repeatedly when multiple items are being inserted.
+    ///
+    /// If the transaction's information is already in the cache, it will
+    /// **NOT** be promoted to the most-recent place of the internal LRU.
+    pub fn insert(&self, start_ts: TimeStamp, commit_ts: TimeStamp, now: SystemTime) {
+        if !self.is_enabled {
+            return;
+        }
+
+        let insert_time = now.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64;
+        let mut slot = self.slots[self.slot_index(start_ts)].lock();
+        let previous_size = slot.size();
+        let previous_allocated = slot.internal_allocated_capacity();
+        slot.insert_if_not_exist(
+            start_ts,
+            CacheEntry {
+                commit_ts,
+                insert_time,
+            },
+        );
+        let size = slot.size();
+        let allocated = slot.internal_allocated_capacity();
+        // Update statistics.
+        // CAUTION: Assuming that only one TxnStatusCache instance is in a TiKV process.
+        SCHED_TXN_STATUS_CACHE_SIZE
+            .used
+            .add(size as i64 - previous_size as i64);
+        SCHED_TXN_STATUS_CACHE_SIZE
+            .allocated
+            .add(allocated as i64 - previous_allocated as i64);
+    }
+
+    /// Try to get an item from the cache, without promoting the item (if it
+    /// exists) to the most recent place.
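+    ///
+    /// A usage sketch (the surrounding caller is hypothetical, not part of
+    /// this patch): a prewrite path could consult the cache like this before
+    /// deciding whether it is safe to skip the constraint check:
+    ///
+    /// ```ignore
+    /// if txn_status_cache.get_no_promote(start_ts).is_some() {
+    ///     // The transaction is already known to be committed, so this is a
+    ///     // late-arrived stale request; do a full constraint check instead.
+    /// }
+    /// ```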
+    pub fn get_no_promote(&self, start_ts: TimeStamp) -> Option<TimeStamp> {
+        if !self.is_enabled {
+            return None;
+        }
+
+        let slot = self.slots[self.slot_index(start_ts)].lock();
+        slot.get_no_promote(&start_ts).map(|entry| entry.commit_ts)
+    }
+
+    pub fn get(&self, start_ts: TimeStamp) -> Option<TimeStamp> {
+        if !self.is_enabled {
+            return None;
+        }
+
+        let mut slot = self.slots[self.slot_index(start_ts)].lock();
+        slot.get(&start_ts).map(|entry| entry.commit_ts)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        sync::{
+            atomic::{AtomicU64, Ordering},
+            Arc,
+        },
+        time::{Duration, Instant, SystemTime},
+    };
+
+    use rand::{prelude::SliceRandom, Rng};
+
+    use super::*;
+
+    fn bench_insert_impl(b: &mut test::Bencher, init_size: usize) {
+        let (c, time) = TxnStatusCache::with_simulated_system_time(
+            TXN_STATUS_CACHE_SLOTS,
+            Duration::from_millis(init_size as u64),
+            1 << 20,
+        );
+        let start_time = SystemTime::now();
+        // Spread these items evenly in a specific time limit, so that every
+        // time a new item is inserted, an item will be popped out.
+        for i in 1..=init_size {
+            c.insert(
+                (i as u64).into(),
+                (i as u64 + 1).into(),
+                start_time + Duration::from_millis(i as u64),
+            );
+        }
+        let mut current_time_shift = (init_size + 1) as u64;
+        b.iter(|| {
+            let simulated_now = start_time + Duration::from_millis(current_time_shift);
+            // Simulate the system time advancing.
+            time.store(
+                simulated_now
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap()
+                    .as_millis() as u64,
+                Ordering::Release,
+            );
+            c.insert(
+                current_time_shift.into(),
+                (current_time_shift + 1).into(),
+                simulated_now,
+            );
+            current_time_shift += 1;
+        });
+        test::black_box(&c);
+    }
+
+    fn bench_get_impl(b: &mut test::Bencher, init_size: usize) {
+        let c = TxnStatusCache::with_slots_and_time_limit(
+            TXN_STATUS_CACHE_SLOTS,
+            CACHE_ITEMS_REQUIRED_KEEP_TIME,
+            1 << 20,
+        );
+        let now = SystemTime::now();
+        for i in 1..=init_size {
+            c.insert(
+                (i as u64).into(),
+                (i as u64 + 1).into(),
+                now + Duration::from_millis(i as u64),
+            );
+        }
+        let rand_range = if init_size == 0 { 10000 } else { init_size } as u64;
+        b.iter(|| {
+            let ts = rand::thread_rng().gen_range(0u64, rand_range);
+            let res = c.get_no_promote(ts.into());
+            test::black_box(&res);
+        })
+    }
+
+    #[bench]
+    fn bench_insert_empty(b: &mut test::Bencher) {
+        bench_insert_impl(b, 0);
+    }
+
+    #[bench]
+    fn bench_insert_100000(b: &mut test::Bencher) {
+        bench_insert_impl(b, 100000);
+    }
+
+    #[bench]
+    fn bench_get_empty(b: &mut test::Bencher) {
+        bench_get_impl(b, 0);
+    }
+
+    #[bench]
+    fn bench_get_100000(b: &mut test::Bencher) {
+        bench_get_impl(b, 100000);
+    }
+
+    /// A simple statistics tool for collecting a set of data and calculating
+    /// the average, stddev, and percentiles (by using a linear histogram).
+    /// Data is collected in u128, and results are given in f64.
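+    ///
+    /// For example, with `bucket_width = 20`, an observed value of 95 falls
+    /// into bucket 4 (the half-open range [80, 100)); percentiles are then
+    /// interpolated linearly inside the bucket they land in.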
+ struct SimpleStatistics { + sum: u128, + sum_square: u128, + count: usize, + bucket_width: u128, + buckets: Vec, + } + + impl SimpleStatistics { + fn new(bucket_width: u128) -> Self { + Self { + sum: 0, + sum_square: 0, + count: 0, + bucket_width, + buckets: vec![], + } + } + + /// Merge another instance into the current one + fn add(&mut self, other: Self) { + self.sum += other.sum; + self.sum_square += other.sum_square; + self.count += other.count; + assert_eq!(self.bucket_width, other.bucket_width); + if self.buckets.len() < other.buckets.len() { + self.buckets.resize(other.buckets.len(), 0); + } + for (count, other_count) in self.buckets.iter_mut().zip(other.buckets.iter()) { + *count += *other_count + } + } + + fn avg(&self) -> f64 { + self.sum as f64 / (self.count as f64) + } + + fn stddev(&self) -> f64 { + let avg = self.avg(); + let sum_sqr_diff: f64 = + (self.sum_square as f64) - (self.sum as f64 * avg * 2.0) + avg * self.count as f64; + (sum_sqr_diff / (self.count - 1) as f64).sqrt() + } + + /// Calculate the percentile value at specified position (should be in + /// range [0, 1]) + fn percentile(&self, position: f64) -> f64 { + let mut bucket = self.buckets.len(); + let mut prefix_sum = self.count; + while bucket > 0 { + bucket -= 1; + prefix_sum -= self.buckets[bucket]; + let prefix_percentile = prefix_sum as f64 / self.count as f64; + if prefix_percentile <= position { + assert_le!(prefix_sum as f64, position * self.count as f64); + assert_lt!( + position * self.count as f64, + (prefix_sum + self.buckets[bucket]) as f64 + ); + break; + } + } + + bucket as f64 * self.bucket_width as f64 + + (position * self.count as f64 - prefix_sum as f64) * self.bucket_width as f64 + / self.buckets[bucket] as f64 + } + + fn observe(&mut self, value: u128) { + self.sum += value; + self.sum_square += value * value; + self.count += 1; + let bucket = (value / self.bucket_width) as usize; + if self.buckets.len() <= bucket { + self.buckets.resize(bucket + 1, 0); + } + self.buckets[bucket] += 1; + } + } + + fn bench_concurrent_impl( + name: &str, + threads: usize, + function: impl Fn(u64) -> T + Send + Sync + 'static, + ) { + let start_time = Instant::now(); + // Run the benchmark code repeatedly for 10 seconds. + const TIME_LIMIT: Duration = Duration::from_secs(10); + let iteration = Arc::new(AtomicU64::new(0)); + + // Make the lifetime checker happy. 
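+        // (Each worker thread below needs its own `'static` handle to the
+        // closure, so it is shared through an `Arc` and cloned per thread.)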
+ let function = Arc::new(function); + + let mut handles = Vec::with_capacity(threads); + for _ in 0..threads { + let f = function.clone(); + let iteration = iteration.clone(); + let handle = std::thread::spawn(move || { + let mut stats = SimpleStatistics::new(20); + loop { + if start_time.elapsed() > TIME_LIMIT { + break; + } + let i = iteration.fetch_add(1, Ordering::SeqCst); + let iter_start_time = Instant::now(); + test::black_box(f(i)); + let duration = iter_start_time.elapsed(); + stats.observe(duration.as_nanos()); + } + stats + }); + handles.push(handle); + } + + let mut total_stats = SimpleStatistics::new(20); + for h in handles { + total_stats.add(h.join().unwrap()); + } + + println!( + "benchmark {}: duration per iter: avg: {:?}, stddev: {:?}, percentile .99: {:?}, percentile .999: {:?}", + name, + Duration::from_nanos(total_stats.avg() as u64), + Duration::from_nanos(total_stats.stddev() as u64), + Duration::from_nanos(total_stats.percentile(0.99) as u64), + Duration::from_nanos(total_stats.percentile(0.999) as u64), + ); + } + + fn bench_txn_status_cache_concurrent_impl( + threads: usize, + init_size: usize, + simulate_contention: bool, + get_before_insert: bool, + ) { + let slots = if simulate_contention { + 1 + } else { + TXN_STATUS_CACHE_SLOTS + }; + let (c, time) = TxnStatusCache::with_simulated_system_time( + slots, + Duration::from_millis(init_size as u64), + 1 << 20, + ); + let start_time = SystemTime::now(); + for i in 1..=init_size { + c.insert( + (i as u64).into(), + (i as u64 + 1).into(), + start_time + Duration::from_millis(i as u64), + ); + } + + let name = format!( + "bench_concurrent_{}_{}_size{}{}", + if get_before_insert { + "get_and_insert" + } else { + "insert" + }, + threads, + init_size, + if simulate_contention { + "_contention" + } else { + "" + }, + ); + + bench_concurrent_impl(&name, threads, move |iter| { + let time_shift = init_size as u64 + iter; + let now = start_time + Duration::from_millis(time_shift); + time.store( + now.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64, + Ordering::Release, + ); + + if get_before_insert { + test::black_box(c.get_no_promote(time_shift.into())); + } + c.insert(time_shift.into(), (time_shift + 1).into(), now); + test::black_box(&c); + }); + } + + #[bench] + #[ignore] + fn bench_txn_status_cache_concurrent(_b: &mut test::Bencher) { + // This case is implemented to run the concurrent benchmark in a handy way + // just like running other normal benchmarks. However, it doesn't seem + // to be possible to benchmark an operation in concurrent way by using + // either the built-in bencher or criterion. + // Here we test it in our own way without using the built-in bencher, + // and output the result by stdout. + // When you need to run this benchmark, comment out the `#[ignore]` and + // add --nocapture in your benchmark command line to get the result. 
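+        // For example, something along the lines of
+        //     cargo bench bench_txn_status_cache_concurrent -- --nocapture
+        // should work, though the exact invocation depends on the local
+        // benchmark setup.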
+ bench_txn_status_cache_concurrent_impl(16, 10000, false, false); + bench_txn_status_cache_concurrent_impl(16, 10000, true, false); + bench_txn_status_cache_concurrent_impl(16, 10000, false, true); + bench_txn_status_cache_concurrent_impl(16, 10000, true, true); + bench_txn_status_cache_concurrent_impl(64, 10000, false, false); + bench_txn_status_cache_concurrent_impl(64, 10000, true, false); + bench_txn_status_cache_concurrent_impl(64, 10000, false, true); + bench_txn_status_cache_concurrent_impl(64, 10000, true, true); + } + + #[test] + fn test_insert_and_get() { + let c = TxnStatusCache::new_for_test(); + assert!(c.get_no_promote(1.into()).is_none()); + + let now = SystemTime::now(); + + c.insert(1.into(), 2.into(), now); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + c.insert(3.into(), 4.into(), now); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // This won't actually happen, since a transaction will never have commit info + // with two different commit_ts. We just use this to check replacing + // won't happen. + c.insert(1.into(), 4.into(), now); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + + let mut start_ts_list: Vec<_> = (1..100).step_by(2).map(TimeStamp::from).collect(); + start_ts_list.shuffle(&mut rand::thread_rng()); + for &start_ts in &start_ts_list { + let commit_ts = start_ts.next(); + c.insert(start_ts, commit_ts, now); + } + start_ts_list.shuffle(&mut rand::thread_rng()); + for &start_ts in &start_ts_list { + let commit_ts = start_ts.next(); + assert_eq!(c.get_no_promote(start_ts).unwrap(), commit_ts); + } + } + + #[test] + fn test_evicting_expired() { + let (c, time) = + TxnStatusCache::with_simulated_system_time(1, Duration::from_millis(1000), 1000); + let time_base = SystemTime::now(); + let set_time = |offset_millis: u64| { + time.store( + time_base.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + offset_millis, + Ordering::Release, + ) + }; + let now = || UNIX_EPOCH + Duration::from_millis(time.load(Ordering::Acquire)); + + set_time(0); + assert_lt!( + time_base.duration_since(now()).unwrap(), + Duration::from_millis(1) + ); + + c.insert(1.into(), 2.into(), now()); + set_time(1); + c.insert(3.into(), 4.into(), now()); + set_time(2); + c.insert(5.into(), 6.into(), now()); + // Size should be calculated by count. + assert_eq!(c.slots[0].lock().size(), 3); + + // Insert entry 1 again. So if entry 1 is the first one to be popped out, it + // verifies that inserting an existing key won't promote it. + c.insert(1.into(), 2.into(), now()); + + // All the 3 entries are kept + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + assert_eq!(c.get_no_promote(5.into()).unwrap(), 6.into()); + + set_time(1001); + c.insert(7.into(), 8.into(), now()); + // Entry 1 will be popped out. + assert!(c.get_no_promote(1.into()).is_none()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + assert_eq!(c.get_no_promote(5.into()).unwrap(), 6.into()); + set_time(1004); + c.insert(9.into(), 10.into(), now()); + // It pops more than 1 entries if there are many expired items at the tail. + // Entry 3 and 5 will be popped out. 
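+        // (At time 1004, both entry 3 (inserted at 1) and entry 5 (inserted
+        // at 2) have exceeded the 1000 ms keep time, while entry 7 (inserted
+        // at 1001) has not.)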
+ assert!(c.get_no_promote(1.into()).is_none()); + assert!(c.get_no_promote(3.into()).is_none()); + assert!(c.get_no_promote(5.into()).is_none()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + assert_eq!(c.get_no_promote(9.into()).unwrap(), 10.into()); + + // Now the cache's contents are: + // 7@1001, 9@1004 + // Test `get` promotes an entry and entries are not in order on insert time. + assert_eq!(c.get(7.into()).unwrap(), 8.into()); + set_time(2003); + c.insert(11.into(), 12.into(), now()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + assert_eq!(c.get_no_promote(9.into()).unwrap(), 10.into()); + assert_eq!(c.get_no_promote(11.into()).unwrap(), 12.into()); + + set_time(2005); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(7.into()).is_none()); + assert!(c.get_no_promote(9.into()).is_none()); + assert_eq!(c.get_no_promote(11.into()).unwrap(), 12.into()); + + // Now the cache's contents are: + // 11@2003, 13@2005 + // Test inserting existed entries. + // According to the implementation of LruCache, though it won't do any update to + // the content, it still check the tail to see if anything can be + // evicted. + set_time(3004); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(11.into()).is_none()); + assert_eq!(c.get_no_promote(13.into()).unwrap(), 14.into()); + + set_time(3006); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(13.into()).is_none()); + + // Now the cache is empty. + c.insert(15.into(), 16.into(), now()); + set_time(3008); + c.insert(17.into(), 18.into(), now()); + // Test inserting existed entry doesn't promote it. + // Re-insert 15. + set_time(3009); + c.insert(15.into(), 16.into(), now()); + set_time(4007); + c.insert(19.into(), 20.into(), now()); + // 15's insert time is not updated, and is at the tail of the LRU, so it should + // be popped. + assert!(c.get_no_promote(15.into()).is_none()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + + // Now the cache's contents are: + // 17@3008, 19@4007 + // Test system time being changed, which can lead to current time being less + // than entries' insert time. + set_time(2000); + c.insert(21.into(), 22.into(), now()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + assert_eq!(c.get_no_promote(21.into()).unwrap(), 22.into()); + set_time(3500); + c.insert(23.into(), 24.into(), now()); + assert_eq!(c.get_no_promote(21.into()).unwrap(), 22.into()); + assert_eq!(c.get(17.into()).unwrap(), 18.into()); + assert_eq!(c.get(19.into()).unwrap(), 20.into()); + assert_eq!(c.get(23.into()).unwrap(), 24.into()); + // `get` promotes the entries, and entry 21 is put to the tail. + c.insert(23.into(), 24.into(), now()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + assert!(c.get_no_promote(21.into()).is_none()); + assert_eq!(c.get_no_promote(23.into()).unwrap(), 24.into()); + + // Now the cache's contents are: + // 17@3008, 19@4007, 23@3500 + // The time passed to `insert` may differ from the time fetched in + // the `TxnStatusCacheEvictPolicy` as they are fetched at different time. 
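+        // For example, right below an entry is inserted with timestamp 4007
+        // while the policy reads 4009, which is already enough to evict
+        // 17@3008 (4009 > 1000 + 3008).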
+ set_time(4009); + // Insert with time 4007, but check with time 4009 + c.insert(25.into(), 26.into(), now() - Duration::from_millis(2)); + assert!(c.get_no_promote(17.into()).is_none()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + + // The cache's contents: + // 19@4007, 23@3500, 25@4007 + set_time(4010); + c.insert(27.into(), 28.into(), now()); + // The cache's contents: + // 19@4007, 23@3500, 25@4007, 27@4010 + + // It's also possible to check with a lower time considering that system time + // may be changed. Insert with time 5018, but check with time 5008 + set_time(5008); + c.insert(29.into(), 30.into(), now() + Duration::from_millis(10)); + assert!(c.get_no_promote(19.into()).is_none()); + assert!(c.get_no_promote(23.into()).is_none()); + assert!(c.get_no_promote(25.into()).is_none()); + assert_eq!(c.get_no_promote(27.into()).unwrap(), 28.into()); + assert_eq!(c.get_no_promote(29.into()).unwrap(), 30.into()); + + // Now the the cache's contents are: + // 27@4010, 29@5018 + // Considering the case that system time is being changed, it's even + // possible that the entry being inserted is already expired + // comparing to the current time. It doesn't matter whether the + // entry will be dropped immediately or not. We just ensure it won't + // trigger more troubles. + set_time(7000); + c.insert(31.into(), 32.into(), now() - Duration::from_millis(1001)); + assert!(c.get_no_promote(27.into()).is_none()); + assert!(c.get_no_promote(29.into()).is_none()); + assert!(c.get_no_promote(31.into()).is_none()); + assert_eq!(c.slots[0].lock().size(), 0); + } + + #[test] + fn test_setting_capacity() { + let c = TxnStatusCache::new_impl(2, Duration::from_millis(1000), 10, None); + assert!(c.is_enabled); + assert_eq!(c.slots.len(), 2); + assert_eq!(c.slots[0].lock().capacity(), 5); + assert_eq!(c.slots[1].lock().capacity(), 5); + + let c = TxnStatusCache::new_impl(2, Duration::from_millis(1000), 0, None); + assert!(!c.is_enabled); + assert_eq!(c.slots.len(), 0); + // All operations are noops and won't cause panic or return any incorrect + // result. + c.insert(1.into(), 2.into(), SystemTime::now()); + assert!(c.get_no_promote(1.into()).is_none()); + assert!(c.get(1.into()).is_none()); + } + + #[test] + fn test_evicting_by_capacity() { + let (c, time) = + TxnStatusCache::with_simulated_system_time(1, Duration::from_millis(1000), 5); + let time_base = SystemTime::now(); + let set_time = |offset_millis: u64| { + time.store( + time_base.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + offset_millis, + Ordering::Release, + ) + }; + let now = || UNIX_EPOCH + Duration::from_millis(time.load(Ordering::Acquire)); + + set_time(0); + c.insert(1.into(), 2.into(), now()); + set_time(2); + c.insert(3.into(), 4.into(), now()); + set_time(4); + c.insert(5.into(), 6.into(), now()); + set_time(6); + c.insert(7.into(), 8.into(), now()); + + // The cache can keep at most 5 entries. + set_time(8); + c.insert(9.into(), 10.into(), now()); + // Entry 1 not evicted. 5 entries in the cache currently + assert_eq!(c.slots[0].lock().len(), 5); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + set_time(10); + c.insert(11.into(), 12.into(), now()); + // Entry 1 evicted. Still 5 entries in the cache. + assert_eq!(c.slots[0].lock().len(), 5); + assert!(c.get_no_promote(1.into()).is_none()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // Nothing will be evicted after trying to insert an existing key. 
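+        // (Re-inserting key 11 below does not grow the cache, so the capacity
+        // check `current_size > capacity` stays false and the tail entry 3
+        // survives.)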
+ c.insert(11.into(), 12.into(), now()); + assert_eq!(c.slots[0].lock().len(), 5); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // Current contents (key@time): + // 3@2, 5@4, 7@6. 9@8, 11@10 + // Evicting by time works as well. + set_time(1005); + c.insert(13.into(), 14.into(), now()); + assert_eq!(c.slots[0].lock().len(), 4); + assert!(c.get_no_promote(3.into()).is_none()); + assert!(c.get_no_promote(5.into()).is_none()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + + // Reorder the entries by `get` to prepare for testing the next case. + assert_eq!(c.get(7.into()).unwrap(), 8.into()); + assert_eq!(c.get(9.into()).unwrap(), 10.into()); + assert_eq!(c.get(11.into()).unwrap(), 12.into()); + + c.insert(15.into(), 16.into(), now()); + // Current contents: + // 13@1005, 7@6. 9@8, 11@10, 15@1005 + assert_eq!(c.slots[0].lock().len(), 5); + // Expired entries that are not the tail can be evicted after the tail + // is evicted due to capacity exceeded. + set_time(1011); + c.insert(17.into(), 18.into(), now()); + assert_eq!(c.slots[0].lock().len(), 2); + assert!(c.get_no_promote(13.into()).is_none()); + assert!(c.get_no_promote(7.into()).is_none()); + assert!(c.get_no_promote(9.into()).is_none()); + assert!(c.get_no_promote(11.into()).is_none()); + assert_eq!(c.get(15.into()).unwrap(), 16.into()); + assert_eq!(c.get(17.into()).unwrap(), 18.into()); + } +} From b48812fef7e9dc7ec7d2cd0d178ccbc6e7c9c2cb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Sat, 21 Oct 2023 01:36:29 +0800 Subject: [PATCH 100/203] *: remove unused dyn and grpc external storage (#15808) close tikv/tikv#15807 `CloudDynamic` was initially designed to facilitate out-of-process access to external storages in a cloud environment. However, it has never been utilized in either production or testing scenarios, rendering it essentially dead code that adds unnecessary complexity. It would be best to remove it altogether. Also, it removes features "cloud-gcp", "cloud-aws" and "cloud-azure" for most crates, as they are always enabled, and there is no reason to enable and disable them at compile time. 
Signed-off-by: Neil Shen --- Cargo.lock | 71 +---- Cargo.toml | 10 +- cmd/tikv-ctl/Cargo.toml | 5 - components/backup-stream/Cargo.toml | 3 +- components/backup-stream/src/router.rs | 7 +- components/backup/Cargo.toml | 8 +- components/backup/src/endpoint.rs | 5 +- components/backup/src/service.rs | 2 +- components/backup/src/writer.rs | 7 +- components/cloud/aws/src/s3.rs | 107 +------- components/cloud/azure/src/azblob.rs | 84 +----- components/cloud/gcp/src/gcs.rs | 74 +----- components/cloud/src/blob.rs | 15 -- components/external_storage/Cargo.toml | 27 +- .../{export => }/examples/scli.rs | 62 +---- components/external_storage/export/Cargo.toml | 96 ------- .../export/src/bin/tikv-cloud-storage.rs | 61 ----- .../external_storage/export/src/dylib.rs | 247 ------------------ .../export/src/grpc_service.rs | 131 ---------- components/external_storage/export/src/lib.rs | 15 -- .../external_storage/export/src/request.rs | 91 ------- .../external_storage/src/dylib_client.rs | 169 ------------ .../{export => }/src/export.rs | 163 +----------- .../external_storage/src/grpc_client.rs | 134 ---------- components/external_storage/src/lib.rs | 8 +- components/external_storage/src/request.rs | 101 ------- components/raftstore-v2/Cargo.toml | 4 - components/raftstore/Cargo.toml | 6 +- components/sst_importer/Cargo.toml | 9 +- .../sst_importer/src/caching/storage_cache.rs | 4 +- components/sst_importer/src/sst_importer.rs | 62 +++-- components/sst_importer/src/util.rs | 2 +- components/test_backup/Cargo.toml | 8 +- components/test_backup/src/lib.rs | 2 +- tests/Cargo.toml | 7 +- tests/failpoints/cases/test_import_service.rs | 2 +- tests/integrations/backup/mod.rs | 2 +- tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/import/test_sst_service.rs | 2 +- tests/integrations/import/util.rs | 2 +- 40 files changed, 101 insertions(+), 1716 deletions(-) rename components/external_storage/{export => }/examples/scli.rs (75%) delete mode 100644 components/external_storage/export/Cargo.toml delete mode 100644 components/external_storage/export/src/bin/tikv-cloud-storage.rs delete mode 100644 components/external_storage/export/src/dylib.rs delete mode 100644 components/external_storage/export/src/grpc_service.rs delete mode 100644 components/external_storage/export/src/lib.rs delete mode 100644 components/external_storage/export/src/request.rs delete mode 100644 components/external_storage/src/dylib_client.rs rename components/external_storage/{export => }/src/export.rs (54%) delete mode 100644 components/external_storage/src/grpc_client.rs delete mode 100644 components/external_storage/src/request.rs diff --git a/Cargo.lock b/Cargo.lock index fccff7d7822d..f222631d7723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -506,7 +506,6 @@ dependencies = [ "engine_traits", "error_code", "external_storage", - "external_storage_export", "file_system", "futures 0.3.15", "futures-util", @@ -560,7 +559,6 @@ dependencies = [ "error_code", "etcd-client", "external_storage", - "external_storage_export", "fail", "file_system", "futures 0.3.15", @@ -1835,44 +1833,6 @@ dependencies = [ [[package]] name = "external_storage" version = "0.0.1" -dependencies = [ - "async-compression", - "async-trait", - "bytes", - "encryption", - "engine_traits", - "fail", - "ffi-support", - "file_system", - "futures 0.3.15", - "futures-executor", - "futures-io", - "futures-util", - "grpcio", - "kvproto", - "lazy_static", - "libloading", - "matches", - "openssl", - "prometheus", - "protobuf", - "rand 0.8.5", - "rusoto_core", - 
"rust-ini", - "slog", - "slog-global", - "structopt", - "tempfile", - "tikv_alloc", - "tikv_util", - "tokio", - "tokio-util", - "url", -] - -[[package]] -name = "external_storage_export" -version = "0.0.1" dependencies = [ "async-compression", "async-trait", @@ -1881,30 +1841,23 @@ dependencies = [ "cloud", "encryption", "engine_traits", - "external_storage", - "ffi-support", "file_system", "futures 0.3.15", - "futures-executor", "futures-io", "futures-util", "gcp", - "grpcio", "kvproto", "lazy_static", - "libc 0.2.146", - "libloading", "matches", - "nix 0.24.1", - "once_cell", - "protobuf", + "openssl", + "prometheus", + "rand 0.8.5", "rust-ini", - "signal-hook", "slog", "slog-global", - "slog-term", "structopt", "tempfile", + "tikv_alloc", "tikv_util", "tokio", "tokio-util", @@ -1937,16 +1890,6 @@ dependencies = [ "instant", ] -[[package]] -name = "ffi-support" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f85d4d1be103c0b2d86968f0b0690dc09ac0ba205b90adb0389b552869e5000e" -dependencies = [ - "lazy_static", - "log", -] - [[package]] name = "file_system" version = "0.1.0" @@ -5689,7 +5632,7 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "external_storage_export", + "external_storage", "file_system", "futures 0.3.15", "futures-util", @@ -6004,7 +5947,7 @@ dependencies = [ "concurrency_manager", "crc64fast", "engine_traits", - "external_storage_export", + "external_storage", "file_system", "futures 0.3.15", "futures-executor", @@ -6259,7 +6202,7 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "external_storage_export", + "external_storage", "fail", "file_system", "futures 0.3.15", diff --git a/Cargo.toml b/Cargo.toml index bd2b49469509..32b2d858b6a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,16 +22,14 @@ portable = ["engine_rocks/portable"] sse = ["engine_rocks/sse"] mem-profiling = ["tikv_alloc/mem-profiling"] failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints", "raft_log_engine/failpoints"] -cloud-aws = ["encryption_export/cloud-aws", "sst_importer/cloud-aws"] -cloud-gcp = ["encryption_export/cloud-gcp", "sst_importer/cloud-gcp"] -cloud-azure = ["encryption_export/cloud-azure", "sst_importer/cloud-azure"] +cloud-aws = ["encryption_export/cloud-aws"] +cloud-gcp = ["encryption_export/cloud-gcp"] +cloud-azure = ["encryption_export/cloud-azure"] testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] test-engines-panic = ["engine_test/test-engines-panic"] -cloud-storage-grpc = ["sst_importer/cloud-storage-grpc"] -cloud-storage-dylib = ["sst_importer/cloud-storage-dylib"] pprof-fp = ["pprof/frame-pointer"] # for testing configure propegate to other crates @@ -239,7 +237,6 @@ members = [ # "components/engine_tirocks", "components/error_code", "components/external_storage", - "components/external_storage/export", "components/file_system", "components/into_other", "components/keys", @@ -314,7 +311,6 @@ engine_traits = { path = "components/engine_traits" } engine_traits_tests = { path = "components/engine_traits_tests", default-features = false } error_code = { path = "components/error_code" } external_storage = { path = 
"components/external_storage" } -external_storage_export = { path = "components/external_storage/export" } file_system = { path = "components/file_system" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index a36e72b3c641..0a630ebc023a 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -17,18 +17,13 @@ mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] cloud-aws = [ "encryption_export/cloud-aws", - "backup/cloud-aws", ] cloud-gcp = [ "encryption_export/cloud-gcp", - "backup/cloud-gcp", ] cloud-azure = [ "encryption_export/cloud-azure", - "backup/cloud-azure", ] -cloud-storage-grpc = ["backup/cloud-storage-grpc"] -cloud-storage-dylib = ["backup/cloud-storage-dylib"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 8c1edc89a482..12979eab2123 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -43,7 +43,6 @@ error_code = { workspace = true } # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"], optional = true } external_storage = { workspace = true } -external_storage_export = { workspace = true } fail = "0.5" file_system = { workspace = true } futures = "0.3" @@ -78,7 +77,7 @@ tikv_kv = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" -tokio-util = { version = "0.7", features = ["compat"] } +tokio-util = { version = "0.7", features = ["compat"] } tonic = { version = "0.8", optional = true } txn_types = { workspace = true } uuid = "0.8" diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 1786d513dc88..05f1a0533921 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -14,8 +14,7 @@ use std::{ }; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_WRITE}; -use external_storage::{BackendConfig, UnpinReader}; -use external_storage_export::{create_storage, ExternalStorage}; +use external_storage::{create_storage, BackendConfig, ExternalStorage, UnpinReader}; use futures::io::Cursor; use kvproto::{ brpb::{ @@ -1868,7 +1867,7 @@ mod tests { #[tokio::test] async fn test_do_flush() { let tmp_dir = tempfile::tempdir().unwrap(); - let backend = external_storage_export::make_local_backend(tmp_dir.path()); + let backend = external_storage::make_local_backend(tmp_dir.path()); let mut task_info = StreamBackupTaskInfo::default(); task_info.set_storage(backend); let stream_task = StreamTask { @@ -2241,7 +2240,7 @@ mod tests { async fn test_update_global_checkpoint() -> Result<()> { // create local storage let tmp_dir = tempfile::tempdir().unwrap(); - let backend = external_storage_export::make_local_backend(tmp_dir.path()); + let backend = external_storage::make_local_backend(tmp_dir.path()); // build a StreamTaskInfo let mut task_info = StreamBackupTaskInfo::default(); diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 225a88a3e8fc..03b6e4398793 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -5,12 +5,7 @@ edition = "2021" publish = false [features] 
-default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] -cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] -cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] @@ -45,7 +40,6 @@ engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } external_storage = { workspace = true } -external_storage_export = { workspace = true } file_system = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index a4efc162092c..cc4d0bf0e289 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -12,8 +12,7 @@ use async_channel::SendError; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use concurrency_manager::ConcurrencyManager; use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, KvEngine, SstCompressionType}; -use external_storage::{BackendConfig, HdfsConfig}; -use external_storage_export::{create_storage, ExternalStorage}; +use external_storage::{create_storage, BackendConfig, ExternalStorage, HdfsConfig}; use futures::{channel::mpsc::*, executor::block_on}; use kvproto::{ brpb::*, @@ -1302,7 +1301,7 @@ pub mod tests { use api_version::{api_v2::RAW_KEY_PREFIX, dispatch_api_version, KvFormat, RawValue}; use collections::HashSet; use engine_traits::MiscExt; - use external_storage_export::{make_local_backend, make_noop_backend}; + use external_storage::{make_local_backend, make_noop_backend}; use file_system::{IoOp, IoRateLimiter, IoType}; use futures::{executor::block_on, stream::StreamExt}; use kvproto::metapb; diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index 237234c061ec..8420b7ded9c5 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -144,7 +144,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use engine_rocks::RocksEngine; - use external_storage_export::make_local_backend; + use external_storage::make_local_backend; use tikv::storage::txn::tests::{must_commit, must_prewrite_put}; use tikv_util::worker::{dummy_scheduler, ReceiverWrapper}; use txn_types::TimeStamp; diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 715c4f682913..dfbe36b60cf1 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -7,7 +7,7 @@ use engine_traits::{ CfName, ExternalSstFileInfo, KvEngine, SstCompressionType, SstExt, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; -use external_storage_export::{ExternalStorage, UnpinReader}; +use external_storage::{ExternalStorage, UnpinReader}; use file_system::Sha256Reader; use futures_util::io::AllowStdIo; use kvproto::{ @@ -485,9 +485,8 @@ mod tests { .build() .unwrap(); let db = rocks.get_rocksdb(); - let backend = external_storage_export::make_local_backend(temp.path()); - let storage = - external_storage_export::create_storage(&backend, Default::default()).unwrap(); + let backend = external_storage::make_local_backend(temp.path()); + let storage = external_storage::create_storage(&backend, Default::default()).unwrap(); // 
Test empty file. let mut r = kvproto::metapb::Region::default(); diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 96031c91f063..73ddf479fd8e 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -16,7 +16,7 @@ use futures_util::{ io::{AsyncRead, AsyncReadExt}, stream::TryStreamExt, }; -pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, S3 as InputConfig}; +pub use kvproto::brpb::{Bucket as InputBucket, S3 as InputConfig}; use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; @@ -80,45 +80,6 @@ impl Config { } } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - let force_path_style_str = attrs.get("force_path_style").unwrap_or(def).clone(); - let force_path_style = force_path_style_str == "true" || force_path_style_str == "True"; - let access_key_opt = attrs.get("access_key"); - let access_key_pair = if let Some(access_key) = access_key_opt { - let secret_access_key = attrs.get("secret_access_key").unwrap_or(def).clone(); - let session_token = attrs - .get("session_token") - .and_then(|x| StringNonEmpty::opt(x.to_string())); - Some(AccessKeyPair { - access_key: StringNonEmpty::required_field(access_key.clone(), "access_key")?, - secret_access_key: StringNonEmpty::required_field( - secret_access_key, - "secret_access_key", - )?, - session_token, - }) - } else { - None - }; - let storage_class = bucket.storage_class.clone(); - Ok(Config { - bucket, - storage_class, - sse: StringNonEmpty::opt(attrs.get("sse").unwrap_or(def).clone()), - acl: StringNonEmpty::opt(attrs.get("acl").unwrap_or(def).clone()), - access_key_pair, - force_path_style, - sse_kms_key_id: StringNonEmpty::opt(attrs.get("sse_kms_key_id").unwrap_or(def).clone()), - multi_part_size: MINIMUM_PART_SIZE, - object_lock_enabled: false, - role_arn: StringNonEmpty::opt(attrs.get("role_arn").unwrap_or(def).clone()), - external_id: StringNonEmpty::opt(attrs.get("external_id").unwrap_or(def).clone()), - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let storage_class = StringNonEmpty::opt(input.storage_class); let endpoint = StringNonEmpty::opt(input.endpoint); @@ -185,10 +146,6 @@ impl S3Storage { Self::new(Config::from_input(input)?) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - pub fn set_multi_part_size(&mut self, mut size: usize) { if size < MINIMUM_PART_SIZE { // default multi_part_size is 5MB, S3 cannot allow a smaller size. @@ -352,7 +309,7 @@ impl From> for UploadError { } } -/// try_read_exact tries to read exact length data as the buffer size. +/// try_read_exact tries to read exact length data as the buffer size. /// like [`std::io::Read::read_exact`], but won't return `UnexpectedEof` when /// cannot read anything more from the `Read`. once returning a size less than /// the buffer length, implies a EOF was meet, or nothing read. 
@@ -934,66 +891,6 @@ mod tests { ); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - input.set_region("us-west-2".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - assert_eq!(c1.bucket.region, c2.bucket.region); - assert_eq!( - c1.bucket.region, - StringNonEmpty::opt("us-west-2".to_owned()) - ); - } - - fn cloud_dynamic_from_input(mut s3: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !s3.endpoint.is_empty() { - bucket.endpoint = s3.take_endpoint(); - } - if !s3.region.is_empty() { - bucket.region = s3.take_region(); - } - if !s3.prefix.is_empty() { - bucket.prefix = s3.take_prefix(); - } - if !s3.storage_class.is_empty() { - bucket.storage_class = s3.take_storage_class(); - } - if !s3.bucket.is_empty() { - bucket.bucket = s3.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !s3.sse.is_empty() { - attrs.insert("sse".to_owned(), s3.take_sse()); - } - if !s3.acl.is_empty() { - attrs.insert("acl".to_owned(), s3.take_acl()); - } - if !s3.access_key.is_empty() { - attrs.insert("access_key".to_owned(), s3.take_access_key()); - } - if !s3.secret_access_key.is_empty() { - attrs.insert("secret_access_key".to_owned(), s3.take_secret_access_key()); - } - if !s3.sse_kms_key_id.is_empty() { - attrs.insert("sse_kms_key_id".to_owned(), s3.take_sse_kms_key_id()); - } - if s3.force_path_style { - attrs.insert("force_path_style".to_owned(), "true".to_owned()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("aws".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } - #[tokio::test] async fn test_try_read_exact() { use std::io::{self, Cursor, Read}; diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index d88020aa944c..5a806c54fafd 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -24,11 +24,8 @@ use futures_util::{ stream::StreamExt, TryStreamExt, }; -pub use kvproto::brpb::{ - AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket, CloudDynamic, -}; +pub use kvproto::brpb::{AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket}; use oauth2::{ClientId, ClientSecret}; -use openssl::sha::Sha256; use tikv_util::{ debug, stream::{retry, RetryError}, @@ -62,18 +59,6 @@ struct EncryptionCustomer { encryption_key_sha256: String, } -impl EncryptionCustomer { - fn new(encryption_key: &str) -> Self { - let mut hasher = Sha256::new(); - hasher.update(encryption_key.as_bytes()); - let encryption_key_sha256 = base64::encode(hasher.finish()); - EncryptionCustomer { - encryption_key: base64::encode(encryption_key), - encryption_key_sha256, - } - } -} - impl From for EncryptionCustomer { fn from(value: AzureCustomerKey) -> Self { EncryptionCustomer { @@ -164,28 +149,6 @@ impl Config { env::var(ENV_SHARED_KEY).ok().and_then(StringNonEmpty::opt) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - - Ok(Config { - bucket, - account_name: StringNonEmpty::opt(attrs.get("account_name").unwrap_or(def).clone()), - shared_key: 
StringNonEmpty::opt(attrs.get("shared_key").unwrap_or(def).clone()), - sas_token: StringNonEmpty::opt(attrs.get("sas_token").unwrap_or(def).clone()), - credential_info: Self::load_credential_info(), - env_account_name: Self::load_env_account_name(), - env_shared_key: Self::load_env_shared_key(), - encryption_scope: StringNonEmpty::opt( - attrs.get("encryption_scope").unwrap_or(def).clone(), - ), - encryption_customer: attrs - .get("encryption_key") - .map(|encryption_key| EncryptionCustomer::new(encryption_key)), - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let bucket = BucketConf { endpoint: StringNonEmpty::opt(input.endpoint), @@ -574,10 +537,6 @@ impl AzureStorage { }) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - pub fn new(config: Config) -> io::Result { Self::check_config(&config)?; @@ -900,47 +859,6 @@ mod tests { assert_eq!(get_size, size); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - input.set_account_name("user".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - assert_eq!(c1.account_name, c2.account_name); - } - - fn cloud_dynamic_from_input(mut azure: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !azure.endpoint.is_empty() { - bucket.endpoint = azure.take_endpoint(); - } - if !azure.prefix.is_empty() { - bucket.prefix = azure.take_prefix(); - } - if !azure.storage_class.is_empty() { - bucket.storage_class = azure.take_storage_class(); - } - if !azure.bucket.is_empty() { - bucket.bucket = azure.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !azure.account_name.is_empty() { - attrs.insert("account_name".to_owned(), azure.take_account_name()); - } - if !azure.shared_key.is_empty() { - attrs.insert("shared_key".to_owned(), azure.take_shared_key()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("azure".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } - #[test] fn test_config_check() { { diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index c43e4e63969d..56cd317c3f85 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -14,7 +14,7 @@ use futures_util::{ use http::HeaderValue; use hyper::{client::HttpConnector, Body, Client, Request, Response, StatusCode}; use hyper_tls::HttpsConnector; -pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, Gcs as InputConfig}; +pub use kvproto::brpb::{Bucket as InputBucket, Gcs as InputConfig}; use tame_gcs::{ common::{PredefinedAcl, StorageClass}, objects::{InsertObjectOptional, Metadata, Object}, @@ -54,35 +54,6 @@ impl Config { io::Error::new(io::ErrorKind::InvalidInput, "missing credentials") } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - let predefined_acl = parse_predefined_acl(attrs.get("predefined_acl").unwrap_or(def)) - .or_invalid_input("invalid predefined_acl")?; - let storage_class = parse_storage_class(&none_to_empty(bucket.storage_class.clone())) - 
.or_invalid_input("invalid storage_class")?; - - let credentials_blob_opt = StringNonEmpty::opt( - attrs - .get("credentials_blob") - .unwrap_or(&"".to_string()) - .to_string(), - ); - let svc_info = if let Some(cred) = credentials_blob_opt { - Some(deserialize_service_account_info(cred)?) - } else { - None - }; - - Ok(Config { - bucket, - predefined_acl, - svc_info, - storage_class, - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let endpoint = StringNonEmpty::opt(input.endpoint); let bucket = BucketConf { @@ -241,10 +212,6 @@ impl GcsStorage { Self::new(Config::from_input(input)?) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - /// Create a new GCS storage for the given config. pub fn new(config: Config) -> io::Result { let svc_access = if let Some(si) = &config.svc_info { @@ -618,17 +585,6 @@ mod tests { ); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - } - enum ThrottleReadState { Spawning, Emitting, @@ -706,32 +662,4 @@ mod tests { assert_eq!(dst.len(), BENCH_READ_SIZE) }) } - - fn cloud_dynamic_from_input(mut gcs: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !gcs.endpoint.is_empty() { - bucket.endpoint = gcs.take_endpoint(); - } - if !gcs.prefix.is_empty() { - bucket.prefix = gcs.take_prefix(); - } - if !gcs.storage_class.is_empty() { - bucket.storage_class = gcs.take_storage_class(); - } - if !gcs.bucket.is_empty() { - bucket.bucket = gcs.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !gcs.predefined_acl.is_empty() { - attrs.insert("predefined_acl".to_owned(), gcs.take_predefined_acl()); - } - if !gcs.credentials_blob.is_empty() { - attrs.insert("credentials_blob".to_owned(), gcs.take_credentials_blob()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("gcp".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } } diff --git a/components/cloud/src/blob.rs b/components/cloud/src/blob.rs index 84ca77042d7d..a0b5c26953c5 100644 --- a/components/cloud/src/blob.rs +++ b/components/cloud/src/blob.rs @@ -4,7 +4,6 @@ use std::{io, marker::Unpin, pin::Pin, task::Poll}; use async_trait::async_trait; use futures_io::AsyncRead; -pub use kvproto::brpb::CloudDynamic; pub trait BlobConfig: 'static + Send + Sync { fn name(&self) -> &'static str; @@ -177,20 +176,6 @@ impl BucketConf { Ok(u) } } - - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = cloud_dynamic.bucket.clone().into_option().ok_or_else(|| { - io::Error::new(io::ErrorKind::Other, "Required field bucket is missing") - })?; - - Ok(Self { - endpoint: StringNonEmpty::opt(bucket.endpoint), - bucket: StringNonEmpty::required_field(bucket.bucket, "bucket")?, - prefix: StringNonEmpty::opt(bucket.prefix), - storage_class: StringNonEmpty::opt(bucket.storage_class), - region: StringNonEmpty::opt(bucket.region), - }) - } } pub fn none_to_empty(opt: Option) -> String { diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index aed49aad3abf..25a852f9d5c3 100644 --- a/components/external_storage/Cargo.toml +++ 
b/components/external_storage/Cargo.toml @@ -4,39 +4,24 @@ version = "0.0.1" edition = "2021" publish = false -[features] -cloud-storage-dylib = [ - "ffi-support", - "libloading", - "protobuf", -] -cloud-storage-grpc = [ - "grpcio", -] -failpoints = ["fail/failpoints"] - [dependencies] async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } async-trait = "0.1" -bytes = "1.0" +aws = { workspace = true } +azure = { workspace = true } +cloud = { workspace = true } encryption = { workspace = true } engine_traits = { workspace = true } -fail = "0.5" -ffi-support = { optional = true, version = "0.4.2" } file_system = { workspace = true } futures = "0.3" -futures-executor = "0.3" futures-io = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { workspace = true, optional = true } +gcp = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" -libloading = { optional = true, version = "0.7.0" } openssl = "0.10" prometheus = { version = "0.13", default-features = false, features = ["nightly", "push"] } -protobuf = { optional = true, version = "2" } rand = "0.8" -rusoto_core = "0.46.0" slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } @@ -52,3 +37,7 @@ rust-ini = "0.14.0" structopt = "0.3" tempfile = "3.1" tokio = { version = "1.5", features = ["macros"] } + +[[example]] +name = "scli" +path = "examples/scli.rs" diff --git a/components/external_storage/export/examples/scli.rs b/components/external_storage/examples/scli.rs similarity index 75% rename from components/external_storage/export/examples/scli.rs rename to components/external_storage/examples/scli.rs index 0ab54721b291..9621f840e6ce 100644 --- a/components/external_storage/export/examples/scli.rs +++ b/components/external_storage/examples/scli.rs @@ -6,19 +6,13 @@ use std::{ path::Path, }; -#[cfg(feature = "cloud-azure")] -use external_storage_export::make_azblob_backend; -#[cfg(feature = "cloud-gcp")] -use external_storage_export::make_gcs_backend; -#[cfg(feature = "cloud-aws")] -use external_storage_export::make_s3_backend; -use external_storage_export::{ - create_storage, make_cloud_backend, make_hdfs_backend, make_local_backend, make_noop_backend, - ExternalStorage, UnpinReader, +use external_storage::{ + create_storage, make_azblob_backend, make_gcs_backend, make_hdfs_backend, make_local_backend, + make_noop_backend, make_s3_backend, ExternalStorage, UnpinReader, }; use futures_util::io::{copy, AllowStdIo}; use ini::ini::Ini; -use kvproto::brpb::{AzureBlobStorage, Bucket, CloudDynamic, Gcs, StorageBackend, S3}; +use kvproto::brpb::{AzureBlobStorage, Gcs, StorageBackend, S3}; use structopt::{clap::arg_enum, StructOpt}; use tikv_util::stream::block_on_external_io; use tokio::runtime::Runtime; @@ -32,7 +26,6 @@ arg_enum! 
{ S3, GCS, Azure, - Cloud, } } @@ -67,8 +60,6 @@ pub struct Opt { /// Remote path prefix #[structopt(short = "x", long)] prefix: Option, - #[structopt(long)] - cloud_name: Option, #[structopt(subcommand)] command: Command, } @@ -82,35 +73,6 @@ enum Command { Load, } -fn create_cloud_storage(opt: &Opt) -> Result { - let mut bucket = Bucket::default(); - if let Some(endpoint) = &opt.endpoint { - bucket.endpoint = endpoint.to_string(); - } - if let Some(region) = &opt.region { - bucket.region = region.to_string(); - } - if let Some(bucket_name) = &opt.bucket { - bucket.bucket = bucket_name.to_string(); - } else { - return Err(Error::new(ErrorKind::Other, "missing bucket")); - } - if let Some(prefix) = &opt.prefix { - bucket.prefix = prefix.to_string(); - } - let mut config = CloudDynamic::default(); - config.set_bucket(bucket); - let mut attrs = std::collections::HashMap::new(); - if let Some(credential_file) = &opt.credential_file { - attrs.insert("credential_file".to_owned(), credential_file.clone()); - } - config.set_attrs(attrs); - if let Some(cloud_name) = &opt.cloud_name { - config.provider_name = cloud_name.clone(); - } - Ok(make_cloud_backend(config)) -} - fn create_s3_storage(opt: &Opt) -> Result { let mut config = S3::default(); @@ -150,10 +112,7 @@ fn create_s3_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-aws")] - return Ok(make_s3_backend(config)); - #[cfg(not(feature = "cloud-aws"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_s3_backend(config)) } fn create_gcs_storage(opt: &Opt) -> Result { @@ -173,10 +132,7 @@ fn create_gcs_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-gcp")] - return Ok(make_gcs_backend(config)); - #[cfg(not(feature = "cloud-gcp"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_gcs_backend(config)) } fn create_azure_storage(opt: &Opt) -> Result { @@ -212,10 +168,7 @@ fn create_azure_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-azure")] - return Ok(make_azblob_backend(config)); - #[cfg(not(feature = "cloud-azure"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_azblob_backend(config)) } fn process() -> Result<()> { @@ -228,7 +181,6 @@ fn process() -> Result<()> { StorageType::S3 => create_s3_storage(&opt)?, StorageType::GCS => create_gcs_storage(&opt)?, StorageType::Azure => create_azure_storage(&opt)?, - StorageType::Cloud => create_cloud_storage(&opt)?, }), Default::default(), )?; diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml deleted file mode 100644 index 6537eaf8995b..000000000000 --- a/components/external_storage/export/Cargo.toml +++ /dev/null @@ -1,96 +0,0 @@ -[package] -name = "external_storage_export" -version = "0.0.1" -edition = "2021" -publish = false - -[[bin]] -name = "tikv-cloud-storage" -path = "src/bin/tikv-cloud-storage.rs" -required-features = ["cloud-storage-grpc"] - -[lib] -name = "external_storage_export" -# Experimental feature to load the cloud storage code dynamically -# crate-type = ["lib", "cdylib"] - -[features] -default = ["cloud-gcp", "cloud-aws", "cloud-azure"] -cloud-aws = ["aws"] -cloud-gcp = ["gcp"] -cloud-azure = ["azure"] -cloud-storage-dylib = [ - "external_storage/cloud-storage-dylib", - "ffi-support", - "file_system", - "futures", 
- "libloading", - "lazy_static", - "once_cell", - "protobuf", - "slog", - "slog-global", - "tokio", - "tokio-util", -] -cloud-storage-grpc = [ - "external_storage/cloud-storage-grpc", - "grpcio", - "file_system", - "futures", - "futures-executor", - "libc", - "signal-hook", - "slog", - "slog-global", - "slog-term", - "tokio", - "tokio-util", -] - -[dependencies] -async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } -async-trait = "0.1" -aws = { optional = true, workspace = true } -azure = { optional = true, workspace = true } -cloud = { workspace = true } -encryption = { workspace = true } -engine_traits = { workspace = true } -external_storage = { workspace = true } -ffi-support = { optional = true, version = "0.4.2" } -file_system = { workspace = true, optional = true } -futures = { optional = true, version = "0.3" } -futures-executor = { optional = true, version = "0.3" } -futures-io = { version = "0.3" } -futures-util = { version = "0.3", default-features = false, features = ["io"] } -gcp = { optional = true, workspace = true } -grpcio = { workspace = true, optional = true } -kvproto = { workspace = true } -lazy_static = { optional = true, version = "1.3" } -libloading = { optional = true, version = "0.7.0" } -once_cell = { optional = true, version = "1.3.1" } -protobuf = { optional = true, version = "2" } -slog-global = { optional = true, workspace = true } -tikv_util = { workspace = true } -tokio = { version = "1.5", features = ["time", "rt", "net"], optional = true } -tokio-util = { version = "0.7", features = ["compat"], optional = true } -url = "2.0" - -[dev-dependencies] -futures-util = { version = "0.3", default-features = false, features = ["io"] } -matches = "0.1.8" -rust-ini = "0.14.0" -structopt = "0.3" -tempfile = "3.1" -tokio = { version = "1.5", features = ["time"] } - -[[example]] -name = "scli" -path = "examples/scli.rs" - -[target.'cfg(unix)'.dependencies] -nix = { optional = true, version = "0.24" } -signal-hook = { optional = true, version = "0.3" } -libc = { optional = true, version = "0.2" } -slog = { optional = true, version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-term = { optional = true, version = "2.4" } diff --git a/components/external_storage/export/src/bin/tikv-cloud-storage.rs b/components/external_storage/export/src/bin/tikv-cloud-storage.rs deleted file mode 100644 index 07cd85079489..000000000000 --- a/components/external_storage/export/src/bin/tikv-cloud-storage.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use external_storage_export::new_service; -use grpcio::{self}; -use slog::{self}; -use slog_global::{info, warn}; -use tikv_util::logger::{self}; - -fn build_logger(drainer: D, log_level: slog::Level) -where - D: slog::Drain + Send + 'static, - ::Err: std::fmt::Display, -{ - // use async drainer and init std log. 
- logger::init_log(drainer, log_level, true, true, vec![], 100).unwrap_or_else(|e| { - println!("failed to initialize log: {}", e); - }); -} - -fn main() { - println!("starting GRPC cloud-storage service"); - let decorator = slog_term::PlainDecorator::new(std::io::stdout()); - let drain = slog_term::CompactFormat::new(decorator).build(); - build_logger(drain, slog::Level::Debug); - warn!("redirect grpcio logging"); - grpcio::redirect_log(); - info!("slog logging"); - let service = new_service().expect("GRPC service creation for tikv-cloud-storage"); - wait::for_signal(); - info!("service {:?}", service); -} - -#[cfg(unix)] -mod wait { - use libc::c_int; - use signal_hook::{ - consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, - iterator::Signals, - Signals, - }; - use slog_global::info; - - pub fn for_signal() { - let mut signals = Signals::new(&[SIGTERM, SIGINT, SIGHUP]).unwrap(); - for signal in &mut signals { - match signal { - SIGTERM | SIGINT | SIGHUP => { - info!("receive signal {}, stopping server...", signal); - break; - } - // TODO: handle more signals - _ => unreachable!(), - } - } - } -} - -#[cfg(not(unix))] -mod wait { - pub fn for_signal() {} -} diff --git a/components/external_storage/export/src/dylib.rs b/components/external_storage/export/src/dylib.rs deleted file mode 100644 index 308973de95eb..000000000000 --- a/components/external_storage/export/src/dylib.rs +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::sync::Mutex; - -use anyhow::Context; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use lazy_static::lazy_static; -use once_cell::sync::OnceCell; -use protobuf::{self}; -use slog_global::{error, info}; -use tokio::runtime::{Builder, Runtime}; - -use crate::request::{restore_receiver, write_receiver}; - -static RUNTIME: OnceCell = OnceCell::new(); -lazy_static! 
{ - static ref RUNTIME_INIT: Mutex<()> = Mutex::new(()); -} - -/// # Safety -/// Deref data pointer, thus unsafe -#[no_mangle] -pub extern "C" fn external_storage_init(error: &mut ffi_support::ExternError) { - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let guarded = RUNTIME_INIT.lock().unwrap(); - if RUNTIME.get().is_some() { - return Ok(()); - } - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib") - .core_threads(1) - .enable_all() - .build() - .context("build runtime")?; - if RUNTIME.set(runtime).is_err() { - error!("runtime already set") - } - #[allow(clippy::unit_arg)] - Ok(*guarded) - })() - .context("external_storage_init") - .map_err(anyhow_to_extern_err) - }) -} - -/// # Safety -/// Deref data pointer, thus unsafe -#[no_mangle] -pub unsafe extern "C" fn external_storage_write( - data: *const u8, - len: i32, - error: &mut ffi_support::ExternError, -) { - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let runtime = RUNTIME - .get() - .context("must first call external_storage_init")?; - let buffer = get_buffer(data, len); - let req: proto::ExternalStorageWriteRequest = protobuf::parse_from_bytes(buffer)?; - info!("write request {:?}", req.get_object_name()); - write_receiver(&runtime, req) - })() - .context("external_storage_write") - .map_err(anyhow_to_extern_err) - }) -} - -/// # Safety -/// Deref data pointer, thus unsafe -pub unsafe extern "C" fn external_storage_restore( - data: *const u8, - len: i32, - error: &mut ffi_support::ExternError, -) { - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let runtime = RUNTIME - .get() - .context("must first call external_storage_init")?; - let buffer = get_buffer(data, len); - let req: proto::ExternalStorageRestoreRequest = protobuf::parse_from_bytes(buffer)?; - info!("restore request {:?}", req.get_object_name()); - Ok(restore_receiver(runtime, req)?) - })() - .context("external_storage_restore") - .map_err(anyhow_to_extern_err) - }) -} - -unsafe fn get_buffer<'a>(data: *const u8, len: i32) -> &'a [u8] { - assert!(len >= 0, "Bad buffer len: {}", len); - if len == 0 { - // This will still fail, but as a bad protobuf format. 
- &[] - } else { - assert!(!data.is_null(), "Unexpected null data pointer"); - std::slice::from_raw_parts(data, len as usize) - } -} - -fn anyhow_to_extern_err(e: anyhow::Error) -> ffi_support::ExternError { - ffi_support::ExternError::new_error(ffi_support::ErrorCode::new(1), format!("{:?}", e)) -} - -pub mod staticlib { - use std::{ - io::{self}, - sync::Arc, - }; - - use external_storage::{ - dylib_client::extern_to_io_err, - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, - }; - use futures_io::AsyncRead; - use protobuf::Message; - use tikv_util::time::Limiter; - - use super::*; - - struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - name: &'static str, - url: url::Url, - } - - pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, - ) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib-client") - .core_threads(1) - .enable_all() - .build()?; - external_storage_init_ffi()?; - Ok(Box::new(ExternalStorageClient { - runtime: Arc::new(runtime), - backend, - name, - url, - }) as _) - } - - impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - let bytes = req.write_to_bytes()?; - info!("write request"); - external_storage_write_ffi(bytes)?; - DropPath(file_path); - Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - let bytes = req.write_to_bytes()?; - external_storage_restore_ffi(bytes) - } - } - - fn external_storage_write_ffi(bytes: Vec) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - external_storage_write(bytes.as_ptr(), bytes.len() as i32, &mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - Err(extern_to_io_err(e)) - } else { - Ok(()) - } - } - - fn external_storage_restore_ffi(bytes: Vec) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - external_storage_restore(bytes.as_ptr(), bytes.len() as i32, &mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - Err(extern_to_io_err(e)) - } else { - Ok(()) - } - } - - fn external_storage_init_ffi() -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - external_storage_init(&mut e); - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) - } -} diff --git a/components/external_storage/export/src/grpc_service.rs b/components/external_storage/export/src/grpc_service.rs deleted file mode 100644 index 7ef2bd093d17..000000000000 --- a/components/external_storage/export/src/grpc_service.rs +++ 
/dev/null @@ -1,131 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use external_storage::request::anyhow_to_io_log_error; -use grpcio::{self}; -use kvproto::brpb as proto; -use slog_global::{error, info}; -use tokio::runtime::{Builder, Runtime}; - -use crate::request::{restore_receiver, write_receiver}; - -#[derive(Debug)] -pub struct SocketService { - server: grpcio::Server, - listener: std::os::unix::net::UnixListener, -} - -pub fn new_service() -> io::Result { - (|| -> anyhow::Result { - let env = Arc::new(grpcio::EnvBuilder::new().build()); - let storage_service = Service::new().context("new storage service")?; - let builder = grpcio::ServerBuilder::new(env) - .register_service(proto::create_external_storage(storage_service)); - let grpc_socket_path = "/tmp/grpc-external-storage.sock"; - let socket_addr = format!("unix:{}", grpc_socket_path); - let socket_path = std::path::PathBuf::from(grpc_socket_path); - // Keep the listener in scope: otherwise the socket is destroyed - let listener = bind_socket(&socket_path).context("GRPC new service create socket")?; - let mut server = builder - .bind(socket_addr, 0) - .build() - .context("GRPC build server")?; - server.start(); - let (..) = server.bind_addrs().next().context("GRPC bind server")?; - Ok(SocketService { server, listener }) - })() - .context("new service") - .map_err(anyhow_to_io_log_error) -} - -/// Service handles the RPC messages for the `ExternalStorage` service. -#[derive(Clone)] -pub struct Service { - runtime: Arc, -} - -impl Service { - /// Create a new backup service. - pub fn new() -> io::Result { - let runtime = Arc::new( - Builder::new() - .basic_scheduler() - .thread_name("external-storage-grpc-service") - .core_threads(1) - .enable_all() - .build()?, - ); - Ok(Service { runtime }) - } -} - -impl proto::ExternalStorage for Service { - fn save( - &mut self, - _ctx: grpcio::RpcContext, - req: proto::ExternalStorageWriteRequest, - sink: grpcio::UnarySink, - ) { - info!("write request {:?}", req.get_object_name()); - let result = write_receiver(&self.runtime, req); - match result { - Ok(_) => { - let rsp = proto::ExternalStorageWriteResponse::default(); - info!("success write"); - sink.success(rsp); - } - Err(e) => { - error!("write {}", e); - sink.fail(make_rpc_error(anyhow_to_io_log_error(e))); - } - } - } - - fn restore( - &mut self, - _ctx: grpcio::RpcContext, - req: proto::ExternalStorageRestoreRequest, - sink: grpcio::UnarySink, - ) { - info!( - "restore request {:?} {:?}", - req.get_object_name(), - req.get_restore_name() - ); - let result = restore_receiver(&self.runtime, req); - match result { - Ok(_) => { - let rsp = proto::ExternalStorageRestoreResponse::default(); - info!("success restore"); - sink.success(rsp); - } - Err(e) => { - error!("restore {}", e); - sink.fail(make_rpc_error(e)); - } - } - } -} - -pub fn make_rpc_error(err: io::Error) -> grpcio::RpcStatus { - grpcio::RpcStatus::new( - match err.kind() { - ErrorKind::NotFound => grpcio::RpcStatusCode::NOT_FOUND, - ErrorKind::InvalidInput => grpcio::RpcStatusCode::INVALID_ARGUMENT, - ErrorKind::PermissionDenied => grpcio::RpcStatusCode::UNAUTHENTICATED, - _ => grpcio::RpcStatusCode::UNKNOWN, - }, - Some(format!("{:?}", err)), - ) -} - -fn bind_socket(socket_path: &std::path::Path) -> anyhow::Result { - let msg = format!("bind socket {:?}", &socket_path); - info!("{}", msg); - std::os::unix::net::UnixListener::bind(&socket_path).context(msg) -} diff 
--git a/components/external_storage/export/src/lib.rs b/components/external_storage/export/src/lib.rs deleted file mode 100644 index e04e5beb6959..000000000000 --- a/components/external_storage/export/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -mod export; -pub use export::*; - -#[cfg(feature = "cloud-storage-grpc")] -mod grpc_service; -#[cfg(feature = "cloud-storage-grpc")] -pub use grpc_service::new_service; - -#[cfg(feature = "cloud-storage-dylib")] -mod dylib; - -#[cfg(any(feature = "cloud-storage-grpc", feature = "cloud-storage-dylib"))] -mod request; diff --git a/components/external_storage/export/src/request.rs b/components/external_storage/export/src/request.rs deleted file mode 100644 index 5623c0732d73..000000000000 --- a/components/external_storage/export/src/request.rs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::io::{self}; - -use anyhow::Context; -use external_storage::request::file_name_for_write; -use file_system::File; -use futures::executor::block_on; -use futures_io::AsyncRead; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use slog_global::info; -use tikv_util::time::Limiter; -use tokio::runtime::Runtime; -use tokio_util::compat::Tokio02AsyncReadCompatExt; - -use crate::export::{create_storage_no_client, read_external_storage_into_file, ExternalStorage}; - -pub fn write_receiver( - runtime: &Runtime, - req: proto::ExternalStorageWriteRequest, -) -> anyhow::Result<()> { - let storage_backend = req.get_storage_backend(); - let object_name = req.get_object_name(); - let content_length = req.get_content_length(); - let storage = create_storage_no_client(storage_backend).context("create storage")?; - let file_path = file_name_for_write(storage.name(), object_name); - let reader = runtime - .enter(|| block_on(open_file_as_async_read(file_path))) - .context("open file")?; - storage - .write(object_name, reader, content_length) - .context("storage write") -} - -pub fn restore_receiver( - runtime: &Runtime, - req: proto::ExternalStorageRestoreRequest, -) -> io::Result<()> { - let object_name = req.get_object_name(); - let storage_backend = req.get_storage_backend(); - let file_name = std::path::PathBuf::from(req.get_restore_name()); - let expected_length = req.get_content_length(); - runtime.enter(|| { - block_on(restore_inner( - storage_backend, - object_name, - file_name, - expected_length, - )) - }) -} - -pub async fn restore_inner( - storage_backend: &proto::StorageBackend, - object_name: &str, - file_name: std::path::PathBuf, - expected_length: u64, -) -> io::Result<()> { - let storage = create_storage_no_client(&storage_backend)?; - // TODO: support encryption. The service must be launched with or sent a - // DataKeyManager - let output: &mut dyn io::Write = &mut File::create(file_name)?; - // the minimum speed of reading data, in bytes/second. - // if reading speed is slower than this rate, we will stop with - // a "TimedOut" error. - // (at 8 KB/s for a 2 MB buffer, this means we timeout after 4m16s.) 
- const MINIMUM_READ_SPEED: usize = 8192; - let limiter = Limiter::new(f64::INFINITY); - let x = read_external_storage_into_file( - &mut storage.read(object_name), - output, - &limiter, - expected_length, - None, - MINIMUM_READ_SPEED, - ) - .await; - x -} - -async fn open_file_as_async_read( - file_path: std::path::PathBuf, -) -> anyhow::Result> { - info!("open file {:?}", &file_path); - let f = tokio::fs::File::open(file_path) - .await - .context("open file")?; - let reader: Box = Box::new(Box::pin(f.compat())); - Ok(reader) -} diff --git a/components/external_storage/src/dylib_client.rs b/components/external_storage/src/dylib_client.rs deleted file mode 100644 index 9e2748c2011a..000000000000 --- a/components/external_storage/src/dylib_client.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use futures_io::AsyncRead; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use protobuf::{self, Message}; -use slog_global::info; -use tikv_util::time::Limiter; -use tokio::runtime::{Builder, Runtime}; - -use crate::{ - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, -}; - -struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - library: libloading::Library, - name: &'static str, - url: url::Url, -} - -pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, -) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib-client") - .core_threads(1) - .enable_all() - .build()?; - let library = unsafe { - libloading::Library::new( - std::path::Path::new("./") - .join(libloading::library_filename("external_storage_export")), - ) - .map_err(libloading_err_to_io)? 
- }; - external_storage_init_ffi_dynamic(&library)?; - Ok(Box::new(ExternalStorageClient { - runtime: Arc::new(runtime), - backend, - library, - name, - url, - }) as _) -} - -impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - let bytes = req.write_to_bytes()?; - info!("write request"); - call_ffi_dynamic(&self.library, b"external_storage_write", bytes)?; - DropPath(file_path); - Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - let bytes = req.write_to_bytes()?; - call_ffi_dynamic(&self.library, b"external_storage_restore", bytes) - } -} - -pub fn extern_to_io_err(e: ffi_support::ExternError) -> io::Error { - io::Error::new(io::ErrorKind::Other, format!("{:?}", e)) -} - -type FfiInitFn<'a> = - libloading::Symbol<'a, unsafe extern "C" fn(error: &mut ffi_support::ExternError) -> ()>; -type FfiFn<'a> = libloading::Symbol< - 'a, - unsafe extern "C" fn(error: &mut ffi_support::ExternError, bytes: Vec) -> (), ->; - -fn external_storage_init_ffi_dynamic(library: &libloading::Library) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - let func: FfiInitFn = library - .get(b"external_storage_init") - .map_err(libloading_err_to_io)?; - func(&mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) -} - -fn call_ffi_dynamic( - library: &libloading::Library, - fn_name: &[u8], - bytes: Vec, -) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - let func: FfiFn = library.get(fn_name).map_err(libloading_err_to_io)?; - func(&mut e, bytes); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) -} - -fn libloading_err_to_io(e: libloading::Error) -> io::Error { - // TODO: custom error type - let kind = match e { - libloading::Error::DlOpen { .. } | libloading::Error::DlOpenUnknown => { - ErrorKind::AddrNotAvailable - } - _ => ErrorKind::Other, - }; - io::Error::new(kind, format!("{}", e)) -} diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/src/export.rs similarity index 54% rename from components/external_storage/export/src/export.rs rename to components/external_storage/src/export.rs index ad31dc363ae5..5b69a793c123 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/src/export.rs @@ -1,41 +1,23 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! To use External storage with protobufs as an application, import this -//! module. external_storage contains the actual library code -//! 
Cloud provider backends are under components/cloud use std::{io, path::Path, sync::Arc}; use async_trait::async_trait; -#[cfg(feature = "cloud-aws")] pub use aws::{Config as S3Config, S3Storage}; -#[cfg(feature = "cloud-azure")] pub use azure::{AzureStorage, Config as AzureConfig}; -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -use cloud::blob::BlobConfig; use cloud::blob::{BlobStorage, PutResource}; use encryption::DataKeyManager; -#[cfg(feature = "cloud-storage-dylib")] -use external_storage::dylib_client; -#[cfg(feature = "cloud-storage-grpc")] -use external_storage::grpc_client; -pub use external_storage::{ - compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_info_buff, - read_external_storage_into_file, record_storage_create, BackendConfig, ExternalData, - ExternalStorage, HdfsStorage, LocalStorage, NoopStorage, RestoreConfig, UnpinReader, - MIN_READ_SPEED, +use gcp::GcsStorage; +use kvproto::brpb::{ + AzureBlobStorage, Gcs, Noop, StorageBackend, StorageBackend_oneof_backend as Backend, S3, }; -#[cfg(feature = "cloud-gcp")] -pub use gcp::{Config as GcsConfig, GcsStorage}; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -#[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] -use kvproto::brpb::{AzureBlobStorage, Gcs, S3}; -use kvproto::brpb::{CloudDynamic, Noop, StorageBackend}; use tikv_util::time::{Instant, Limiter}; -#[cfg(feature = "cloud-storage-dylib")] -use tikv_util::warn; -#[cfg(feature = "cloud-storage-dylib")] -use crate::dylib; +use crate::{ + compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_into_file, + record_storage_create, BackendConfig, ExternalData, ExternalStorage, HdfsStorage, LocalStorage, + NoopStorage, RestoreConfig, UnpinReader, +}; pub fn create_storage( storage_backend: &StorageBackend, @@ -48,20 +30,6 @@ pub fn create_storage( } } -// when the flag cloud-storage-dylib or cloud-storage-grpc is set create_storage -// is automatically wrapped with a client This function is used by the -// library/server to avoid any wrapping -pub fn create_storage_no_client( - storage_backend: &StorageBackend, - config: BackendConfig, -) -> io::Result> { - if let Some(backend) = &storage_backend.backend { - create_backend_inner(backend, config) - } else { - Err(bad_storage_backend(storage_backend)) - } -} - fn bad_storage_backend(storage_backend: &StorageBackend) -> io::Error { io::Error::new( io::ErrorKind::NotFound, @@ -77,93 +45,11 @@ fn bad_backend(backend: Backend) -> io::Error { bad_storage_backend(&storage_backend) } -#[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] fn blob_store(store: Blob) -> Box { Box::new(BlobStore::new(store)) as Box } -#[cfg(feature = "cloud-storage-grpc")] -pub fn create_backend(backend: &Backend) -> io::Result> { - match create_config(backend) { - Some(config) => { - let conf = config?; - grpc_client::new_client(backend.clone(), conf.name(), conf.url()?) - } - None => Err(bad_backend(backend.clone())), - } -} - -#[cfg(feature = "cloud-storage-dylib")] -pub fn create_backend(backend: &Backend) -> io::Result> { - match create_config(backend) { - Some(config) => { - let conf = config?; - let r = dylib_client::new_client(backend.clone(), conf.name(), conf.url()?); - match r { - Err(e) if e.kind() == io::ErrorKind::AddrNotAvailable => { - warn!("could not open dll for external_storage_export"); - dylib::staticlib::new_client(backend.clone(), conf.name(), conf.url()?) 
- } - _ => r, - } - } - None => Err(bad_backend(backend.clone())), - } -} - -#[cfg(all( - not(feature = "cloud-storage-grpc"), - not(feature = "cloud-storage-dylib") -))] -pub fn create_backend( - backend: &Backend, - config: BackendConfig, -) -> io::Result> { - create_backend_inner(backend, config) -} - -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -fn create_config(backend: &Backend) -> Option>> { - match backend { - #[cfg(feature = "cloud-aws")] - Backend::S3(config) => { - let conf = S3Config::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-gcp")] - Backend::Gcs(config) => { - let conf = GcsConfig::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-azure")] - Backend::AzureBlobStorage(config) => { - let conf = AzureConfig::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - Backend::CloudDynamic(dyn_backend) => match dyn_backend.provider_name.as_str() { - #[cfg(feature = "cloud-aws")] - "aws" | "s3" => { - let conf = S3Config::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-gcp")] - "gcp" | "gcs" => { - let conf = GcsConfig::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-azure")] - "azure" | "azblob" => { - let conf = AzureConfig::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - _ => None, - }, - _ => None, - } -} - -/// Create a new storage from the given storage backend description. -fn create_backend_inner( +fn create_backend( backend: &Backend, backend_config: BackendConfig, ) -> io::Result> { @@ -176,30 +62,18 @@ fn create_backend_inner( Backend::Hdfs(hdfs) => { Box::new(HdfsStorage::new(&hdfs.remote, backend_config.hdfs_config)?) } - Backend::Noop(_) => { - Box::::default() as Box - } - #[cfg(feature = "cloud-aws")] + Backend::Noop(_) => Box::::default() as Box, Backend::S3(config) => { let mut s = S3Storage::from_input(config.clone())?; s.set_multi_part_size(backend_config.s3_multi_part_size); blob_store(s) } - #[cfg(feature = "cloud-gcp")] Backend::Gcs(config) => blob_store(GcsStorage::from_input(config.clone())?), - #[cfg(feature = "cloud-azure")] Backend::AzureBlobStorage(config) => blob_store(AzureStorage::from_input(config.clone())?), - Backend::CloudDynamic(dyn_backend) => match dyn_backend.provider_name.as_str() { - #[cfg(feature = "cloud-aws")] - "aws" | "s3" => blob_store(S3Storage::from_cloud_dynamic(dyn_backend)?), - #[cfg(feature = "cloud-gcp")] - "gcp" | "gcs" => blob_store(GcsStorage::from_cloud_dynamic(dyn_backend)?), - #[cfg(feature = "cloud-azure")] - "azure" | "azblob" => blob_store(AzureStorage::from_cloud_dynamic(dyn_backend)?), - _ => { - return Err(bad_backend(Backend::CloudDynamic(dyn_backend.clone()))); - } - }, + Backend::CloudDynamic(dyn_backend) => { + // CloudDynamic backend is no longer supported. 
+ return Err(bad_backend(Backend::CloudDynamic(dyn_backend.clone()))); + } #[allow(unreachable_patterns)] _ => return Err(bad_backend(backend.clone())), }; @@ -207,7 +81,6 @@ fn create_backend_inner( Ok(storage) } -#[cfg(feature = "cloud-aws")] // Creates a S3 `StorageBackend` pub fn make_s3_backend(config: S3) -> StorageBackend { let mut backend = StorageBackend::default(); @@ -236,26 +109,18 @@ pub fn make_noop_backend() -> StorageBackend { backend } -#[cfg(feature = "cloud-gcp")] pub fn make_gcs_backend(config: Gcs) -> StorageBackend { let mut backend = StorageBackend::default(); backend.set_gcs(config); backend } -#[cfg(feature = "cloud-azure")] pub fn make_azblob_backend(config: AzureBlobStorage) -> StorageBackend { let mut backend = StorageBackend::default(); backend.set_azure_blob_storage(config); backend } -pub fn make_cloud_backend(config: CloudDynamic) -> StorageBackend { - let mut backend = StorageBackend::default(); - backend.set_cloud_dynamic(config); - backend -} - #[cfg(test)] mod tests { use tempfile::Builder; diff --git a/components/external_storage/src/grpc_client.rs b/components/external_storage/src/grpc_client.rs deleted file mode 100644 index e836d8fb58ac..000000000000 --- a/components/external_storage/src/grpc_client.rs +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use futures_io::AsyncRead; -use grpcio::{self}; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use tikv_util::time::Limiter; -use tokio::runtime::{Builder, Runtime}; - -use crate::{ - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, -}; - -struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - rpc: proto::ExternalStorageClient, - name: &'static str, - url: url::Url, -} - -pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, -) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-grpc-client") - .core_threads(1) - .enable_all() - .build()?; - Ok(Box::new(ExternalStorageClient { - backend, - runtime: Arc::new(runtime), - rpc: new_rpc_client()?, - name, - url, - })) -} - -fn new_rpc_client() -> io::Result { - let env = Arc::new(grpcio::EnvBuilder::new().build()); - let grpc_socket_path = "/tmp/grpc-external-storage.sock"; - let socket_addr = format!("unix:{}", grpc_socket_path); - let channel = grpcio::ChannelBuilder::new(env).connect(&socket_addr); - Ok(proto::ExternalStorageClient::new(channel)) -} - -impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - info!("grpc write request"); - self.rpc - .save(&req) - .map_err(rpc_error_to_io) - .context("rpc write")?; - info!("grpc write request finished"); - DropPath(file_path); - Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of 
read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - self.rpc.restore(&req).map_err(rpc_error_to_io).map(|_| ()) - } -} - -pub fn rpc_error_to_io(err: grpcio::Error) -> io::Error { - let msg = format!("{}", err); - match err { - grpcio::Error::RpcFailure(status) => match status.status { - grpcio::RpcStatusCode::NOT_FOUND => io::Error::new(ErrorKind::NotFound, msg), - grpcio::RpcStatusCode::INVALID_ARGUMENT => io::Error::new(ErrorKind::InvalidInput, msg), - grpcio::RpcStatusCode::UNAUTHENTICATED => { - io::Error::new(ErrorKind::PermissionDenied, msg) - } - _ => io::Error::new(ErrorKind::Other, msg), - }, - _ => io::Error::new(ErrorKind::Other, msg), - } -} diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index dd021f14bf8a..082073abe4f6 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -40,12 +40,8 @@ mod noop; pub use noop::NoopStorage; mod metrics; use metrics::EXT_STORAGE_CREATE_HISTOGRAM; -#[cfg(feature = "cloud-storage-dylib")] -pub mod dylib_client; -#[cfg(feature = "cloud-storage-grpc")] -pub mod grpc_client; -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -pub mod request; +mod export; +pub use export::*; pub fn record_storage_create(start: Instant, storage: &dyn ExternalStorage) { EXT_STORAGE_CREATE_HISTOGRAM diff --git a/components/external_storage/src/request.rs b/components/external_storage/src/request.rs deleted file mode 100644 index 7f1a81d49b70..000000000000 --- a/components/external_storage/src/request.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::io::{self, ErrorKind}; - -use anyhow::Context; -use futures::executor::block_on; -use futures_io::{AsyncRead, AsyncWrite}; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use tikv_util::time::Limiter; -use tokio::runtime::Runtime; -use tokio_util::compat::Tokio02AsyncReadCompatExt; - -pub fn write_sender( - runtime: &Runtime, - backend: Backend, - file_path: std::path::PathBuf, - name: &str, - reader: Box, - content_length: u64, -) -> io::Result { - (|| -> anyhow::Result { - // TODO: the reader should write direct to the file_path - // currently it is copying into an intermediate buffer - // Writing to a file here uses up disk space - // But as a positive it gets the backup data out of the DB the fastest - // Currently this waits for the file to be completely written before sending to - // storage - runtime.enter(|| { - block_on(async { - let msg = |action: &str| format!("{} file {:?}", action, &file_path); - let f = tokio::fs::File::create(file_path.clone()) - .await - .context(msg("create"))?; - let mut writer: Box = Box::new(Box::pin(f.compat())); - futures_util::io::copy(reader, &mut writer) - .await - .context(msg("copy")) - }) - })?; - let mut req = proto::ExternalStorageWriteRequest::default(); - req.set_object_name(name.to_string()); - req.set_content_length(content_length); - let mut sb = proto::StorageBackend::default(); - sb.backend = Some(backend); - req.set_storage_backend(sb); - Ok(req) - })() - .context("write_sender") - .map_err(anyhow_to_io_log_error) -} - -pub fn restore_sender( - backend: Backend, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - _speed_limiter: &Limiter, -) -> io::Result { - // TODO: send speed_limiter - let mut req = proto::ExternalStorageRestoreRequest::default(); - req.set_object_name(storage_name.to_string()); - let restore_str = restore_name.to_str().ok_or_else(|| { - io::Error::new( - ErrorKind::InvalidData, - format!("could not convert to str {:?}", &restore_name), - ) - })?; - req.set_restore_name(restore_str.to_string()); - req.set_content_length(expected_length); - let mut sb = proto::StorageBackend::default(); - sb.backend = Some(backend); - req.set_storage_backend(sb); - Ok(req) -} - -pub fn anyhow_to_io_log_error(err: anyhow::Error) -> io::Error { - let string = format!("{:#}", &err); - match err.downcast::() { - Ok(e) => { - // It will be difficult to propagate the context - // without changing the error type to anyhow or a custom TiKV error - error!("{}", string); - e - } - Err(_) => io::Error::new(ErrorKind::Other, string), - } -} - -pub fn file_name_for_write(storage_name: &str, object_name: &str) -> std::path::PathBuf { - let full_name = format!("{}-{}", storage_name, object_name); - std::env::temp_dir().join(full_name) -} - -pub struct DropPath(pub std::path::PathBuf); - -impl Drop for DropPath { - fn drop(&mut self) { - let _ = std::fs::remove_file(&self.0); - } -} diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index d281c0eca69a..2bd7737ade4c 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -24,10 +24,6 @@ test-engines-panic = [ "engine_test/test-engines-panic", ] -cloud-aws = ["raftstore/cloud-aws"] -cloud-gcp = ["raftstore/cloud-gcp"] -cloud-azure = ["raftstore/cloud-azure"] - [dependencies] batch-system = { workspace = true } bytes = "1.0" diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 27380a528824..1933bad6da90 100644 
--- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -23,10 +23,6 @@ test-engines-panic = [ "engine_test/test-engines-panic", ] -cloud-aws = ["sst_importer/cloud-aws"] -cloud-gcp = ["sst_importer/cloud-gcp"] -cloud-azure = ["sst_importer/cloud-azure"] - [dependencies] batch-system = { workspace = true } bitflags = "1.0.1" @@ -39,7 +35,7 @@ crc32fast = "1.2" crossbeam = "0.8" derivative = "2" encryption = { workspace = true } -engine_rocks = { workspace = true, optional = true } +engine_rocks = { workspace = true, optional = true } # Should be [dev-dependencies] but we need to control the features # https://github.com/rust-lang/cargo/issues/6915 diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index d292b44606e6..f4f2504a8b34 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -5,12 +5,7 @@ edition = "2021" publish = false [features] -default = ["cloud-aws", "cloud-gcp", "cloud-azure", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] -cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] -cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] test-engines-rocksdb = [ "engine_test/test-engines-rocksdb", @@ -34,7 +29,7 @@ encryption = { workspace = true } engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } -external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } futures = { version = "0.3", features = ["thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } diff --git a/components/sst_importer/src/caching/storage_cache.rs b/components/sst_importer/src/caching/storage_cache.rs index 23732545b92d..585772c25526 100644 --- a/components/sst_importer/src/caching/storage_cache.rs +++ b/components/sst_importer/src/caching/storage_cache.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use external_storage_export::ExternalStorage; +use external_storage::ExternalStorage; use kvproto::brpb::StorageBackend; use super::cache_map::{MakeCache, ShareOwned}; @@ -31,7 +31,7 @@ impl StoragePool { fn create(backend: &StorageBackend, size: usize) -> Result { let mut r = Vec::with_capacity(size); for _ in 0..size { - let s = external_storage_export::create_storage(backend, Default::default())?; + let s = external_storage::create_storage(backend, Default::default())?; r.push(Arc::from(s)); } Ok(Self(r.into_boxed_slice())) diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5530862e6a39..5cf9f1c6573b 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -23,7 +23,7 @@ use engine_traits::{ IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; -use external_storage_export::{ +use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, }; use file_system::{get_io_rate_limiter, IoType, OpenOptions}; @@ -470,7 +470,7 @@ impl SstImporter { backend: &StorageBackend, support_kms: bool, speed_limiter: &Limiter, - restore_config: 
external_storage_export::RestoreConfig, + restore_config: external_storage::RestoreConfig, ) -> Result<()> { self._download_rt .block_on(self.async_download_file_from_external_storage( @@ -496,7 +496,7 @@ impl SstImporter { // TODO: pass a config to support hdfs let ext_storage = if cache_id.is_empty() { EXT_STORAGE_CACHE_COUNT.with_label_values(&["skip"]).inc(); - let s = external_storage_export::create_storage(backend, Default::default())?; + let s = external_storage::create_storage(backend, Default::default())?; Arc::from(s) } else { self.cached_storage.cached_or_create(cache_id, backend)? @@ -513,7 +513,7 @@ impl SstImporter { support_kms: bool, speed_limiter: &Limiter, cache_key: &str, - restore_config: external_storage_export::RestoreConfig, + restore_config: external_storage::RestoreConfig, ) -> Result<()> { let start_read = Instant::now(); if let Some(p) = dst_file.parent() { @@ -659,7 +659,7 @@ impl SstImporter { async fn exec_download( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { let start = Instant::now(); @@ -684,7 +684,7 @@ impl SstImporter { Some((meta.get_range_offset(), range_length)) } }; - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range, compression_type: Some(meta.get_compression_type()), expected_sha256, @@ -715,7 +715,7 @@ impl SstImporter { pub async fn do_read_kv_file( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { let start = Instant::now(); @@ -764,18 +764,16 @@ impl SstImporter { &self, ext_storage: Arc, support_kms: bool, - ) -> Arc { + ) -> Arc { // kv-files needn't are decrypted with KMS when download currently because these // files are not encrypted when log-backup. It is different from // sst-files because sst-files is encrypted when saved with rocksdb env // with KMS. to do: support KMS when log-backup and restore point. match (support_kms, self.key_manager.clone()) { - (true, Some(key_manager)) => { - Arc::new(external_storage_export::EncryptedExternalStorage { - key_manager, - storage: ext_storage, - }) - } + (true, Some(key_manager)) => Arc::new(external_storage::EncryptedExternalStorage { + key_manager, + storage: ext_storage, + }), _ => ext_storage, } } @@ -784,7 +782,7 @@ impl SstImporter { &self, file_length: u64, file_name: &str, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, restore_config: RestoreConfig, ) -> Result> { @@ -806,12 +804,12 @@ impl SstImporter { encrypt_wrap_reader(file_crypter, inner)? 
}; - let r = external_storage_export::read_external_storage_info_buff( + let r = external_storage::read_external_storage_info_buff( &mut reader, speed_limiter, file_length, expected_sha256, - external_storage_export::MIN_READ_SPEED, + external_storage::MIN_READ_SPEED, ) .await; let url = ext_storage.url()?.to_string(); @@ -828,7 +826,7 @@ impl SstImporter { pub async fn read_from_kv_file( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, backend: &StorageBackend, speed_limiter: &Limiter, ) -> Result> { @@ -893,7 +891,7 @@ impl SstImporter { } else { Some((offset, range_length)) }; - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range, compression_type: Some(meta.compression_type), expected_sha256, @@ -1124,7 +1122,7 @@ impl SstImporter { iv: meta.cipher_iv.to_owned(), }); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { file_crypter, ..Default::default() }; @@ -1500,7 +1498,7 @@ mod tests { collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; - use external_storage_export::read_external_storage_info_buff; + use external_storage::read_external_storage_info_buff; use file_system::File; use online_config::{ConfigManager, OnlineConfig}; use openssl::hash::{Hasher, MessageDigest}; @@ -1707,7 +1705,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1755,7 +1753,7 @@ mod tests { kv_meta.set_length(len as _); kv_meta.set_sha256(sha256.finish().unwrap().to_vec()); - let backend = external_storage_export::make_local_backend(ext_dir.path()); + let backend = external_storage::make_local_backend(ext_dir.path()); Ok((ext_dir, backend, kv_meta, buff.buffer().to_vec())) } @@ -1824,7 +1822,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1870,7 +1868,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1904,7 +1902,7 @@ mod tests { hasher.update(data).unwrap(); let hash256 = hasher.finish().unwrap().to_vec(); - block_on_external_io(external_storage_export::read_external_storage_into_file( + block_on_external_io(external_storage::read_external_storage_into_file( &mut input, &mut output, &Limiter::new(f64::INFINITY), @@ -1922,7 +1920,7 @@ mod tests { let mut input = pending::>().into_async_read(); let mut output = Vec::new(); - let err = block_on_external_io(external_storage_export::read_external_storage_into_file( + let err = block_on_external_io(external_storage::read_external_storage_into_file( &mut input, &mut output, &Limiter::new(f64::INFINITY), @@ -2139,7 +2137,7 @@ mod tests { }; // test read all of the file. 
- let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { expected_sha256: Some(kv_meta.get_sha256().to_vec()), ..Default::default() }; @@ -2162,7 +2160,7 @@ mod tests { // test read range of the file. let (offset, len) = (5, 16); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range: Some((offset, len)), ..Default::default() }; @@ -2250,7 +2248,7 @@ mod tests { // perform download file into .temp dir. let file_name = "sample.sst"; let path = importer.dir.get_import_path(file_name).unwrap(); - let restore_config = external_storage_export::RestoreConfig::default(); + let restore_config = external_storage::RestoreConfig::default(); importer .download_file_from_external_storage( meta.get_length(), @@ -2285,7 +2283,7 @@ mod tests { .unwrap(); let path = importer.dir.get_import_path(kv_meta.get_name()).unwrap(); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { expected_sha256: Some(kv_meta.get_sha256().to_vec()), ..Default::default() }; @@ -2747,7 +2745,7 @@ mod tests { let cfg = Config::default(); let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); let result = importer.download::( &meta, diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index ff7526172d51..4adfe3db51ea 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -4,7 +4,7 @@ use std::path::Path; use encryption::DataKeyManager; use engine_traits::EncryptionKeyManager; -use external_storage_export::ExternalStorage; +use external_storage::ExternalStorage; use file_system::File; use super::Result; diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index 59300f993e36..c13b3008df98 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -4,12 +4,6 @@ version = "0.0.1" edition = "2021" publish = false -[features] -default = ["cloud-aws", "cloud-gcp", "cloud-azure"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] - [dependencies] api_version = { workspace = true } backup = { workspace = true } @@ -17,7 +11,7 @@ collections = { workspace = true } concurrency_manager = { workspace = true } crc64fast = "0.1" engine_traits = { workspace = true } -external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } futures = "0.3" futures-executor = "0.3" diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 3a5800e989bd..6c6eae961d7f 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -12,7 +12,7 @@ use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawVa use backup::Task; use collections::HashMap; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; -use external_storage_export::make_local_backend; +use external_storage::make_local_backend; use futures::{channel::mpsc as future_mpsc, executor::block_on}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{brpb::*, 
kvrpcpb::*, tikvpb::TikvClient}; diff --git a/tests/Cargo.toml b/tests/Cargo.toml index aa0c2c29decc..79f5439736dd 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -40,11 +40,8 @@ harness = false path = "benches/deadlock_detector/mod.rs" [features] -default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] testexport = ["raftstore/testexport", "tikv/testexport", "pd_client/testexport"] profiling = ["profiler/profiling"] @@ -130,7 +127,7 @@ criterion-cpu-time = "0.1" engine_rocks = { workspace = true } engine_test = { workspace = true } engine_traits = { workspace = true } -external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } hyper = { version = "0.14", default-features = false, features = ["runtime"] } keys = { workspace = true } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index e51c9862e474..010d12177b64 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -46,7 +46,7 @@ fn test_download_sst_blocking_sst_writer() { // Now perform a proper download. let mut download = DownloadRequest::default(); download.set_sst(meta.clone()); - download.set_storage_backend(external_storage_export::make_local_backend(temp_dir.path())); + download.set_storage_backend(external_storage::make_local_backend(temp_dir.path())); download.set_name("test.sst".to_owned()); download.mut_sst().mut_range().set_start(vec![sst_range.1]); download diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 4cfd4be07be9..f89ef0c6faa3 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -3,7 +3,7 @@ use std::{fs::File, time::Duration}; use engine_traits::{CF_DEFAULT, CF_WRITE}; -use external_storage_export::{create_storage, make_local_backend}; +use external_storage::{create_storage, make_local_backend}; use file_system::calc_crc32_bytes; use futures::{executor::block_on, AsyncReadExt, StreamExt}; use kvproto::{ diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 3d8cf85b02cf..0b11a12002e9 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -1,5 +1,5 @@ use engine_traits::CF_DEFAULT; -use external_storage_export::LocalStorage; +use external_storage::LocalStorage; use kvproto::import_sstpb::ApplyRequest; use tempfile::TempDir; diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 6c56ab0018b5..2eb1c10c72d4 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -298,7 +298,7 @@ fn test_download_sst() { // Checks that downloading a non-existing storage returns error. 
let mut download = DownloadRequest::default(); download.set_sst(meta.clone()); - download.set_storage_backend(external_storage_export::make_local_backend(temp_dir.path())); + download.set_storage_backend(external_storage::make_local_backend(temp_dir.path())); download.set_name("missing.sst".to_owned()); let result = import.download(&download).unwrap(); diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index d8a11d50746b..96ebc071bbc8 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -10,7 +10,7 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; use engine_traits::CF_DEFAULT; -use external_storage_export::{ExternalStorage, UnpinReader}; +use external_storage::{ExternalStorage, UnpinReader}; use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{ From 6b0a515edcd6bffe362c05f82babe26fae51e99d Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 20 Oct 2023 12:54:29 -0500 Subject: [PATCH 101/203] server: make cpu and heap profiling can be running concurrently (#15761) close tikv/tikv#15760 Make cpu and heap profiling can be running concurrently. Beside, change the behavior of - "debug/pprof/heap_activate": do not dump heap profile periodically by default - "debug/pprof/heap": dump heap profile without any delay and use embedded jeprof script Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/jeprof.in | 5727 +++++++++++++++++++++++++++ src/server/status_server/mod.rs | 45 +- src/server/status_server/profile.rs | 185 +- 3 files changed, 5855 insertions(+), 102 deletions(-) create mode 100644 src/server/status_server/jeprof.in diff --git a/src/server/status_server/jeprof.in b/src/server/status_server/jeprof.in new file mode 100644 index 000000000000..cadf15d7d8e2 --- /dev/null +++ b/src/server/status_server/jeprof.in @@ -0,0 +1,5727 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/jeprof "program" "profile" +# Enters "interactive" mode +# +# % tools/jeprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/jeprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/jeprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list= pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm= pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; +use Cwd; + +my $JEPROF_VERSION = "unknown"; +my $PPROF_VERSION = "2.0"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the JEPROF_TOOLS +# environment variable, or from the environment. +my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +# NOTE: these are lists, so you can put in commandline flags if you want. 
+my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); +# These are used for dynamic profiles +my @URL_FETCHER = ("curl", "-s", "--fail"); + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + +##### Argument parsing ##### + +sub usage_string { + return < + is a space separated list of profile names. +jeprof [options] + is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +jeprof [options] + is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/] - a location of a service to get profile from + + The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: + jeprof http://myserver.com:80$HEAP_PAGE + If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +jeprof --symbols + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base= Subtract from before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds= Length of time for dynamic profiles [default=30 secs] + --add_lib= Read additional symbols and line info from the given library + --lib_prefix=

Comma separated list of library path prefixes + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list= Generate source listing of matching routines + --disasm= Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount= Show at most so many nodes [default=80] + --nodefraction= Hide nodes below *total [default=.005] + --edgefraction= Hide edges below *total [default=.001] + --maxdegree= Max incoming/outgoing edges per node [default=8] + --focus= Focus on backtraces with nodes matching + --thread= Show profile for thread + --ignore= Ignore backtraces with nodes matching + --scale= Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match + +Miscellaneous: + --tools=[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name + +Environment Variables: + JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames + +Examples: + +jeprof /bin/ls ls.prof + Enters "interactive" mode +jeprof --text /bin/ls ls.prof + Outputs one line per procedure +jeprof --web /bin/ls ls.prof + Displays annotated call-graph in web browser +jeprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +jeprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +jeprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +jeprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +jeprof http://localhost:1234/ + Enters "interactive" mode +jeprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return < \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" 
=> \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + "debug-syms-by-id!" => \$main::opt_debug_syms_by_id, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_collapsed + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (@ARGV > 0) { + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... + scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). + $main::gave_up_on_elfutils = 0; +} + +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; + + # Get total data in profile + my $total = TotalProfile($profile); + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. 
+ my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. + delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. 
+ if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system(ShellEscape(@EVINCE, $fname) . $bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. 
+ my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(jeprof) "; + $_ = ; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. +# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? 
int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($2); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); + return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $total_count = shift; + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, $profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + 
$profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. + for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while () { + print $_; + } + close(SRC); + } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $line_limit = shift; + + my $total = TotalProfile($flat); + + # Which profile to sort by? + my $s = $main::opt_cum ? 
$cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; + } +} + +# Print the call graph in a way that's suiteable for callgrind. +sub PrintCallgrind { + my $calls = shift; + my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">$filename"); + printf CG ("events: Hits\n\n"); + foreach my $call ( map { $_->[0] } + sort { $a->[1] cmp $b ->[1] || + $a->[2] <=> $b->[2] } + map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + [$_, $1, $2] } + keys %$calls ) { + my $count = int($calls->{$call}); + $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + my ( $caller_file, $caller_line, $caller_function, + $callee_file, $callee_line, $callee_function ) = + ( $1, $2, $3, $5, $6, $7 ); + + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. 
+ printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); + if (defined $6) { + printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); + printf CG ("calls=$count $callee_line\n"); + } + printf CG ("$caller_line $count\n\n"); + } +} + +# Print disassembly for all all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $disasm_opts = shift; + + my $total = TotalProfile($flat); + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Return reference to array of tuples of the form: +# [start_address, filename, linenumber, instruction, limit_address] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $objdump = $obj_tool_map{"objdump"}; + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + my $last = ["", "", "", ""]; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # : + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line -- zero-extend address to full length + my $addr = HexExtend($1); + my $k = AddressAdd($addr, $offset); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); + } + } + close(OBJDUMP); + return @result; +} + +# The input file should contain lines of the form /proc/maps-like +# output (same format as expected from the profiles) or that looks +# like hex addresses (like "0xDEADBEEF"). We will parse all +# /proc/maps output, and for all the hex addresses, we will output +# "short" symbol names, one per line, in the same order as the input. +sub PrintSymbols { + my $maps_and_symbols_file = shift; + + # ParseLibraries expects pcs to be in a set. Fine by us... 
+ my @pclist = (); # pcs in sorted order + my $pcs = {}; + my $map = ""; + foreach my $line (<$maps_and_symbols_file>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /\b(0x[0-9a-f]+)\b/i) { + push(@pclist, HexExtend($1)); + $pcs->{$pclist[-1]} = 1; + } else { + $map .= $line; + } + } + + my $libs = ParseLibraries($main::prog, $map, $pcs); + my $symbols = ExtractSymbols($libs, $pcs); + + foreach my $pc (@pclist) { + # ->[0] is the shortname, ->[2] is the full name + print(($symbols->{$pc}->[0] || "??") . "\n"); + } +} + + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all all routines that match $list_opts +sub PrintListing { + my $total = shift; + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $list_opts = shift; + my $html = shift; + + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("
<div class=\"legend\">%s<br>Total: %s %s<br></div>
\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); + last; + } + $addr = AddressInc($addr); + } + } + } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; + + + +Pprof listing + + + + +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; + + +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s//>/g; + return $text; +} + +# Returns the indentation of the line, if it has any non-whitespace +# characters. Otherwise, returns -1. +sub Indentation { + my $line = shift; + if (m/^(\s*)\S/) { + return length($1); + } else { + return -1; + } +} + +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). 
+# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $html = shift; + my $output = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); + + # Hack 1: assume that the first source file encountered in the + # disassembly contains the routine + my $filename = undef; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return 0; + } + + # Hack 2: assume that the largest line number from $filename is the + # end of the procedure. This is typically safe since if P1 contains + # an inlined call to P2, then P2 usually occurs earlier in the + # source file. If this does not work, we might have to compute a + # density profile or just print all regions we find. + my $lastline = 0; + for (my $i = 0; $i <= $#instructions; $i++) { + my $f = $instructions[$i]->[1]; + my $l = $instructions[$i]->[2]; + if (($f eq $filename) && ($l > $lastline)) { + $lastline = $l; + } + } + + # Hack 3: assume the first source location from "filename" is the start of + # the source code. + my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Hack 4: Extend last line forward until its indentation is less than + # the indentation we saw on $firstline + my $oldlastline = $lastline; + { + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return 0; + } + my $l = 0; + my $first_indentation = -1; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + my $indent = Indentation($_); + if ($l >= $firstline) { + if ($first_indentation < 0 && $indent >= 0) { + $first_indentation = $indent; + last if ($first_indentation == 0); + } + } + if ($l >= $lastline && $indent >= 0) { + if ($indent >= $first_indentation) { + $lastline = $l+1; + } else { + last; + } + } + } + close(FILE); + } + + # Assign all samples to the range $firstline,$lastline, + # Hack 4: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. 
+ my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("%s" . + ":%d", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } + + if ($html) { + printf $output ( + "

<h1>%s</h1>%s\n<pre>\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
+  if (!open(FILE, "<$filename")) {
+    print STDERR "$filename: $!\n";
+    return 0;
+  }
+  my $l = 0;
+  while (<FILE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    $l++;
+    if ($l >= $firstline - 5 &&
+        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+      chop;
+      my $text = $_;
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disasembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "<span class=\"asm\">" . $dis . "</span>";
+        }
+        my $source_class = (($n1 + $n2 > 0)
+                            ? "livesrc"
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "%5d " .
+          "%6s %6s %s%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
+    };
+  }
+  close(FILE);
+  if ($html) {
+    print $output "</pre>
\n"; + } + return 1; +} + +# Return the source line for the specified file/linenumber. +# Returns undef if not found. +sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while () { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . + "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. 
+ my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); + if ($main::opt_gv) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; + } elsif ($main::opt_evince) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; + } elsif ($main::opt_ps) { + $output = "| $escaped_dot -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; + } elsif ($main::opt_gif) { + $output = "| $escaped_dot -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . 
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $local_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . + "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $local_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } + + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + + # Compute line width based on edge count + my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. 
+ $w = 1; + } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + + return 1; +} + +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = ; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # + # + # ... + # + # + # + # Change it to + # + # + # $svg_javascript + # + # + # ... + # + # + # + + # Fix width, height; drop viewBox. + $svg =~ s/(?s) above first + my $svg_javascript = SvgJavascript(); + my $viewport = "\n"; + $svg =~ s/ above . + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; + +EOF +} + +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. +sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. 
+sub ShortIdFor { + my $key = shift; + my $id = $main::uniqueid{$key}; + if (!defined($id)) { + $id = keys(%main::uniqueid) + 1; + $main::uniqueid{$key} = $id; + } + return $id; +} + +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } + if ($j > 2) { + $func = "$func (inline)"; + } + + # Do not merge nodes corresponding to Callback::Run since that + # causes confusing cycles in dot display. Instead, we synthesize + # a unique name for this frame per caller. + if ($func =~ m/Callback.*::Run$/) { + my $caller = ($i > 0) ? $addrs[$i-1] : 0; + $func = "Run#" . ShortIdFor($caller); + } + + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' && $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } + } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." 
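An aside on the ShortIdFor interning defined above: it is what keeps different callers of a Callback::Run frame from collapsing into one confusing node when TranslateStack synthesizes per-caller names. A minimal standalone sketch of the same idea; the helper name short_id_for and the caller addresses are invented for illustration:

  use strict;
  use warnings;

  my %uniqueid;
  sub short_id_for {            # same idea as ShortIdFor() above
    my $key = shift;
    if (!defined($uniqueid{$key})) {
      $uniqueid{$key} = keys(%uniqueid) + 1;
    }
    return $uniqueid{$key};
  }

  # Two different (hypothetical) callers of the same Callback::Run frame
  # get two distinct synthesized node names:
  my $node_a = "Run#" . short_id_for("00000000004a10f0");   # "Run#1"
  my $node_b = "Run#" . short_id_for("00000000004a2210");   # "Run#2"
  my $again  = "Run#" . short_id_for("00000000004a10f0");   # "Run#1" again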
+sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return "seconds"; + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# If the second-youngest PC on the stack is always the same, returns +# that pc. Otherwise, returns undef. +sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. + push (@stack, $address); + } + + return @stack; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. 
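To make the flat/cumulative distinction above concrete: FlatProfile charges each sample only to the innermost frame, while CumulativeProfile charges it to every frame on the stack. A minimal sketch over toy stacks; the frame names and counts are invented, and the hash layout mirrors the newline-joined keys that AddEntry builds:

  use strict;
  use warnings;

  # Toy profile: key is a newline-joined stack, innermost frame first,
  # value is the sample count.
  my %profile = (
    "a\nb\nc" => 3,   # a, called from b, called from c
    "a\nc"    => 2,
  );

  my (%flat, %cum);
  while (my ($stack, $count) = each %profile) {
    my @frames = split(/\n/, $stack);
    $flat{$frames[0]} += $count;       # flat: innermost frame only
    $cum{$_}  += $count for @frames;   # cumulative: every frame on the stack
  }
  # %flat is (a => 5); %cum is (a => 5, b => 3, c => 5).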
+sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); + for (my $i = 1; $i <= $#address; $i++) { + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; + } + } + + return $calls; +} + +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('@JEMALLOC_PREFIX@calloc', + 'cfree', + '@JEMALLOC_PREFIX@malloc', + 'je_malloc_default', + 'newImpl', + 'void* newImpl', + 'fallbackNewImpl', + 'void* fallbackNewImpl', + '@JEMALLOC_PREFIX@free', + '@JEMALLOC_PREFIX@memalign', + '@JEMALLOC_PREFIX@posix_memalign', + '@JEMALLOC_PREFIX@aligned_alloc', + 'pvalloc', + '@JEMALLOC_PREFIX@valloc', + '@JEMALLOC_PREFIX@realloc', + '@JEMALLOC_PREFIX@mallocx', + '@JEMALLOC_PREFIX@rallocx', + 'do_rallocx', + '@JEMALLOC_PREFIX@xallocx', + '@JEMALLOC_PREFIX@dallocx', + '@JEMALLOC_PREFIX@sdallocx', + '@JEMALLOC_PREFIX@sdallocx_noflags', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { + $skip{$name} = 1; + $skip{"_" . 
$name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. + $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu') { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. + foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) + while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + splice @addrs, 1, 1; + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + # Throw away the portion of the backtrace seen so far, under the + # assumption that previous frames were for functions internal to the + # allocator. + @path = (); + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + + $result = FilterFrames($symbols, $result); + + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? 
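The %seen guard in ReduceProfile above is what avoids double-counting recursive frames within one stack. A small sketch of the same trick on a hypothetical recursive stack; the frame names are made up:

  use strict;
  use warnings;

  # A recursive stack: "walk" appears three times. Charging it once per
  # appearance would triple-count it in a cumulative view.
  my @translated = ("leaf", "walk", "walk", "walk", "main");

  my @path;
  my %seen = ('' => 1);                # skip empty keys, as in ReduceProfile
  foreach my $e (@translated) {
    next if $seen{$e}++;
    push(@path, $e);
  }
  # @path is ("leaf", "walk", "main"), so a later AddEntry charges the
  # count to "walk" exactly once for this stack.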
+sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my $profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Add A to B +sub AddProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + my $v = $A->{$k}; + AddEntry($R, $k, $v); + } + # add all keys in B + foreach my $k (keys(%{$B})) { + my $v = $B->{$k}; + AddEntry($R, $k, $v); + } + return $R; +} + +# Merges symbol maps +sub MergeSymbols { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + $R->{$k} = $A->{$k}; + } + if (defined($B)) { + foreach my $k (keys(%{$B})) { + $R->{$k} = $B->{$k}; + } + } + return $R; +} + + +# Add A to B +sub AddPcs { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + $R->{$k} = 1 + } + # add all keys in B + foreach my $k (keys(%{$B})) { + $R->{$k} = 1 + } + return $R; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # Take care of when subtracted profile has more entries + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +# Add a stack of entries to specified profile, and add them to the $pcs +# list. 
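The profile arithmetic above (AddProfile, SubtractProfile, AddEntry) is what backs diffing a dump against a baseline (--base) profile. A rough sketch under the same hash-of-counts representation, with invented stack keys:

  use strict;
  use warnings;

  my %base    = ("alloc_a" => 10, "alloc_b" => 4);    # earlier dump
  my %current = ("alloc_a" => 25, "alloc_c" => 7);    # later dump

  # Equivalent to SubtractProfile(\%current, \%base): growth since the base.
  my %diff;
  $diff{$_} = $current{$_} - ($base{$_} // 0) for keys %current;
  $diff{$_} //= -$base{$_}                    for keys %base;   # keys only in the base

  # %diff is (alloc_a => 15, alloc_c => 7, alloc_b => -4). With
  # $main::opt_drop_negative set, negative entries are clamped to 0 and
  # base-only keys are skipped instead.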
+sub AddEntries { + my $profile = shift; + my $pcs = shift; + my $stack = shift; + my $count = shift; + my @k = (); + + foreach my $e (split(/\s+/, $stack)) { + my $pc = HexExtend($e); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $count); +} + +##### Code to profile a server dynamically ##### + +sub CheckSymbolPage { + my $url = SymbolPageURL(); + my $command = ShellEscape(@URL_FETCHER, $url); + open(SYMBOL, "$command |") or error($command); + my $line = ; + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(SYMBOL); + unless (defined($line)) { + error("$url doesn't exist\n"); + } + + if ($line =~ /^num_symbols:\s+(\d+)$/) { + if ($1 == 0) { + error("Stripped binary. No symbols available.\n"); + } + } else { + error("Failed to get the number of symbols from $url\n"); + } +} + +sub IsProfileURL { + my $profile_name = shift; + if (-f $profile_name) { + printf STDERR "Using local file $profile_name.\n"; + return 0; + } + return 1; +} + +sub ParseProfileURL { + my $profile_name = shift; + + if (!defined($profile_name) || $profile_name eq "") { + return (); + } + + # Split profile URL - matches all non-empty strings, so no test. + $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; + + my $proto = $1 || "http://"; + my $hostport = $2; + my $prefix = $3; + my $profile = $4 || "/"; + + my $host = $hostport; + $host =~ s/:.*//; + + my $baseurl = "$proto$hostport$prefix"; + return ($host, $baseurl, $profile); +} + +# We fetch symbols from the first profile argument. +sub SymbolPageURL { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + return "$baseURL$SYMBOL_PAGE"; +} + +sub FetchProgramName() { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "$baseURL$PROGRAM_NAME_PAGE"; + my $command_line = ShellEscape(@URL_FETCHER, $url); + open(CMDLINE, "$command_line |") or error($command_line); + my $cmdline = ; + $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(CMDLINE); + error("Failed to get program name from $url\n") unless defined($cmdline); + $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. + $cmdline =~ s!\n!!g; # Remove LFs. + return $cmdline; +} + +# Gee, curl's -L (--location) option isn't reliable at least +# with its 7.12.3 version. Curl will forget to post data if +# there is a redirection. This function is a workaround for +# curl. Redirection happens on borg hosts. +sub ResolveRedirectionForCurl { + my $url = shift; + my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); + open(CMDLINE, "$command_line |") or error($command_line); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^Location: (.*)/) { + $url = $1; + } + } + close(CMDLINE); + return $url; +} + +# Add a timeout flat to URL_FETCHER. Returns a new list. +sub AddFetchTimeout { + my $timeout = shift; + my @fetcher = @_; + if (defined($timeout)) { + if (join(" ", @fetcher) =~ m/\bcurl -s/) { + push(@fetcher, "--max-time", sprintf("%d", $timeout)); + } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { + push(@fetcher, sprintf("--deadline=%d", $timeout)); + } + } + return @fetcher; +} + +# Reads a symbol map from the file handle name given as $1, returning +# the resulting symbol map. Also processes variables relating to symbols. +# Currently, the only variable processed is 'binary=' which updates +# $main::prog to have the correct program name. 
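For reference, this is roughly the profile/pcs layout that AddEntries above builds from one whitespace-separated stack line. The addresses and count below are hypothetical, and the padding hard-codes a 64-bit $address_length of 16:

  use strict;
  use warnings;

  # Hypothetical stack field of one profile entry line, plus its count.
  my $stack = "0x55e0a1 0x55e0f7 0x403021";
  my $count = 12;

  my (%profile, %pcs);
  my @k = map {
    (my $pc = $_) =~ s/^0x//;
    $pc = ("0" x (16 - length($pc))) . $pc;   # roughly what HexExtend does for 64-bit
    $pcs{$pc} = 1;                            # remember every PC for symbolization
    $pc;
  } split(/\s+/, $stack);
  $profile{ join("\n", @k) } += $count;

  # The key is "000000000055e0a1\n000000000055e0f7\n0000000000403021":
  # innermost frame first, one zero-padded address per line.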
+sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " . + "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + + my $command_line; + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); + } else { + $url = AppendSymbolFilterParams($url); + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); + } + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. + my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . 
$pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = $PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; + } else { + $url .= "?"; + } + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } + } + + my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); + if (! -d $profile_dir) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + cleanup(); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. 
+ } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, check if we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + $self->{perl_is_64bit} = 0; + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + # Right now, we just die. + my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); + } + @$slots = @b64_values; + } + } + + # Access the i-th long in the file (logically), or -1 at EOF. + sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. 
The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to jeprof, which jeprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character + return ""; + } + while (defined($line = )) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. + return $line; + } + } + return undef; # got to EOF without seeing a header line +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. + open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileHeader(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{threads} Map of thread IDs to profile objects +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + my $result; # return value + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. 
+ open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = ReadProfileHeader(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. Try again without passing\n $prog\n"); + } + # Read the symbol section of the symbolized profile file. + $symbols = ReadSymbols(*PROFILE{IO}); + # Read the next line to get the header for the remaining profile. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + $main::profile_type = ''; + + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap/) { + $main::profile_type = 'heap'; + $result = ReadThreadedHeapProfile($prog, $fname, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . + "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } + + close(PROFILE); + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. +sub FixCallerAddresses { + my $stack = shift; + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. 
+ my $slots = CpuProfileStream->new(*PROFILE, $fname); + + # Read header. The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. + $pc--; + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * 4, 0); + read(PROFILE, $map, (stat PROFILE)[7]); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub HeapProfileIndex { + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + return $index; +} + +sub ReadMappedLibraries { + my $fh = shift; + my $map = ""; + # Read the /proc/self/maps data + while (<$fh>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + return $map; +} + +sub ReadMemoryMap { + my $fh = shift; + my $map = ""; + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + return $map; +} + +sub AdjustSamples { + my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_; + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+ # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + return ($n1, $s1, $n2, $s2); +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = HeapProfileIndex(); + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 + # There are two pairs , the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ _v2/524288 + # the trailing number (524288) is the sampling rate. (Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). 
It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # : [: ] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadThreadedHeapProfile { + my ($prog, $fname, $header) = @_; + + my $index = HeapProfileIndex(); + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + # Assuming a very specific type of header for now. + if ($header =~ m"^heap_v2/(\d+)") { + $type = "_v2"; + $sampling_algorithm = 2; + $sample_adjustment = int($1); + } + if ($type ne "_v2" || !defined($sample_adjustment)) { + die "Threaded heap profiles require v2 sampling with a sample rate\n"; + } + + my $profile = {}; + my $thread_profiles = {}; + my $pcs = {}; + my $map = ""; + my $stack = ""; + + while () { + s/\r//g; + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # @ a1 a2 ... an + # t*: : [: ] + # t1: : [: ] + # ... + # tn: : [: ] + s/^\s*//; + s/\s*$//; + if (m/^@\s+(.*)$/) { + $stack = $1; + } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) { + if ($stack eq "") { + # Still in the header, so this is just a per-thread summary. 
+ next; + } + my $thread = $2; + my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + if ($thread eq "*") { + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } else { + if (!exists($thread_profiles->{$thread})) { + $thread_profiles->{$thread} = {}; + } + AddEntries($thread_profiles->{$thread}, $pcs, + FixCallerAddresses($stack), $counts[$index]); + } + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{threads} = $thread_profiles; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadSynchProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $map = ''; + my $profile = {}; + my $pcs = {}; + my $sampling_period = 1; + my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $seen_clockrate = 0; + my $line; + + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } + + while ( $line = ) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + $count *= $sampling_period; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { + next; + } + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + + } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1,$2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "cycles/second") { + $cyclespernanosec = $value / 1e9; + $seen_clockrate = 1; + } elsif ($variable eq "sampling period") { + $sampling_period = $value; + } elsif ($variable eq "ms since reset") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } elsif ($variable eq "discarded samples") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } else { + printf STDERR ("Ignoring unnknown variable in /contention output: " . + "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. 
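+# (Note: ConfigureObjTools below switches $address_length to 16 when the
+# profiled binary is ELF 64-bit, so addresses are zero-extended to 16 nibbles
+# in that case.)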
+sub HexExtend { + my $addr = shift; + + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; + } + return ("0" x $zeros_needed) . $addr; +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + + if ($file !~ m|^/|) { + return undef; + } + + # Find debug symbol file if it's named after the library's name. + + if (-f "/usr/lib/debug$file") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } + return "/usr/lib/debug$file.debug"; + } + + if(!$main::opt_debug_syms_by_id) { + if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" }; + return undef; + } + + # Find debug file if it's named after the library's build ID. + + my $readelf = ''; + if (!$main::gave_up_on_elfutils) { + $readelf = qx/eu-readelf -n ${file}/; + if ($?) { + print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n"; + $main::gave_up_on_elfutils = 1; + return undef; + } + my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s; + if (defined $buildID && length $buildID > 0) { + my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug'; + if (-e $symbolFile) { + if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" }; + return $symbolFile; + } else { + if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" }; + return undef; + } + } + } + + if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" }; + return undef; +} + + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
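+  # The {size, vma, file_offset} triple returned here is used by
+  # ParseLibraries() to shift a mapping's offset by (vma - file_offset) for
+  # pre-relocated libraries.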
+ my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line () { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. + if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = Cwd::abs_path(shift); + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + # Full line from /proc/self/maps. 
Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } + # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in + # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) + # + # Example: + # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s + # o.1 NCH -1 + elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) { + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = FindLibrary($5); + + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. + if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; } + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. 
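+    # Illustrative walk-through, using one of the unit-test vectors below:
+    # adding the zero-extended addresses 00000000ffffffff and 00000000aaaaaaaa
+    # handles the low 7 nibbles first (fffffff + aaaaaaa overflows, leaving
+    # aaaaaa9 plus a carry), then the next 7 nibbles (000000f + 000000a + 1 =
+    # 000001a), then the top 2 nibbles (00), giving 00000001aaaaaaa9.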
+ + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressSub { + my $addr1 = shift; + my $addr2 = shift; + my $diff; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $diff); + + } else { + # Do the addition in 7-nibble chunks to trivialize borrow handling. + # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } + + my $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + my $a2 = hex(substr($addr2,-7)); + $addr2 = substr($addr2,0,-7); + my $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + my $r = sprintf("%07x", $diff); + + $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + $a2 = hex(substr($addr2,-7)) + $b; + $addr2 = substr($addr2,0,-7); + $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + $r = sprintf("%07x", $diff) . $r; + + $a1 = hex($addr1); + $a2 = hex($addr2) + $b; + if ($a2 > $a1) { $a1 += 0x100; } + $diff = $a1 - $a2; + $r = sprintf("%02x", $diff) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + + return $r; + } +} + +# Increment a hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressInc { + my $addr = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr)+1) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + # We are always doing this to step through the addresses in a function, + # and will almost never overflow the first chunk, so we check for this + # case and exit early. + + # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } + + my $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + my $r = sprintf("%07x", $sum); + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "0000000"; + } + + $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + $r = sprintf("%07x", $sum) . $r; + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "00000000000000"; + } + + $sum = hex($addr) + 1; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . 
$r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + return $r; + } +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library. To make this faster, + # we sort libraries by their starting pc value (highest first), and + # advance through the libraries as we advance the pc. Sometimes the + # addresses of libraries may overlap with the addresses of the main + # binary, so to make sure the libraries 'win', we iterate over the + # libraries in reverse order (which assumes the binary doesn't start + # in the middle of a library, which seems a fair assumption). + my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings + foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Use debug library if it exists + my $debug_libname = DebuggingLibrary($libname); + if ($debug_libname) { + $libname = $debug_libname; + } + + # Get list of pcs that belong in this library. + my $contained = []; + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. + @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. 
+ if (system("$cmd -i --help >$dev_null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat", $main::tmpfile_sym); + print("----\n"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); + print("----\n"); + } + + open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); + my $count = 0; # Index in pclist + while () { + # Read fullfunction and filelineinfo from next pair of lines + s/\r?\n$//g; + my $fullfunction = $_; + $_ = ; + s/\r?\n$//g; + my $filelinenum = $_; + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; + } + + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + + my $pcstr = $pclist->[$count]; + my $function = ShortFunctionName($fullfunction); + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template parameters, + # that are automatically stripped out by ShortFunctionName().) + # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. + if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } + } + } + + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, so this entry ends immediately + $count++; + } + } + close(SYMBOLS); +} + +# Use nm to map the list of referenced PCs to symbols. Return true iff we +# are able to read procedure information via nm. +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + if (!%{$symbol_table}) { + return 0; + } + # Start addresses are already the right length (8 or 16 hex digits). 
+ my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use addresses + foreach my $pc (@{$pclist}) { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + return 0; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a cmp $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = AddressSub($pc, $offset); + while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + if ($mpc lt $symbol_table->{$fullname}->[1]) { + $symbols->{$pc} = [$name, "?", $fullname]; + } else { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + } + return 1; +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + +##### Miscellaneous ##### + +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) JEPROF_TOOLS environment variable, if set +# 3) the environment +sub ConfigureObjTools { + my $prog_file = shift; + + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. + (-e $prog_file) || error("$prog_file does not exist.\n"); + + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + + if ($file_type =~ /64-bit/) { + # Change $address_length to 16 if the program file is ELF 64-bit. + # We can't detect this from many (most?) heap or lock contention + # profiles, since the actual addresses referenced are generally in low + # memory even for 64-bit programs. + $address_length = 16; + } + + if ($file_type =~ /MS Windows/) { + # For windows, we provide a version of nm and addr2line as part of + # the opensource release, which is capable of parsing + # Windows-style PDB executables. It should live in the path, or + # in the same directory as jeprof. 
+ $obj_tool_map{"nm_pdb"} = "nm-pdb"; + $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; + } + + if ($file_type =~ /Mach-O/) { + # OS X uses otool to examine Mach-O files, rather than objdump. + $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"addr2line"} = "false"; # no addr2line + $obj_tool_map{"objdump"} = "false"; # no objdump + } + + # Go fill in %obj_tool_map with the pathnames to use: + foreach my $tool (keys %obj_tool_map) { + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); + } +} + +# Returns the path of a caller-specified object tool. If --tools or +# JEPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path unmodified (which +# means we will look for it on PATH). +sub ConfigureTool { + my $tool = shift; + my $path; + + # --tools (or $JEPROF_TOOLS) is a comma separated list, where each + # item is either a) a pathname prefix, or b) a map of the form + # :. First we look for an entry of type (b) for our + # tool. If one is found, we use it. Otherwise, we consider all the + # pathname prefixes in turn, until one yields an existing file. If + # none does, we use a default path. + my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || ""; + if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { + $path = $2; + # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. + } elsif ($tools ne '') { + foreach my $prefix (split(',', $tools)) { + next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list + if (-x $prefix . $tool) { + $path = $prefix . $tool; + last; + } + } + if (!$path) { + error("No '$tool' found with prefix specified by " . + "--tools (or \$JEPROF_TOOLS) '$tools'\n"); + } + } else { + # ... otherwise use the version that exists in the same directory as + # jeprof. If there's nothing there, use $PATH. + $0 =~ m,[^/]*$,; # this is everything after the last slash + my $dirname = $`; # this is everything up to and including the last slash + if (-x "$dirname$tool") { + $path = "$dirname$tool"; + } else { + $path = $tool; + } + } + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; +} + +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) { # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + +sub cleanup { + unlink($main::tmpfile_sym); + unlink(keys %main::tempnames); + + # We leave any collected profiles in $HOME/jeprof in case the user wants + # to look at them later. We print a message informing them of this. 
+ if ((scalar(@main::profile_files) > 0) && + defined($main::collected_profile)) { + if (scalar(@main::profile_files) == 1) { + print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; + } + print STDERR "If you want to investigate this profile further, you can do:\n"; + print STDERR "\n"; + print STDERR " jeprof \\\n"; + print STDERR " $main::prog \\\n"; + print STDERR " $main::collected_profile\n"; + print STDERR "\n"; + } +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + + +# Run $nm_command and get all the resulting procedure boundaries whose +# names match "$regexp" and returns them in a hashtable mapping from +# procedure name to a two-element vector of [start address, end address] +sub GetProcedureBoundariesViaNm { + my $escaped_nm_command = shift; # shell-escaped + my $regexp = shift; + + my $symbol_table = {}; + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); + my $last_start = "0"; + my $routine = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (m/^\s*([0-9a-f]+) (.) (..*)/) { + my $start_val = $1; + my $type = $2; + my $this_routine = $3; + + # It's possible for two symbols to share the same address, if + # one is a zero-length variable (like __start_google_malloc) or + # one symbol is a weak alias to another (like __libc_malloc). + # In such cases, we want to ignore all values except for the + # actual symbol, which in nm-speak has type "T". The logic + # below does this, though it's a bit tricky: what happens when + # we have a series of lines with the same address, is the first + # one gets queued up to be processed. However, it won't + # *actually* be processed until later, when we read a line with + # a different address. That means that as long as we're reading + # lines with the same address, we have a chance to replace that + # item in the queue, which we do whenever we see a 'T' entry -- + # that is, a line with type 'T'. If we never see a 'T' entry, + # we'll just go ahead and process the first entry (which never + # got touched in the queue), and ignore the others. + if ($start_val eq $last_start && $type =~ /t/i) { + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; + } elsif ($start_val eq $last_start) { + # We're not the 'T' symbol at this address, so ignore us. + next; + } + + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. We use a syntax + # that resembles template parameters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } elsif (m/^Loaded image name: (.+)/) { + # The win32 nm workalike emits information about the binary it is using. + if ($main::opt_debug) { print STDERR "Using Image $1\n"; } + } elsif (m/^PDB file name: (.+)/) { + # The win32 nm workalike emits information about the pdb it is using. + if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } + } + } + close(NM); + # Handle the last line in the nm output. Unfortunately, we don't know + # how big this last symbol is, because we don't know how big the file + # is. 
For now, we just give it a size of 0. + # TODO(csilvers): do better here. + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($last_start)]; + } + return $symbol_table; +} + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address]. +# Will return an empty map if nm is not installed or not working properly. +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. you have a.out in your current directory (a not uncommon occurrence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + my $nm = $obj_tool_map{"nm"}; + my $cppfilt = $obj_tool_map{"c++filt"}; + + # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm + # binary doesn't support --demangle. In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) { + # In this mode, we do "nm --demangle " + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { + # In this mode, we do "nm | c++filt" + $cppfilt_flag = " | " . ShellEscape($cppfilt); + }; + my $flatten_flag = ""; + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + # 6nm is for Go binaries + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", + ); + + # If the executable is an MS Windows PDB-format executable, we'll + # have set up obj_tool_map("nm_pdb"). In this case, we actually + # want to use both unix nm and windows-specific nm_pdb, since + # PDB-format executables can apparently include dwarf .o files. + if (exists $obj_tool_map{"nm_pdb"}) { + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . 
" 2>$dev_null"); + } + + foreach my $nm_command (@nm_commands) { + my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); + return $symbol_table if (%{$symbol_table}); + } + my $symbol_table = {}; + return $symbol_table; +} + + +# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. +# To make them more readable, we add underscores at interesting places. +# This routine removes the underscores, producing the canonical representation +# used by jeprof to represent addresses, particularly in the tested routines. +sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. 
+ $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
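+    # Each row is again [a, b, a+b, a-b, a+1]; the underscores only mark the
+    # 2/7/7-nibble chunk boundaries and are stripped by CanonicalHex().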
+ [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} \ No newline at end of file diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 3e68b0b6310d..b76454ffab86 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,11 +40,7 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; -pub use profile::{ - activate_heap_profile, deactivate_heap_profile, heap_profiles_dir, jeprof_heap_profile, - list_heap_profiles, read_file, start_one_cpu_profile, start_one_heap_profile, - HEAP_PROFILE_REGEX, -}; +use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; use resource_control::ResourceGroupManager; @@ -170,16 +166,22 @@ where Ok(val) => val, Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), }, - None => 60, + None => 0, }; - let interval = Duration::from_secs(interval); - let period = GLOBAL_TIMER_HANDLE - .interval(Instant::now() + interval, interval) - .compat() - .map_ok(|_| ()) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_stream(); + let period = if interval == 0 { + None + } else { + let interval = Duration::from_secs(interval); + Some( + GLOBAL_TIMER_HANDLE + .interval(Instant::now() + interval, interval) + .compat() + .map_ok(|_| ()) + .map_err(|_| TIMER_CANCELED.to_owned()) + .into_stream(), + ) + }; let (tx, rx) = oneshot::channel(); let callback = move || tx.send(()).unwrap_or_default(); let res = Handle::current().spawn(activate_heap_profile(period, store_path, callback)); @@ -201,7 +203,6 @@ where Ok(make_response(StatusCode::OK, body)) } - #[allow(dead_code)] async fn dump_heap_prof_to_resp(req: Request) -> hyper::Result> { let query = req.uri().query().unwrap_or(""); let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); @@ -239,21 +240,7 @@ where return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); } } else { - let mut seconds = 10; - if let Some(s) = query_pairs.get("seconds") { - match s.parse() { - Ok(val) => seconds = val, - Err(_) => { - let errmsg = "request should have seconds argument".to_owned(); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); - } - } - } - let timer = GLOBAL_TIMER_HANDLE.delay(Instant::now() + Duration::from_secs(seconds)); - let end = 
Compat01As03::new(timer) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_future(); - start_one_heap_profile(end, use_jeprof).await + dump_one_heap_profile() }; match result { diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index dd49c394046b..3941c6c12b67 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -1,11 +1,11 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use std::{ fs::{File, Metadata}, - io::Read, + io::{Read, Write}, path::PathBuf, pin::Pin, - process::Command, - sync::Mutex as StdMutex, + process::{Command, Stdio}, + sync::Mutex, time::{Duration, UNIX_EPOCH}, }; @@ -23,7 +23,6 @@ use regex::Regex; use tempfile::{NamedTempFile, TempDir}; #[cfg(not(test))] use tikv_alloc::{activate_prof, deactivate_prof, dump_prof}; -use tokio::sync::{Mutex, MutexGuard}; #[cfg(test)] pub use self::test_utils::TEST_PROFILE_MUTEX; @@ -35,10 +34,10 @@ pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; lazy_static! { - // If it's locked it means there are already a heap or CPU profiling. - static ref PROFILE_MUTEX: Mutex<()> = Mutex::new(()); - // The channel is used to deactivate a profiling. - static ref PROFILE_ACTIVE: StdMutex, TempDir)>> = StdMutex::new(None); + // If it's some it means there are already a CPU profiling. + static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); + // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. + static ref HEAP_PROFILE_ACTIVE: Mutex>, TempDir)>> = Mutex::new(None); // To normalize thread names. static ref THREAD_NAME_RE: Regex = @@ -48,32 +47,26 @@ lazy_static! { type OnEndFn = Box Result + Send + 'static>; -struct ProfileGuard<'a, I, T> { - _guard: MutexGuard<'a, ()>, +struct ProfileRunner { item: Option, on_end: Option>, end: BoxFuture<'static, Result<(), String>>, } -impl<'a, I, T> Unpin for ProfileGuard<'a, I, T> {} +impl Unpin for ProfileRunner {} -impl<'a, I, T> ProfileGuard<'a, I, T> { +impl ProfileRunner { fn new( on_start: F1, on_end: F2, end: BoxFuture<'static, Result<(), String>>, - ) -> Result, String> + ) -> Result where F1: FnOnce() -> Result, F2: FnOnce(I) -> Result + Send + 'static, { - let _guard = match PROFILE_MUTEX.try_lock() { - Ok(guard) => guard, - _ => return Err("Already in Profiling".to_owned()), - }; let item = on_start()?; - Ok(ProfileGuard { - _guard, + Ok(ProfileRunner { item: Some(item), on_end: Some(Box::new(on_end) as OnEndFn), end, @@ -81,7 +74,7 @@ impl<'a, I, T> ProfileGuard<'a, I, T> { } } -impl<'a, I, T> Future for ProfileGuard<'a, I, T> { +impl Future for ProfileRunner { type Output = Result; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { match self.end.as_mut().poll(cx) { @@ -99,34 +92,22 @@ impl<'a, I, T> Future for ProfileGuard<'a, I, T> { } } -/// Trigger a heap profie and return the content. 
-#[allow(dead_code)] -pub async fn start_one_heap_profile(end: F, use_jeprof: bool) -> Result, String> -where - F: Future> + Send + 'static, -{ - let on_start = || activate_prof().map_err(|e| format!("activate_prof: {}", e)); - - let on_end = move |_| { - deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e))?; - let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; - let path = f.path().to_str().unwrap(); - dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; - if use_jeprof { - jeprof_heap_profile(path) - } else { - read_file(path) - } - }; - - ProfileGuard::new(on_start, on_end, end.boxed())?.await +/// Trigger a heap profile and return the content. +pub fn dump_one_heap_profile() -> Result, String> { + if HEAP_PROFILE_ACTIVE.lock().unwrap().is_none() { + return Err("heap profiling is not activated".to_owned()); + } + let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; + let path = f.path().to_str().unwrap(); + dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; + read_file(path) } /// Activate heap profile and call `callback` if successfully. /// `deactivate_heap_profile` can only be called after it's notified from /// `callback`. pub async fn activate_heap_profile( - dump_period: S, + dump_period: Option, store_path: PathBuf, callback: F, ) -> Result<(), String> @@ -134,6 +115,10 @@ where S: Stream> + Send + Unpin + 'static, F: FnOnce() + Send + 'static, { + if HEAP_PROFILE_ACTIVE.lock().unwrap().is_some() { + return Err("Already in Heap Profiling".to_owned()); + } + let (tx, rx) = oneshot::channel(); let dir = tempfile::Builder::new() .prefix("heap-") @@ -142,40 +127,55 @@ where let dir_path = dir.path().to_str().unwrap().to_owned(); let on_start = move || { - let mut activate = PROFILE_ACTIVE.lock().unwrap(); + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); assert!(activate.is_none()); + *activate = Some((Some(tx), dir)); activate_prof().map_err(|e| format!("activate_prof: {}", e))?; - *activate = Some((tx, dir)); callback(); info!("periodical heap profiling is started"); Ok(()) }; let on_end = |_| { - deactivate_heap_profile(); - deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)) + let res = deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)); + *HEAP_PROFILE_ACTIVE.lock().unwrap() = None; + res }; let end = async move { - select! { - _ = rx.fuse() => { - info!("periodical heap profiling is canceled"); - Ok(()) - }, - res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { - warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); - res + if let Some(dump_period) = dump_period { + select! { + _ = rx.fuse() => { + info!("periodical heap profiling is canceled"); + Ok(()) + }, + res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { + warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); + res + } } + } else { + let _ = rx.await; + info!("periodical heap profiling is canceled"); + Ok(()) } }; - ProfileGuard::new(on_start, on_end, end.boxed())?.await + ProfileRunner::new(on_start, on_end, end.boxed())?.await } /// Deactivate heap profile. Return `false` if it hasn't been activated. 
pub fn deactivate_heap_profile() -> bool { - let mut activate = PROFILE_ACTIVE.lock().unwrap(); - activate.take().is_some() + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + match activate.as_mut() { + Some((tx, _)) => { + if let Some(tx) = tx.take() { + let _ = tx.send(()); + } + true + } + None => false, + } } /// Trigger one cpu profile. @@ -187,7 +187,14 @@ pub async fn start_one_cpu_profile( where F: Future> + Send + 'static, { + if CPU_PROFILE_ACTIVE.lock().unwrap().is_some() { + return Err("Already in CPU Profiling".to_owned()); + } + let on_start = || { + let mut activate = CPU_PROFILE_ACTIVE.lock().unwrap(); + assert!(activate.is_none()); + *activate = Some(()); let guard = pprof::ProfilerGuardBuilder::default() .frequency(frequency) .blocklist(&["libc", "libgcc", "pthread", "vdso"]) @@ -218,10 +225,13 @@ where .flamegraph(&mut body) .map_err(|e| format!("generate flamegraph from report fail: {}", e))?; } + drop(guard); + *CPU_PROFILE_ACTIVE.lock().unwrap() = None; + Ok(body) }; - ProfileGuard::new(on_start, on_end, end.boxed())?.await + ProfileRunner::new(on_start, on_end, end.boxed())?.await } pub fn read_file(path: &str) -> Result, String> { @@ -234,9 +244,26 @@ pub fn read_file(path: &str) -> Result, String> { pub fn jeprof_heap_profile(path: &str) -> Result, String> { info!("using jeprof to process {}", path); - let output = Command::new("./jeprof") - .args(["--show_bytes", "./bin/tikv-server", path, "--svg"]) - .output() + let bin = std::env::current_exe().map_err(|e| format!("get current exe path fail: {}", e))?; + let mut jeprof = Command::new("perl") + .args([ + "/dev/stdin", + "--show_bytes", + &bin.as_os_str().to_string_lossy(), + path, + "--svg", + ]) + .stdin(Stdio::piped()) + .spawn() + .map_err(|e| format!("spawn jeprof fail: {}", e))?; + jeprof + .stdin + .take() + .unwrap() + .write_all(include_bytes!("jeprof.in")) + .unwrap(); + let output = jeprof + .wait_with_output() .map_err(|e| format!("jeprof: {}", e))?; if !output.status.success() { let stderr = std::str::from_utf8(&output.stderr).unwrap_or("invalid utf8"); @@ -246,7 +273,7 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { } pub fn heap_profiles_dir() -> Option { - PROFILE_ACTIVE + HEAP_PROFILE_ACTIVE .lock() .unwrap() .as_ref() @@ -381,7 +408,7 @@ mod tests { .build() .unwrap(); - let expected = "Already in Profiling"; + let expected = "Already in CPU Profiling"; let (tx1, rx1) = oneshot::channel(); let rx1 = rx1.map_err(|_| "channel canceled".to_owned()); @@ -393,17 +420,29 @@ mod tests { let res2 = rt.spawn(start_one_cpu_profile(rx2, 99, false)); assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); - let (_tx2, rx2) = oneshot::channel(); - let rx2 = rx2.map_err(|_| "channel canceled".to_owned()); - let res2 = rt.spawn(start_one_heap_profile(rx2, false)); - assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); + drop(tx1); + block_on(res1).unwrap().unwrap_err(); + + let expected = "Already in Heap Profiling"; + + let (tx1, rx1) = mpsc::channel(1); + let res1 = rt.spawn(activate_heap_profile( + Some(rx1), + std::env::temp_dir(), + || {}, + )); + thread::sleep(Duration::from_millis(100)); let (_tx2, rx2) = mpsc::channel(1); - let res2 = rt.spawn(activate_heap_profile(rx2, std::env::temp_dir(), || {})); + let res2 = rt.spawn(activate_heap_profile( + Some(rx2), + std::env::temp_dir(), + || {}, + )); assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); drop(tx1); - block_on(res1).unwrap().unwrap_err(); + block_on(res1).unwrap().unwrap(); } #[test] @@ -416,7 +455,7 @@ 
mod tests { // Test activated profiling can be stopped by canceling the period stream. let (tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); + let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); drop(tx); block_on(res).unwrap().unwrap(); @@ -427,7 +466,7 @@ mod tests { let (_tx, _rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile( - _rx, + Some(_rx), std::env::temp_dir(), on_activated, )); @@ -446,7 +485,7 @@ mod tests { // Test heap profiling can be stopped by sending an error. let (mut tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); + let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); block_on(tx.send(Err("test".to_string()))).unwrap(); block_on(res).unwrap().unwrap_err(); @@ -457,7 +496,7 @@ mod tests { let (_tx, _rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile( - _rx, + Some(_rx), std::env::temp_dir(), on_activated, )); From 19e1d949eb3521d88ed80d100738576fa0b2570c Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 23 Oct 2023 14:12:30 +0800 Subject: [PATCH 102/203] raftstore: fix an OOM issue by paginate scan unapplied config changes (#15806) close tikv/tikv#15770 Before start election, raft-rs has to check if there is any unapplied conf change entry. In the current implementation, this needs to scan logs from [unapplied_index, committed_index]. It essentially takes unbounded memory when raft peers that has many unapplied logs. To fix the issue, TiKV can paginate scan raft log which has a fixed memory usage upper bound. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f222631d7723..ff8db7319241 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4259,7 +4259,7 @@ dependencies = [ [[package]] name = "raft" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" +source = "git+https://github.com/tikv/raft-rs?branch=master#f60fb9e143e5b93f7db8917ea376cda04effcbb4" dependencies = [ "bytes", "fxhash", @@ -4318,7 +4318,7 @@ dependencies = [ [[package]] name = "raft-proto" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" +source = "git+https://github.com/tikv/raft-rs?branch=master#f60fb9e143e5b93f7db8917ea376cda04effcbb4" dependencies = [ "bytes", "protobuf", From 6075c3a00da1ea4bf9c2f1cc9d802ec44dfa0df5 Mon Sep 17 00:00:00 2001 From: TonsnakeLin <87681388+TonsnakeLin@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:05:02 +0800 Subject: [PATCH 103/203] raftstore: calculate group id for every peer when it is syn-recover status (#15785) close tikv/tikv#15784 Signed-off-by: TonsnakeLin Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 35 ++++---- components/test_pd_client/src/pd.rs | 22 ++++- .../raftstore/test_replication_mode.rs | 86 +++++++++++++++++-- 3 files changed, 119 insertions(+), 24 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index e72d32f8e914..cab88a26585e 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -975,29 +975,32 @@ where pub fn switch_replication_mode(&mut self, state: &Mutex) 
{ self.replication_sync = false; let guard = state.lock().unwrap(); - let enable_group_commit = if guard.status().get_mode() == ReplicationMode::Majority { - self.replication_mode_version = 0; - self.dr_auto_sync_state = DrAutoSyncState::Async; - false - } else { - self.dr_auto_sync_state = guard.status().get_dr_auto_sync().get_state(); - self.replication_mode_version = guard.status().get_dr_auto_sync().state_id; - match guard.status().get_dr_auto_sync().get_state() { - // SyncRecover will enable group commit after it catches up logs. - DrAutoSyncState::Async | DrAutoSyncState::SyncRecover => false, - _ => true, - } - }; + let (enable_group_commit, calculate_group_id) = + if guard.status().get_mode() == ReplicationMode::Majority { + self.replication_mode_version = 0; + self.dr_auto_sync_state = DrAutoSyncState::Async; + (false, false) + } else { + self.dr_auto_sync_state = guard.status().get_dr_auto_sync().get_state(); + self.replication_mode_version = guard.status().get_dr_auto_sync().state_id; + match guard.status().get_dr_auto_sync().get_state() { + // SyncRecover will enable group commit after it catches up logs. + DrAutoSyncState::Async => (false, false), + DrAutoSyncState::SyncRecover => (false, true), + _ => (true, true), + } + }; drop(guard); - self.switch_group_commit(enable_group_commit, state); + self.switch_group_commit(enable_group_commit, calculate_group_id, state); } fn switch_group_commit( &mut self, enable_group_commit: bool, + calculate_group_id: bool, state: &Mutex, ) { - if enable_group_commit { + if enable_group_commit || calculate_group_id { let mut guard = state.lock().unwrap(); let ids = mem::replace( guard.calculate_commit_group( @@ -5141,7 +5144,7 @@ where // should enable group commit to promise `IntegrityOverLabel`. then safe // to switch to the `Sync` phase. 
if self.dr_auto_sync_state == DrAutoSyncState::SyncRecover { - self.switch_group_commit(true, &ctx.global_replication_state) + self.switch_group_commit(true, true, &ctx.global_replication_state) } self.replication_sync = true; } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index a9141bf62992..7f00cf35ccd1 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1437,15 +1437,33 @@ impl TestPdClient { cluster.replication_status = Some(status); } - pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { + pub fn switch_replication_mode( + &self, + state: Option, + available_stores: Vec, + ) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); + if state.is_none() { + status.set_mode(ReplicationMode::Majority); + let mut dr = status.mut_dr_auto_sync(); + dr.state_id += 1; + return; + } let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; - dr.set_state(state); + dr.set_state(state.unwrap()); dr.available_stores = available_stores; } + pub fn switch_to_drautosync_mode(&self) { + let mut cluster = self.cluster.wl(); + let status = cluster.replication_status.as_mut().unwrap(); + status.set_mode(ReplicationMode::DrAutoSync); + let mut dr = status.mut_dr_auto_sync(); + dr.state_id += 1; + } + pub fn region_replication_status(&self, region_id: u64) -> RegionReplicationStatus { self.cluster .rl() diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 367ac63aabbe..38054c1a9958 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -34,6 +34,18 @@ fn run_cluster(cluster: &mut Cluster) { cluster.must_put(b"k1", b"v0"); } +fn prepare_labels(cluster: &mut Cluster) { + cluster.add_label(1, "dc", "dc1"); + cluster.add_label(2, "dc", "dc1"); + cluster.add_label(3, "dc", "dc2"); + cluster.add_label(1, "zone", "z1"); + cluster.add_label(2, "zone", "z2"); + cluster.add_label(3, "zone", "z3"); + cluster.add_label(1, "host", "h1"); + cluster.add_label(2, "host", "h2"); + cluster.add_label(3, "host", "h3"); +} + /// When using DrAutoSync replication mode, data should be replicated to /// different labels before committed. 
#[test] @@ -119,7 +131,7 @@ fn test_sync_recover_after_apply_snapshot() { // swith to async cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); thread::sleep(Duration::from_millis(100)); @@ -136,7 +148,7 @@ fn test_sync_recover_after_apply_snapshot() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::SyncRecover, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); // Add node 3 back, snapshot will apply cluster.clear_send_filters(); @@ -265,7 +277,7 @@ fn test_switching_replication_mode() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); thread::sleep(Duration::from_millis(100)); @@ -275,7 +287,7 @@ fn test_switching_replication_mode() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::SyncRecover, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); let mut request = new_request( region.get_id(), @@ -331,7 +343,7 @@ fn test_replication_mode_allowlist() { run_cluster(&mut cluster); cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![1]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![1]); thread::sleep(Duration::from_millis(100)); // 2,3 are paused, so it should not be able to write. @@ -357,7 +369,7 @@ fn test_replication_mode_allowlist() { // clear allowlist. cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } @@ -456,6 +468,68 @@ fn test_migrate_replication_mode() { assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); } +#[test] +fn test_migrate_majority_to_drautosync() { + // 1. start cluster, enable dr-auto-sync and set labels. + let mut cluster = new_server_cluster(0, 3); + cluster.pd_client.disable_default_operator(); + cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_log_gc_threshold = 10; + prepare_labels(&mut cluster); + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(b"k1", b"v0"); + cluster.pd_client.configure_dr_auto_sync("dc"); + thread::sleep(Duration::from_millis(100)); + let region = cluster.get_region(b"k1"); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_put_cf_cmd("default", b"k2", b"v2")], + false, + ); + request.mut_header().set_peer(new_peer(1, 1)); + let (cb, mut rx) = make_cb(&request); + cluster + .sim + .rl() + .async_command_on_node(1, request, cb) + .unwrap(); + assert_eq!(rx.recv_timeout(Duration::from_millis(100)).is_ok(), true); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); + let state = cluster.pd_client.region_replication_status(region.get_id()); + assert_eq!(state.state_id, 1); + assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); + + // 2. swith to marjority mode. 
+ cluster.pd_client.switch_replication_mode(None, vec![]); + thread::sleep(Duration::from_millis(150)); + + // 3. spilt the region and make a new region, the regions status must be + // SimpleMajority. + cluster.must_split(®ion, b"m1"); + thread::sleep(Duration::from_millis(150)); + cluster.must_put(b"n4", b"v4"); + must_get_equal(&cluster.get_engine(1), b"n4", b"v4"); + let region_m = cluster.get_region(b"n4"); + let region_k = cluster.get_region(b"k1"); + + // 4. switch to dy-auto-sync mode, the new region generated at marjority mode + // becomes IntegrityOverLabel again. + cluster.pd_client.switch_to_drautosync_mode(); + thread::sleep(Duration::from_millis(100)); + let state_m = cluster + .pd_client + .region_replication_status(region_m.get_id()); + let state_k = cluster + .pd_client + .region_replication_status(region_k.get_id()); + assert_eq!(state_m.state_id, 3); + assert_eq!(state_m.state, RegionReplicationState::IntegrityOverLabel); + assert_eq!(state_k.state_id, 3); + assert_eq!(state_k.state, RegionReplicationState::IntegrityOverLabel); +} + /// Tests if labels are loaded correctly after rolling start. #[test] fn test_loading_label_after_rolling_start() { From 22364f2c3e936b5c3325ea724573559b12013688 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Mon, 23 Oct 2023 16:38:02 +0800 Subject: [PATCH 104/203] copr: fix cast_bytes_like_as_duration encoding failed (#15819) close tikv/tikv#15820 fix cast_bytes_like_as_duration encoding failed Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tidb_query_expr/src/impl_cast.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 76e90f79c5bf..16e33e71d134 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1038,10 +1038,10 @@ fn cast_bytes_like_as_duration( val: &[u8], overflow_as_null: bool, ) -> Result> { - let val = std::str::from_utf8(val).map_err(Error::Encoding)?; + let val = String::from_utf8_lossy(val); let result = Duration::parse_consider_overflow( ctx, - val, + &val, extra.ret_field_type.get_decimal() as i8, overflow_as_null, ); @@ -6450,6 +6450,7 @@ mod tests { b"-17:51:04.78", b"17:51:04.78", b"-17:51:04.78", + b"\x92\x6b", ]; test_as_duration_helper( From 9a0504d89ac33e87538ef8781a4871795c9a3ba4 Mon Sep 17 00:00:00 2001 From: lijie Date: Mon, 23 Oct 2023 20:00:31 +0800 Subject: [PATCH 105/203] chore: bump version to 7.6.0-alpha (#15810) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff8db7319241..52408df1ab2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6469,7 +6469,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.5.0-alpha" +version = "7.6.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 32b2d858b6a6..edebbc46f1dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.5.0-alpha" +version = "7.6.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From e254b8ad44b4d346d71bd2241776035a6c674c6d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 24 Oct 2023 00:56:02 +0800 Subject: [PATCH 106/203] metrics: Add extra necessary metrics for monitoring 
SlowTrend performance. (#15797) ref tikv/tikv#15271 This pr supply extra necessary metrics for `SlowTrend`, which is useful for users to monitor the performance of `SlowTrend`, including: 1. Slow Trend, shows the changing rate of the slowness on I/O operations. 'value > 0' means the related store might has a slow trend. 2. AVG Sampling Latency, records the sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store. Only when one of TiKV node has been marked with **Slow Trend**, and its severity reaches the limitation, it will be chosen as the slow node. 3. QPS of each store, store slow trend result, showing the QPS of each store. 4. QPS Changing Rate, shows the changing trend of QPS on each store. 'value < 0' means the QPS has a dropping trend. Signed-off-by: lucasliang --- metrics/grafana/tikv_details.json | 392 +++++++++++++++++++++++++++++- 1 file changed, 390 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f2654ba3da13..fced6f6bab45 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1580,7 +1580,8 @@ "intervalFactor": 2, "legendFormat": "{{instance}} - buckets", "refId": "B", - "step": 10 + "step": 10, + "hide": true } ], "thresholds": [], @@ -14520,7 +14521,7 @@ "defaults": {}, "overrides": [] }, - "gridPos": { + "gridPos": { "h": 7, "w": 12, "x": 0, @@ -48363,6 +48364,393 @@ ], "title": "Backup Log", "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 24763573238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The changing trend of the slowness on I/O operations. 'value > 0' means the related store might has a slow trend.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 53 + }, + "hiddenSeries": false, + "id": 24763574116, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Slow Trend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The changing trend of QPS on each store. 
'value < 0' means the QPS has a dropping trend.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 53 + }, + "hiddenSeries": false, + "id": 24763574117, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "QPS Changing Trend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 60 + }, + "hiddenSeries": false, + "id": 24763574115, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [ + { + "value": 275000, + "colorMode": "critical", + "op": "gt", + "fill": false, + "line": true, + "visible": true, + "yaxis": "left" + } + ], + "timeRegions": [], + "title": "AVG Sampling Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The QPS 
of each store.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 60 + }, + "hiddenSeries": false, + "id": 24763573970, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_result_value{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "QPS of each store", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Slow Trend Statistics", + "type": "row" } ], "refresh": "1m", From f3bfe13c6c479fa209de308684dd7e595631527e Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 25 Oct 2023 17:50:03 +0800 Subject: [PATCH 107/203] grafana: Fix wrong scheduler command variables of grafana in cloud env (#15833) close tikv/tikv#15832 Fix wrong scheduler command variables of grafana in cloud env by adding a `\b` to regex to make sure it's at the word boundary. Signed-off-by: Connor1996 --- metrics/grafana/tikv_details.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index fced6f6bab45..4a72d3c204a9 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -48858,7 +48858,7 @@ "refId": "StandardVariableQuery" }, "refresh": 1, - "regex": "/type=\"([^\"]+)\"/", + "regex": "/\btype=\"([^\"]+)\"/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", From c6adb042c9a05fb0051bc34d11fb5d4ab5273033 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 26 Oct 2023 11:32:03 +0800 Subject: [PATCH 108/203] raftstore: Fix group commit is mistakenly enabled in sync recover state (#15830) close tikv/tikv#15817 When splitting a region, group commit is mistakenly enabled in the sync-recover state. If the region is in joint state and demoting voter is down, the commit condition can't meet. 
Fix group commit is mistakenly enabled in sync recover state Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 5 +- components/test_pd_client/src/pd.rs | 9 +-- .../raftstore/test_replication_mode.rs | 71 +++++++++++++++++-- 3 files changed, 72 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index cab88a26585e..abe14bf7c2a9 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -966,7 +966,10 @@ where return; } self.replication_mode_version = state.status().get_dr_auto_sync().state_id; - let enable = state.status().get_dr_auto_sync().get_state() != DrAutoSyncState::Async; + let enable = !matches!( + state.status().get_dr_auto_sync().get_state(), + DrAutoSyncState::Async | DrAutoSyncState::SyncRecover + ); self.raft_group.raft.enable_group_commit(enable); self.dr_auto_sync_state = state.status().get_dr_auto_sync().get_state(); } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 7f00cf35ccd1..341495cdb52e 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1450,20 +1450,13 @@ impl TestPdClient { dr.state_id += 1; return; } + status.set_mode(ReplicationMode::DrAutoSync); let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state.unwrap()); dr.available_stores = available_stores; } - pub fn switch_to_drautosync_mode(&self) { - let mut cluster = self.cluster.wl(); - let status = cluster.replication_status.as_mut().unwrap(); - status.set_mode(ReplicationMode::DrAutoSync); - let mut dr = status.mut_dr_auto_sync(); - dr.state_id += 1; - } - pub fn region_replication_status(&self, region_id: u64) -> RegionReplicationStatus { self.cluster .rl() diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 38054c1a9958..76059fa8f876 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, thread, time::Duration}; +use std::{iter::FromIterator, sync::Arc, thread, time::Duration}; use kvproto::replication_modepb::*; use pd_client::PdClient; @@ -99,6 +99,67 @@ fn test_dr_auto_sync() { assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); } +// When in sync recover state, and the region is in joint state. The leave joint +// state should be committed successfully. 
+#[test] +fn test_sync_recover_joint_state() { + let mut cluster = new_server_cluster(0, 5); + cluster.pd_client.disable_default_operator(); + cluster.pd_client.configure_dr_auto_sync("zone"); + cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_log_gc_threshold = 1; + cluster.add_label(1, "zone", "ES"); + cluster.add_label(2, "zone", "ES"); + cluster.add_label(3, "zone", "ES"); + cluster.add_label(4, "zone", "WS"); // old dr + cluster.add_label(5, "zone", "WS"); // new dr + + let pd_client = Arc::clone(&cluster.pd_client); + let region_id = cluster.run_conf_change(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 5); + cluster.must_put(b"k1", b"v1"); + + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.must_add_peer(region_id, new_peer(4, 4)); + pd_client.must_add_peer(region_id, new_learner_peer(5, 5)); + + // Make one node down + cluster.stop_node(4); + + // Switch to sync recover + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); + + cluster.must_put(b"k2", b"v2"); + assert_eq!(cluster.must_get(b"k2").unwrap(), b"v2"); + + // Enter joint, now we have C_old(1, 2, 3, 4) and C_new(1, 2, 3, 5) + pd_client.must_joint_confchange( + region_id, + vec![ + (ConfChangeType::AddLearnerNode, new_learner_peer(4, 4)), + (ConfChangeType::AddNode, new_peer(5, 5)), + ], + ); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + assert_ne!(left.get_id(), right.get_id()); + + // Leave joint + pd_client.must_leave_joint(left.get_id()); + pd_client.must_leave_joint(right.get_id()); +} + #[test] fn test_sync_recover_after_apply_snapshot() { let mut cluster = prepare_cluster(); @@ -501,7 +562,7 @@ fn test_migrate_majority_to_drautosync() { assert_eq!(state.state_id, 1); assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); - // 2. swith to marjority mode. + // 2. switch to majority mode. cluster.pd_client.switch_replication_mode(None, vec![]); thread::sleep(Duration::from_millis(150)); @@ -514,9 +575,11 @@ fn test_migrate_majority_to_drautosync() { let region_m = cluster.get_region(b"n4"); let region_k = cluster.get_region(b"k1"); - // 4. switch to dy-auto-sync mode, the new region generated at marjority mode + // 4. switch to dy-auto-sync mode, the new region generated at majority mode // becomes IntegrityOverLabel again. - cluster.pd_client.switch_to_drautosync_mode(); + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); let state_m = cluster .pd_client From 12c2cf10982979e2f47b37c1561f1e02ade62526 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Thu, 26 Oct 2023 12:39:33 +0800 Subject: [PATCH 109/203] raftstore: improve the bucket split strategy (#15798) close tikv/tikv#13671 there are three reason may cause the bucket not split: 1. split check tick will refresh bucket info even info the bucket version not change 2. the suspect buckets only conside the increment flow 3. all the bucket increment flows are reset if one bucket is updated. To solve this, bucket stats only record the increment flow and reset it after meta size updated. 
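
To make the bookkeeping concrete, here is a minimal standalone sketch of the "record only the
increment flow, reset it after reporting" idea. The `IncrementalFlow` type and its fields are
hypothetical stand-ins for illustration only, not the real `pd_client::BucketStat` /
`BucketStatsInfo` API that the diff below actually uses:

    // Sketch: per-bucket write stats hold only increments since the last report.
    #[derive(Clone, Debug)]
    struct IncrementalFlow {
        // one write-bytes counter per bucket (hypothetical field)
        write_bytes: Vec<u64>,
    }

    impl IncrementalFlow {
        fn new(buckets: usize) -> Self {
            IncrementalFlow { write_bytes: vec![0; buckets] }
        }

        // Accumulate a delta produced by apply; only increments are merged,
        // never absolute totals.
        fn merge(&mut self, delta: &IncrementalFlow) {
            for (acc, d) in self.write_bytes.iter_mut().zip(&delta.write_bytes) {
                *acc += *d;
            }
        }

        // Take the accumulated increments for reporting and reset them, so the
        // next report only carries flow observed after this call.
        fn take_for_report(&mut self) -> IncrementalFlow {
            let report = self.clone();
            for b in &mut self.write_bytes {
                *b = 0;
            }
            report
        }
    }

    fn main() {
        let mut stats = IncrementalFlow::new(3);
        stats.merge(&IncrementalFlow { write_bytes: vec![2, 0, 2] });
        let report = stats.take_for_report();
        assert_eq!(report.write_bytes, vec![2, 0, 2]); // reported increments
        assert_eq!(stats.write_bytes, vec![0, 0, 0]);  // reset after report
    }

Because the reported value is a delta taken at report time, splitting or merging the bucket
metadata in between does not distort the next report.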
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/bucket.rs | 391 +---------------- components/raftstore-v2/src/operation/mod.rs | 1 - components/raftstore-v2/src/raft/peer.rs | 11 +- components/raftstore/src/store/fsm/apply.rs | 6 +- components/raftstore/src/store/fsm/peer.rs | 211 +++------- components/raftstore/src/store/mod.rs | 18 +- components/raftstore/src/store/peer.rs | 37 +- components/raftstore/src/store/worker/mod.rs | 3 +- components/raftstore/src/store/worker/read.rs | 6 +- .../raftstore/src/store/worker/split_check.rs | 396 +++++++++++++++++- .../raftstore/test_split_region.rs | 13 +- 11 files changed, 508 insertions(+), 585 deletions(-) diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 242b9a9b33ba..920a4e68e8c8 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -6,12 +6,12 @@ use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - metapb::{self, RegionEpoch}, + metapb::RegionEpoch, raft_serverpb::{ExtraMessageType, RaftMessage, RefreshBuckets}, }; -use pd_client::{BucketMeta, BucketStat}; +use pd_client::BucketMeta; use raftstore::{ - coprocessor::{Config, RegionChangeEvent}, + coprocessor::RegionChangeEvent, store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, }; use slog::{error, info}; @@ -24,213 +24,6 @@ use crate::{ worker::pd, }; -#[derive(Debug, Clone, Default)] -pub struct BucketStatsInfo { - // the stats is increment flow. - bucket_stat: Option, - // the report bucket stat records the increment stats after last report pd. - // it will be reset after report pd. - report_bucket_stat: Option, - // avoid the version roll back, it record the last bucket version if bucket stat isn't none. - last_bucket_version: u64, -} - -impl BucketStatsInfo { - /// returns all bucket ranges those's write_bytes exceed the given - /// diff_size_threshold. 
- pub fn gen_bucket_range_for_update( - &self, - region_bucket_max_size: u64, - ) -> Option> { - let region_buckets = self.bucket_stat.as_ref()?; - let stats = ®ion_buckets.stats; - let keys = ®ion_buckets.meta.keys; - let sizes = ®ion_buckets.meta.sizes; - - let mut suspect_bucket_ranges = vec![]; - assert_eq!(keys.len(), stats.write_bytes.len() + 1); - for i in 0..stats.write_bytes.len() { - let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; - if estimated_bucket_size >= region_bucket_max_size { - suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); - } - } - Some(suspect_bucket_ranges) - } - - #[inline] - pub fn version(&self) -> u64 { - self.bucket_stat - .as_ref() - .map(|b| b.meta.version) - .or(Some(self.last_bucket_version)) - .unwrap_or_default() - } - - #[inline] - pub fn add_bucket_flow(&mut self, delta: &Option) { - if let (Some(buckets), Some(report_buckets), Some(delta)) = ( - self.bucket_stat.as_mut(), - self.report_bucket_stat.as_mut(), - delta, - ) { - buckets.merge(delta); - report_buckets.merge(delta); - } - } - - #[inline] - pub fn set_bucket_stat(&mut self, buckets: Option) { - self.bucket_stat = buckets.clone(); - if let Some(new_buckets) = buckets { - self.last_bucket_version = new_buckets.meta.version; - let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); - if let Some(old) = &mut self.report_bucket_stat { - new_report_buckets.merge(old); - *old = new_report_buckets; - } else { - self.report_bucket_stat = Some(new_report_buckets); - } - } else { - self.report_bucket_stat = None; - } - } - - #[inline] - pub fn report_bucket_stat(&mut self) -> BucketStat { - let current = self.report_bucket_stat.as_mut().unwrap(); - let delta = current.clone(); - current.clear_stats(); - delta - } - - #[inline] - pub fn bucket_stat(&self) -> &Option { - &self.bucket_stat - } - - pub fn on_refresh_region_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - buckets: Vec, - region_epoch: RegionEpoch, - region: metapb::Region, - bucket_ranges: Option>, - ) -> bool { - let change_bucket_version: bool; - // The region buckets reset after this region happened split or merge. - // The message should be dropped if it's epoch is lower than the regions. - // The bucket ranges is none when the region buckets is also none. - // So this condition indicates that the region buckets needs to refresh not - // renew. - if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ - assert_eq!(buckets.len(), bucket_ranges.len()); - change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); - }else{ - change_bucket_version = true; - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. - self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); - } - change_bucket_version - } - - fn update_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - buckets: Vec, - region_epoch: RegionEpoch, - bucket_ranges: &Vec, - ) -> bool { - let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); - let mut change_bucket_version = false; - let mut meta_idx = 0; - let mut region_buckets = origin_region_buckets.clone(); - let mut meta = (*region_buckets.meta).clone(); - meta.region_epoch = region_epoch; - - // bucket stats will clean if the bucket size is updated. 
- for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - // the bucket ranges maybe need to split or merge not all the meta keys, so it - // needs to find the first keys. - while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { - meta_idx += 1; - } - // meta_idx can't be not the last entry (which is end key) - if meta_idx >= meta.keys.len() - 1 { - break; - } - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = - cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[meta_idx] = bucket.size; - region_buckets.clean_stats(meta_idx); - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && meta_idx != 0 - && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(meta_idx); - meta.left_merge(meta_idx); - change_bucket_version = true; - continue; - } - } else { - // update size - meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; - region_buckets.clean_stats(meta_idx); - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - meta_idx += 1; - region_buckets.split(meta_idx); - meta.split(meta_idx, bucket_key); - change_bucket_version = true; - } - } - meta_idx += 1; - } - if change_bucket_version { - meta.version = next_bucket_version; - } - region_buckets.meta = Arc::new(meta); - self.set_bucket_stat(Some(region_buckets)); - change_bucket_version - } - - fn init_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - mut buckets: Vec, - region_epoch: RegionEpoch, - region: metapb::Region, - ) { - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. - assert_eq!(buckets.len(), 1); - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - let mut meta = BucketMeta { - region_id: region.get_id(), - region_epoch, - version: next_bucket_version, - keys: bucket_keys, - sizes: vec![cfg.region_bucket_size.0; bucket_count], - }; - // padding the boundary keys and initialize the flow. - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - let bucket_stats = BucketStat::from_meta(Arc::new(meta)); - self.set_bucket_stat(Some(bucket_stats)); - } -} - impl Peer { #[inline] pub fn on_refresh_region_buckets( @@ -250,14 +43,13 @@ impl Peer { let current_version = self.region_buckets_info().version(); let next_bucket_version = util::gen_bucket_version(self.term(), current_version); - // let mut is_first_refresh = true; let region = self.region().clone(); let change_bucket_version = self.region_buckets_info_mut().on_refresh_region_buckets( &store_ctx.coprocessor_host.cfg, next_bucket_version, buckets, region_epoch, - region, + ®ion, bucket_ranges, ); let region_buckets = self @@ -443,178 +235,3 @@ where self.schedule_tick(PeerTick::ReportBuckets); } } - -#[cfg(test)] -mod tests { - use super::*; - - // create BucketStatsInfo include three keys: ["","100","200",""]. 
- fn mock_bucket_stats_info() -> BucketStatsInfo { - let mut bucket_stats_info = BucketStatsInfo::default(); - let cfg = Config::default(); - let next_bucket_version = 1; - let bucket_ranges = None; - let mut region_epoch = RegionEpoch::default(); - region_epoch.set_conf_ver(1); - region_epoch.set_version(1); - let mut region = metapb::Region::default(); - region.set_id(1); - - let mut buckets = vec![]; - let mut bucket = Bucket::default(); - bucket.keys.push(vec![100]); - bucket.keys.push(vec![200]); - buckets.insert(0, bucket); - - let _ = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch, - region, - bucket_ranges, - ); - bucket_stats_info - } - - #[test] - pub fn test_version() { - let mut bucket_stats_info = mock_bucket_stats_info(); - assert_eq!(1, bucket_stats_info.version()); - bucket_stats_info.set_bucket_stat(None); - assert_eq!(1, bucket_stats_info.version()); - - let mut meta = BucketMeta::default(); - meta.version = 2; - meta.keys.push(vec![]); - meta.keys.push(vec![]); - let bucket_stat = BucketStat::from_meta(Arc::new(meta)); - bucket_stats_info.set_bucket_stat(Some(bucket_stat)); - assert_eq!(2, bucket_stats_info.version()); - } - - #[test] - pub fn test_insert_new_buckets() { - let bucket_stats_info = mock_bucket_stats_info(); - - let cfg = Config::default(); - let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); - assert_eq!( - vec![vec![], vec![100], vec![200], vec![]], - bucket_stat.meta.keys - ); - for i in 0..bucket_stat.stats.write_bytes.len() { - assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); - assert_eq!(0, bucket_stat.stats.write_bytes[i]); - } - } - - #[test] - pub fn test_report_buckets() { - let mut bucket_stats_info = mock_bucket_stats_info(); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - let mut delta_bucket_stats = bucket_stats.clone(); - delta_bucket_stats.write_key(&[1], 1); - delta_bucket_stats.write_key(&[201], 1); - bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); - let bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - let report_bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); - bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - } - - #[test] - pub fn test_spilt_and_merge_buckets() { - let mut bucket_stats_info = mock_bucket_stats_info(); - let next_bucket_version = 2; - let mut region = metapb::Region::default(); - region.set_id(1); - let cfg = Config::default(); - let bucket_size = cfg.region_bucket_size.0; - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - let region_epoch = bucket_stats.meta.region_epoch.clone(); - - // step1: update buckets flow - let mut delta_bucket_stats = bucket_stats.clone(); - delta_bucket_stats.write_key(&[1], 1); - delta_bucket_stats.write_key(&[201], 1); - bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - // step2: tick not affect anything - let bucket_ranges = Some(vec![]); - let buckets = vec![]; - let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch.clone(), - region.clone(), - bucket_ranges, - ); - let bucket_stats = 
bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert!(!change_bucket_version); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - // step3: split key 50 - let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); - let mut bucket = Bucket::default(); - bucket.keys = vec![vec![50]]; - bucket.size = bucket_size; - let mut buckets = vec![bucket]; - change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets.clone(), - region_epoch.clone(), - region.clone(), - bucket_ranges.clone(), - ); - assert!(change_bucket_version); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!( - vec![vec![], vec![50], vec![100], vec![200], vec![]], - bucket_stats.meta.keys - ); - assert_eq!( - vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], - bucket_stats.meta.sizes - ); - assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); - - // step4: merge [50-100] to [0-50], - bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); - let mut bucket = Bucket::default(); - bucket.keys = vec![]; - bucket.size = 0; - buckets = vec![bucket]; - change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch, - region, - bucket_ranges, - ); - assert!(change_bucket_version); - - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!( - vec![vec![], vec![100], vec![200], vec![]], - bucket_stats.meta.keys - ); - assert_eq!( - vec![bucket_size / 2, bucket_size, bucket_size], - bucket_stats.meta.sizes - ); - assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); - - // report buckets doesn't be affected by the split and merge. - let report_bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); - } -} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 6d5cba9fff81..9ccf08d6d549 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -24,7 +24,6 @@ pub use ready::{ }; pub(crate) use self::{ - bucket::BucketStatsInfo, command::SplitInit, query::{LocalReader, ReadDelegatePair, SharedReadTablet}, txn_ext::TxnContext, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 2c8b8cef1db2..9b095b872e7b 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -23,8 +23,9 @@ use raftstore::{ fsm::ApplyMetrics, metrics::RAFT_PEER_PENDING_DURATION, util::{Lease, RegionReadProgress}, - Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, ReadDelegate, - ReadIndexQueue, ReadProgress, TabletSnapManager, UnsafeRecoveryState, WriteTask, + BucketStatsInfo, Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, + ReadDelegate, ReadIndexQueue, ReadProgress, TabletSnapManager, UnsafeRecoveryState, + WriteTask, }, }; use slog::{debug, info, Logger}; @@ -35,9 +36,9 @@ use crate::{ batch::StoreContext, fsm::ApplyScheduler, operation::{ - AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, - GcPeerContext, MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, - SplitFlowControl, SplitPendingAppend, TxnContext, + AbnormalPeerContext, AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, + MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, SplitFlowControl, + 
SplitPendingAppend, TxnContext, }, router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 038171d97151..339dff68e76e 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -679,7 +679,7 @@ where exec_res: results, metrics: mem::take(&mut delegate.metrics), applied_term: delegate.applied_term, - bucket_stat: delegate.buckets.clone().map(Box::new), + bucket_stat: delegate.buckets.clone(), }); if !self.kv_wb().is_empty() { // Pending writes not flushed, need to set seqno to following ApplyRes later @@ -3874,7 +3874,7 @@ where pub applied_term: u64, pub exec_res: VecDeque>, pub metrics: ApplyMetrics, - pub bucket_stat: Option>, + pub bucket_stat: Option, pub write_seqno: Vec, } @@ -6929,7 +6929,7 @@ mod tests { router.schedule_task(1, Msg::apply(apply2)); let res = fetch_apply_res(&rx); - let bucket_version = res.bucket_stat.unwrap().as_ref().meta.version; + let bucket_version = res.bucket_stat.unwrap().meta.version; assert_eq!(bucket_version, 2); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7504f746abef..49f558250eed 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -41,7 +41,7 @@ use kvproto::{ replication_modepb::{DrAutoSyncState, ReplicationMode}, }; use parking_lot::RwLockWriteGuard; -use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::BucketMeta; use protobuf::Message; use raft::{ self, @@ -2327,10 +2327,11 @@ where return; } let applied_index = res.apply_state.applied_index; - let buckets = self.fsm.peer.region_buckets.as_mut(); - if let (Some(delta), Some(buckets)) = (res.bucket_stat, buckets) { - buckets.merge(&delta); - } + self.fsm + .peer + .region_buckets_info_mut() + .add_bucket_flow(&res.bucket_stat); + self.fsm.has_ready |= self.fsm.peer.post_apply( self.ctx, res.apply_state, @@ -5989,7 +5990,7 @@ where fn on_refresh_region_buckets( &mut self, region_epoch: RegionEpoch, - mut buckets: Vec, + buckets: Vec, bucket_ranges: Option>, _cb: Callback, ) { @@ -6017,14 +6018,14 @@ where // test purpose #[cfg(any(test, feature = "testexport"))] { - let default_buckets = BucketStat::default(); test_only_callback( _cb, self.fsm .peer - .region_buckets + .region_buckets_info() + .bucket_stat() .as_ref() - .unwrap_or(&default_buckets) + .unwrap() .meta .clone(), ); @@ -6032,108 +6033,53 @@ where return; } - let mut current_version = self + let current_version = self.fsm.peer.region_buckets_info().version(); + let next_bucket_version = util::gen_bucket_version(self.fsm.peer.term(), current_version); + let region = self.region().clone(); + let change_bucket_version = self .fsm .peer - .region_buckets + .region_buckets_info_mut() + .on_refresh_region_buckets( + &self.ctx.coprocessor_host.cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + let region_buckets = self + .fsm + .peer + .region_buckets_info() + .bucket_stat() .as_ref() - .map(|b| b.meta.version) - .unwrap_or_default(); - if current_version == 0 { - current_version = self - .fsm - .peer - .last_region_buckets - .as_ref() - .map(|b| b.meta.version) - .unwrap_or_default(); - } - let mut region_buckets: BucketStat; - if let Some(bucket_ranges) = bucket_ranges { - assert_eq!(buckets.len(), bucket_ranges.len()); - let mut i = 0; - region_buckets = 
self.fsm.peer.region_buckets.clone().unwrap(); - let mut meta = (*region_buckets.meta).clone(); - if !buckets.is_empty() { - meta.version = util::gen_bucket_version(self.fsm.peer.term(), current_version); - } - meta.region_epoch = region_epoch; - for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - while i < meta.keys.len() && meta.keys[i] != bucket_range.0 { - i += 1; - } - assert!(i != meta.keys.len()); - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = - self.ctx.coprocessor_host.cfg.region_bucket_merge_size_ratio - * (self.ctx.coprocessor_host.cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[i] = bucket.size; - // i is not the last entry (which is end key) - assert!(i < meta.keys.len() - 1); - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && i != 0 - && meta.sizes[i - 1] + bucket.size - < self.ctx.coprocessor_host.cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(i); - meta.left_merge(i); - continue; - } - } else { - // update size - meta.sizes[i] = bucket.size / (bucket.keys.len() + 1) as u64; - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - i += 1; - region_buckets.split(i); - meta.split(i, bucket_key); - } - } - i += 1; - } - region_buckets.meta = Arc::new(meta); - } else { - debug!( - "refresh_region_buckets re-generates buckets"; + .unwrap() + .clone(); + let buckets_count = region_buckets.meta.keys.len() - 1; + if change_bucket_version { + // TODO: we may need to make it debug once the coprocessor timeout is resolved. + info!( + "finished on_refresh_region_buckets"; "region_id" => self.fsm.region_id(), + "buckets_count" => buckets_count, + "buckets_size" => ?region_buckets.meta.sizes, ); - assert_eq!(buckets.len(), 1); - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - - let mut meta = BucketMeta { - region_id: self.fsm.region_id(), - region_epoch, - version: util::gen_bucket_version(self.fsm.peer.term(), current_version), - keys: bucket_keys, - sizes: vec![self.ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], - }; - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - region_buckets = BucketStat::from_meta(Arc::new(meta)); + } else { + // it means the buckets key range not any change, so don't need to refresh. 
+ test_only_callback(_cb, region_buckets.meta); + return; } - - let buckets_count = region_buckets.meta.keys.len() - 1; self.ctx.coprocessor_host.on_region_changed( - region, + self.region(), RegionChangeEvent::UpdateBuckets(buckets_count), self.fsm.peer.get_role(), ); let keys = region_buckets.meta.keys.clone(); - let old_region_buckets: Option = - self.fsm.peer.region_buckets.replace(region_buckets); - self.fsm.peer.last_region_buckets = old_region_buckets; + let version = region_buckets.meta.version; let mut store_meta = self.ctx.store_meta.lock().unwrap(); - let version = self.fsm.peer.region_buckets.as_ref().unwrap().meta.version; if let Some(reader) = store_meta.readers.get_mut(&self.fsm.region_id()) { - reader.update(ReadProgress::region_buckets( - self.fsm.peer.region_buckets.as_ref().unwrap().meta.clone(), - )); + reader.update(ReadProgress::region_buckets(region_buckets.meta.clone())); } // Notify followers to refresh their buckets version @@ -6154,19 +6100,9 @@ where .send_extra_message(extra_msg, &mut self.ctx.trans, &p); } } - - debug!( - "finished on_refresh_region_buckets"; - "region_id" => self.fsm.region_id(), - "buckets_count" => buckets_count, - "buckets_size" => ?self.fsm.peer.region_buckets.as_ref().unwrap().meta.sizes, - ); // test purpose #[cfg(any(test, feature = "testexport"))] - test_only_callback( - _cb, - self.fsm.peer.region_buckets.as_ref().unwrap().meta.clone(), - ); + test_only_callback(_cb, region_buckets.meta); } pub fn on_msg_refresh_buckets(&mut self, msg: RaftMessage) { @@ -6205,50 +6141,11 @@ where if !self.ctx.coprocessor_host.cfg.enable_region_bucket() { return None; } - let region_buckets = self.fsm.peer.region_buckets.as_ref()?; - let stats = ®ion_buckets.stats; - let keys = ®ion_buckets.meta.keys; - - let empty_last_keys = vec![]; - let empty_last_stats = metapb::BucketStats::default(); - let (last_keys, last_stats, stats_reset) = self - .fsm + let region_bucket_max_size = self.ctx.coprocessor_host.cfg.region_bucket_size.0 * 2; + self.fsm .peer - .last_region_buckets - .as_ref() - .map(|b| { - ( - &b.meta.keys, - &b.stats, - region_buckets.create_time != b.create_time, - ) - }) - .unwrap_or((&empty_last_keys, &empty_last_stats, false)); - - let mut bucket_ranges = vec![]; - let mut j = 0; - assert_eq!(keys.len(), stats.write_bytes.len() + 1); - for i in 0..stats.write_bytes.len() { - let mut diff_in_bytes = stats.write_bytes[i]; - while j < last_keys.len() && keys[i] > last_keys[j] { - j += 1; - } - if j < last_keys.len() && keys[i] == last_keys[j] { - if !stats_reset { - diff_in_bytes -= last_stats.write_bytes[j]; - } - j += 1; - } - - // if the bucket's write_bytes exceed half of the configured region_bucket_size, - // add it to the bucket_ranges for checking update - let bucket_update_diff_size_threshold = - self.ctx.coprocessor_host.cfg.region_bucket_size.0 / 2; - if diff_in_bytes >= bucket_update_diff_size_threshold { - bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); - } - } - Some(bucket_ranges) + .region_buckets_info() + .gen_bucket_range_for_update(region_bucket_max_size) } fn on_schedule_half_split_region( @@ -6544,7 +6441,7 @@ where fn on_report_region_buckets_tick(&mut self) { if !self.fsm.peer.is_leader() - || self.fsm.peer.region_buckets.is_none() + || self.fsm.peer.region_buckets_info().bucket_stat().is_none() || self.fsm.hibernate_state.group_state() == GroupState::Idle { return; @@ -6552,11 +6449,11 @@ where let region_id = self.region_id(); let peer_id = self.fsm.peer_id(); - let region_buckets = 
self.fsm.peer.region_buckets.as_mut().unwrap(); + let region_buckets = self.fsm.peer.region_buckets_info_mut().report_bucket_stat(); if let Err(e) = self .ctx .pd_scheduler - .schedule(PdTask::ReportBuckets(region_buckets.clone())) + .schedule(PdTask::ReportBuckets(region_buckets)) { error!( "failed to report region buckets"; @@ -6565,8 +6462,6 @@ where "err" => ?e, ); } - // todo: it will delete in next pr. - region_buckets.stats = new_bucket_stats(®ion_buckets.meta); self.register_report_region_buckets_tick(); } @@ -6640,7 +6535,7 @@ where self.fsm.peer.approximate_keys = Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); - if let Some(buckets) = &mut self.fsm.peer.region_buckets { + if let Some(buckets) = &mut self.fsm.peer.region_buckets_info_mut().bucket_stat_mut() { buckets.ingest_sst(keys, size); } // The ingested file may be overlapped with the data in engine, so we need to diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 0ca99efffc4a..4cae84d1d25d 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -85,14 +85,14 @@ pub use self::{ util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, - BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, CompactThreshold, - FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, LocalReader, - LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, - ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, - SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, - TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, - DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, - DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + BucketRange, BucketStatsInfo, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, + CompactThreshold, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, + LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, + ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, + StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, + DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }, }; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index abe14bf7c2a9..5511c9760626 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -43,7 +43,7 @@ use kvproto::{ }, }; use parking_lot::RwLockUpgradableReadGuard; -use pd_client::{BucketStat, INVALID_ID}; +use pd_client::INVALID_ID; use protobuf::Message; use raft::{ self, @@ -80,6 +80,7 @@ use super::{ self, check_req_region_epoch, is_initial_msg, AdminCmdEpochState, ChangePeerI, ConfChangeKind, Lease, LeaseState, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER, }, + worker::BucketStatsInfo, DestroyPeerJob, LocalReadContext, }; use crate::{ @@ -781,9 +782,8 @@ where persisted_number: u64, /// The context of applying snapshot. 
apply_snap_ctx: Option, - /// region buckets. - pub region_buckets: Option, - pub last_region_buckets: Option, + /// region buckets info in this region. + region_buckets_info: BucketStatsInfo, /// lead_transferee if this peer(leader) is in a leadership transferring. pub lead_transferee: u64, pub unsafe_recovery_state: Option, @@ -932,8 +932,7 @@ where unpersisted_ready: None, persisted_number: 0, apply_snap_ctx: None, - region_buckets: None, - last_region_buckets: None, + region_buckets_info: BucketStatsInfo::default(), lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, snapshot_recovery_state: None, @@ -1321,6 +1320,16 @@ where self.get_store().region() } + #[inline] + pub fn region_buckets_info_mut(&mut self) -> &mut BucketStatsInfo { + &mut self.region_buckets_info + } + + #[inline] + pub fn region_buckets_info(&self) -> &BucketStatsInfo { + &self.region_buckets_info + } + /// Check whether the peer can be hibernated. /// /// This should be used with `check_after_tick` to get a correct conclusion. @@ -2849,7 +2858,10 @@ where commit_term, committed_entries, cbs, - self.region_buckets.as_ref().map(|b| b.meta.clone()), + self.region_buckets_info() + .bucket_stat() + .as_ref() + .map(|b| b.meta.clone()), ); apply.on_schedule(&ctx.raft_metrics); self.mut_store() @@ -3389,10 +3401,7 @@ where } pub fn reset_region_buckets(&mut self) { - if self.region_buckets.is_some() { - self.last_region_buckets = self.region_buckets.take(); - self.region_buckets = None; - } + self.region_buckets_info_mut().set_bucket_stat(None); } /// Try to renew leader lease. @@ -4709,7 +4718,11 @@ where let mut resp = reader.execute(&req, &Arc::new(region), read_index, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); - snap.bucket_meta = self.region_buckets.as_ref().map(|b| b.meta.clone()); + snap.bucket_meta = self + .region_buckets_info() + .bucket_stat() + .as_ref() + .map(|s| s.meta.clone()); } resp.txn_extra_op = self.txn_extra_op.load(); cmd_resp::bind_term(&mut resp.response, self.term()); diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index e79f37a4bc4b..c67832385202 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -42,7 +42,8 @@ pub use self::{ }, region::{Runner as RegionRunner, Task as RegionTask}, split_check::{ - Bucket, BucketRange, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, + Bucket, BucketRange, BucketStatsInfo, KeyEntry, Runner as SplitCheckRunner, + Task as SplitCheckTask, }, split_config::{ SplitConfig, SplitConfigManager, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 2d54c00baa69..2694481494f1 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -440,7 +440,11 @@ impl ReadDelegate { read_progress: peer.read_progress.clone(), pending_remove: false, wait_data: false, - bucket_meta: peer.region_buckets.as_ref().map(|b| b.meta.clone()), + bucket_meta: peer + .region_buckets_info() + .bucket_stat() + .as_ref() + .map(|b| b.meta.clone()), track_ver: TrackVer::new(), } } diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 4ff853f70a05..94708e84f7ac 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs 
@@ -5,6 +5,7 @@ use std::{ collections::BinaryHeap, fmt::{self, Display, Formatter}, mem, + sync::Arc, }; use engine_traits::{ @@ -12,21 +13,23 @@ use engine_traits::{ }; use file_system::{IoType, WithIoType}; use itertools::Itertools; -use kvproto::{metapb::Region, pdpb::CheckPolicy}; +use kvproto::{ + metapb::{Region, RegionEpoch}, + pdpb::CheckPolicy, +}; use online_config::{ConfigChange, OnlineConfig}; +use pd_client::{BucketMeta, BucketStat}; use tikv_util::{ box_err, debug, error, info, keybuilder::KeyBuilder, warn, worker::Runnable, Either, }; use txn_types::Key; use super::metrics::*; -#[cfg(any(test, feature = "testexport"))] -use crate::coprocessor::Config; use crate::{ coprocessor::{ dispatcher::StoreHandle, split_observer::{is_valid_split_key, strip_timestamp_if_exists}, - CoprocessorHost, SplitCheckerHost, + Config, CoprocessorHost, SplitCheckerHost, }, Result, }; @@ -144,6 +147,216 @@ pub struct Bucket { pub size: u64, } +#[derive(Debug, Clone, Default)] +pub struct BucketStatsInfo { + // the stats is increment flow. + bucket_stat: Option, + // the report bucket stat records the increment stats after last report pd. + // it will be reset after report pd. + report_bucket_stat: Option, + // avoid the version roll back, it record the last bucket version if bucket stat isn't none. + last_bucket_version: u64, +} + +impl BucketStatsInfo { + /// returns all bucket ranges those's write_bytes exceed the given + /// diff_size_threshold. + pub fn gen_bucket_range_for_update( + &self, + region_bucket_max_size: u64, + ) -> Option> { + let region_buckets = self.bucket_stat.as_ref()?; + let stats = ®ion_buckets.stats; + let keys = ®ion_buckets.meta.keys; + let sizes = ®ion_buckets.meta.sizes; + + let mut suspect_bucket_ranges = vec![]; + assert_eq!(keys.len(), stats.write_bytes.len() + 1); + for i in 0..stats.write_bytes.len() { + let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; + if estimated_bucket_size >= region_bucket_max_size { + suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); + } + } + Some(suspect_bucket_ranges) + } + + #[inline] + pub fn version(&self) -> u64 { + self.bucket_stat + .as_ref() + .map_or(self.last_bucket_version, |b| b.meta.version) + } + + #[inline] + pub fn add_bucket_flow(&mut self, delta: &Option) { + if let (Some(buckets), Some(report_buckets), Some(delta)) = ( + self.bucket_stat.as_mut(), + self.report_bucket_stat.as_mut(), + delta, + ) { + buckets.merge(delta); + report_buckets.merge(delta); + } + } + + #[inline] + pub fn set_bucket_stat(&mut self, buckets: Option) { + self.bucket_stat = buckets.clone(); + if let Some(new_buckets) = buckets { + self.last_bucket_version = new_buckets.meta.version; + let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); + if let Some(old) = &mut self.report_bucket_stat { + new_report_buckets.merge(old); + *old = new_report_buckets; + } else { + self.report_bucket_stat = Some(new_report_buckets); + } + } else { + self.report_bucket_stat = None; + } + } + + #[inline] + pub fn report_bucket_stat(&mut self) -> BucketStat { + let current = self.report_bucket_stat.as_mut().unwrap(); + let delta = current.clone(); + current.clear_stats(); + delta + } + + #[inline] + pub fn bucket_stat(&self) -> &Option { + &self.bucket_stat + } + + #[inline] + pub fn bucket_stat_mut(&mut self) -> Option<&mut BucketStat> { + self.bucket_stat.as_mut() + } + + pub fn on_refresh_region_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + 
region: &Region, + bucket_ranges: Option>, + ) -> bool { + let change_bucket_version: bool; + // The region buckets reset after this region happened split or merge. + // The message should be dropped if it's epoch is lower than the regions. + // The bucket ranges is none when the region buckets is also none. + // So this condition indicates that the region buckets needs to refresh not + // renew. + if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ + assert_eq!(buckets.len(), bucket_ranges.len()); + change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); + }else{ + change_bucket_version = true; + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. + self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); + } + change_bucket_version + } + + fn update_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + bucket_ranges: &Vec, + ) -> bool { + let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); + let mut change_bucket_version = false; + let mut meta_idx = 0; + let mut region_buckets = origin_region_buckets.clone(); + let mut meta = (*region_buckets.meta).clone(); + meta.region_epoch = region_epoch; + + // bucket stats will clean if the bucket size is updated. + for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { + // the bucket ranges maybe need to split or merge not all the meta keys, so it + // needs to find the first keys. + while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { + meta_idx += 1; + } + // meta_idx can't be not the last entry (which is end key) + if meta_idx >= meta.keys.len() - 1 { + break; + } + // the bucket size is small and does not have split keys, + // then it should be merged with its left neighbor + let region_bucket_merge_size = + cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); + if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { + meta.sizes[meta_idx] = bucket.size; + region_buckets.clean_stats(meta_idx); + // the region has more than one bucket + // and the left neighbor + current bucket size is not very big + if meta.keys.len() > 2 + && meta_idx != 0 + && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 + { + // bucket is too small + region_buckets.left_merge(meta_idx); + meta.left_merge(meta_idx); + change_bucket_version = true; + continue; + } + } else { + // update size + meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; + region_buckets.clean_stats(meta_idx); + // insert new bucket keys (split the original bucket) + for bucket_key in bucket.keys { + meta_idx += 1; + region_buckets.split(meta_idx); + meta.split(meta_idx, bucket_key); + change_bucket_version = true; + } + } + meta_idx += 1; + } + if change_bucket_version { + meta.version = next_bucket_version; + } + region_buckets.meta = Arc::new(meta); + self.set_bucket_stat(Some(region_buckets)); + change_bucket_version + } + + fn init_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + mut buckets: Vec, + region_epoch: RegionEpoch, + region: &Region, + ) { + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. 
+ assert_eq!(buckets.len(), 1); + let bucket_keys = buckets.pop().unwrap().keys; + let bucket_count = bucket_keys.len() + 1; + let mut meta = BucketMeta { + region_id: region.get_id(), + region_epoch, + version: next_bucket_version, + keys: bucket_keys, + sizes: vec![cfg.region_bucket_size.0; bucket_count], + }; + // padding the boundary keys and initialize the flow. + meta.keys.insert(0, region.get_start_key().to_vec()); + meta.keys.push(region.get_end_key().to_vec()); + let bucket_stats = BucketStat::from_meta(Arc::new(meta)); + self.set_bucket_stat(Some(bucket_stats)); + } +} + pub enum Task { SplitCheckTask { region: Region, @@ -702,3 +915,178 @@ where } } } + +#[cfg(test)] +mod tests { + use super::*; + + // create BucketStatsInfo include three keys: ["","100","200",""]. + fn mock_bucket_stats_info() -> BucketStatsInfo { + let mut bucket_stats_info = BucketStatsInfo::default(); + let cfg = Config::default(); + let next_bucket_version = 1; + let bucket_ranges = None; + let mut region_epoch = RegionEpoch::default(); + region_epoch.set_conf_ver(1); + region_epoch.set_version(1); + let mut region = Region::default(); + region.set_id(1); + + let mut buckets = vec![]; + let mut bucket = Bucket::default(); + bucket.keys.push(vec![100]); + bucket.keys.push(vec![200]); + buckets.insert(0, bucket); + + let _ = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + bucket_stats_info + } + + #[test] + pub fn test_version() { + let mut bucket_stats_info = mock_bucket_stats_info(); + assert_eq!(1, bucket_stats_info.version()); + bucket_stats_info.set_bucket_stat(None); + assert_eq!(1, bucket_stats_info.version()); + + let mut meta = BucketMeta::default(); + meta.version = 2; + meta.keys.push(vec![]); + meta.keys.push(vec![]); + let bucket_stat = BucketStat::from_meta(Arc::new(meta)); + bucket_stats_info.set_bucket_stat(Some(bucket_stat)); + assert_eq!(2, bucket_stats_info.version()); + } + + #[test] + pub fn test_insert_new_buckets() { + let bucket_stats_info = mock_bucket_stats_info(); + + let cfg = Config::default(); + let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stat.meta.keys + ); + for i in 0..bucket_stat.stats.write_bytes.len() { + assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); + assert_eq!(0, bucket_stat.stats.write_bytes[i]); + } + } + + #[test] + pub fn test_report_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); + let bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + } + + #[test] + pub fn test_spilt_and_merge_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let next_bucket_version = 2; + let mut region = Region::default(); + region.set_id(1); + let cfg = Config::default(); + let bucket_size = cfg.region_bucket_size.0; + let bucket_stats = 
bucket_stats_info.bucket_stat().as_ref().unwrap(); + let region_epoch = bucket_stats.meta.region_epoch.clone(); + + // step1: update buckets flow + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step2: tick not affect anything + let bucket_ranges = Some(vec![]); + let buckets = vec![]; + let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch.clone(), + ®ion, + bucket_ranges, + ); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert!(!change_bucket_version); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step3: split key 50 + let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![vec![50]]; + bucket.size = bucket_size; + let mut buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets.clone(), + region_epoch.clone(), + ®ion, + bucket_ranges.clone(), + ); + assert!(change_bucket_version); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![50], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); + + // step4: merge [50-100] to [0-50], + bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![]; + bucket.size = 0; + buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + assert!(change_bucket_version); + + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); + + // report buckets doesn't be affected by the split and merge. 
+ let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); + } +} diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index c0f754879983..35ee18388659 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -976,14 +976,13 @@ fn test_refresh_region_bucket_keys() { cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); + // case: init bucket info cluster.must_put(b"k11", b"v1"); let mut region = pd_client.get_region(b"k11").unwrap(); - let bucket = Bucket { keys: vec![b"k11".to_vec()], size: 1024 * 1024 * 200, }; - let mut expected_buckets = metapb::Buckets::default(); expected_buckets.set_keys(bucket.clone().keys.into()); expected_buckets @@ -997,6 +996,8 @@ fn test_refresh_region_bucket_keys() { Option::None, Some(expected_buckets.clone()), ); + + // case: bucket range should refresh if epoch changed let conf_ver = region.get_region_epoch().get_conf_ver() + 1; region.mut_region_epoch().set_conf_ver(conf_ver); @@ -1018,6 +1019,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version2, bucket_version + 1); + // case: stale epoch will not refresh buckets info let conf_ver = 0; region.mut_region_epoch().set_conf_ver(conf_ver); let bucket_version3 = cluster.refresh_region_bucket_keys( @@ -1028,6 +1030,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version3, bucket_version2); + // case: bucket split // now the buckets is ["", "k12", ""]. further split ["", k12], [k12, ""] // buckets into more buckets let region = pd_client.get_region(b"k11").unwrap(); @@ -1066,6 +1069,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version4, bucket_version3 + 1); + // case: merge buckets // remove k11~k12, k12~k121, k122~[] bucket let buckets = vec![ Bucket { @@ -1107,7 +1111,7 @@ fn test_refresh_region_bucket_keys() { assert_eq!(bucket_version5, bucket_version4 + 1); - // split the region + // case: split the region pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![b"k11".to_vec()]); let mut buckets = vec![Bucket { keys: vec![b"k10".to_vec()], @@ -1132,7 +1136,7 @@ fn test_refresh_region_bucket_keys() { cluster.refresh_region_bucket_keys(®ion, buckets, None, Some(expected_buckets.clone())); assert_eq!(bucket_version6, bucket_version5 + 1); - // merge the region + // case: merge the region pd_client.must_merge(left_id, right.get_id()); let region = pd_client.get_region(b"k10").unwrap(); let buckets = vec![Bucket { @@ -1145,6 +1149,7 @@ fn test_refresh_region_bucket_keys() { cluster.refresh_region_bucket_keys(®ion, buckets, None, Some(expected_buckets.clone())); assert_eq!(bucket_version7, bucket_version6 + 1); + // case: nothing changed let bucket_version8 = cluster.refresh_region_bucket_keys( ®ion, vec![], From 763069ed660ffe31e5e57586bc7ccfba94cb8f71 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Thu, 26 Oct 2023 18:17:04 +0800 Subject: [PATCH 110/203] raftstore: make release work (#15850) close tikv/tikv#15851 Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore/src/store/fsm/peer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 49f558250eed..14ad09dbde80 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6067,6 +6067,7 @@ where ); } else { 
// it means the buckets key range not any change, so don't need to refresh. + #[cfg(any(test, feature = "testexport"))] test_only_callback(_cb, region_buckets.meta); return; } From 69ef88b2e9d036d5975973c4d6d5a15278bcb2e0 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 27 Oct 2023 12:12:33 +0800 Subject: [PATCH 111/203] import: write RPC will check region epoch before continue (#15795) close tikv/tikv#15003 Signed-off-by: lance6716 Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi --- Makefile | 8 + components/error_code/src/sst_importer.rs | 4 +- .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 103 +++------ components/raftstore/src/store/msg.rs | 6 - .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +---------- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/sst_importer/src/errors.rs | 8 + components/sst_importer/src/import_file.rs | 50 +++-- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 12 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/import/sst_service.rs | 197 ++++++++++++++++-- 16 files changed, 300 insertions(+), 244 deletions(-) diff --git a/Makefile b/Makefile index bb1d7316e1b6..ce8d4e8b793d 100644 --- a/Makefile +++ b/Makefile @@ -406,6 +406,14 @@ docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test +docker_shell: + docker build -f Dockerfile.test \ + -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + . + docker run -it -v $(shell pwd):/tikv \ + ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + /bin/bash + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 117400e8aff4..9e568ee00c12 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -25,5 +25,7 @@ define_error_codes!( RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), SUSPENDED => ("Suspended", "this request has been suspended.", - "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them.") + "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). 
Check the user manual and stop them."), + REQUEST_TOO_NEW => ("RequestTooNew", "", ""), + REQUEST_TOO_OLD => ("RequestTooOld", "", "") ); diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index e963434fe837..45247b3f36fd 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,6 +43,11 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); + // filter old version SSTs + let ssts: Vec<_> = ssts + .into_iter() + .filter(|sst| sst.1 >= sst_importer::API_VERSION_2) + .collect(); if ssts.is_empty() { return Ok(()); } @@ -50,9 +55,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.get_region_id()) + .entry(sst.0.get_region_id()) .or_default() - .push(sst); + .push(sst.0); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index aa8fa7c318e6..2efcbf87b09e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -14,7 +14,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, }, - time::{Duration, Instant}, + time::{Duration, Instant, SystemTime}, u64, }; @@ -36,14 +36,13 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ - import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; +use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -813,9 +812,6 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), - StoreMsg::ValidateSstResult { invalid_ssts } => { - self.on_validate_sst_result(invalid_ssts) - } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1655,12 +1651,7 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new( - meta.get_id(), - self.router.clone(), - Arc::clone(&importer), - Arc::clone(&pd_client), - ); + let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2762,62 +2753,47 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } -impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { - fn on_validate_sst_result(&mut self, ssts: Vec) { - if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import { - return; - } - // A stale peer can still ingest a stale Sst before it is - // destroyed. We need to make sure that no stale peer exists. 
- let mut delete_ssts = Vec::new(); - { - let meta = self.ctx.store_meta.lock().unwrap(); - for sst in ssts { - if !meta.regions.contains_key(&sst.get_region_id()) { - delete_ssts.push(sst); - } - } - } - if delete_ssts.is_empty() { - return; - } - - let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to delete ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } +// we will remove 1-week old version 1 SST files. +const VERSION_1_SST_CLEANUP_DURATION: Duration = Duration::from_secs(7 * 24 * 60 * 60); +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); - let mut validate_ssts = Vec::new(); - let import_size = box_try!(self.ctx.importer.get_total_size()); - STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { return Ok(()); } + let now = SystemTime::now(); { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if let Some(r) = meta.regions.get(&sst.get_region_id()) { + if let Some(r) = meta.regions.get(&sst.0.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.0.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. - delete_ssts.push(sst); + delete_ssts.push(sst.0); } + } else if sst.1 >= sst_importer::API_VERSION_2 { + // The write RPC of import sst service have make sure the region do exist at + // the write time, and now the region is not found, + // sst can be deleted because it won't be used by + // ingest in future. + delete_ssts.push(sst.0); } else { - // If the peer doesn't exist, we need to validate the SST through PD. - validate_ssts.push(sst); + // in the old protocol, we can't easily know if the SST will be used in the + // committed raft log, so we only delete the SST + // files that has not be modified for 1 week. + if let Ok(duration) = now.duration_since(sst.2) { + if duration > VERSION_1_SST_CLEANUP_DURATION { + warn!( + "found 1-week old SST file of version 1, will delete it"; + "sst_meta" => ?sst.0, + "last_modified" => ?sst.2 + ); + delete_ssts.push(sst.0); + } + } } } } @@ -2837,27 +2813,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } - // When there is an import job running, the region which this sst belongs may - // has not been split from the origin region because the apply thread is so busy - // that it can not apply SplitRequest as soon as possible. So we can not - // delete this sst file. 
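Both the cleanup check above and `check_local_region_stale` in the import-service changes later in this patch lean on `util::is_epoch_stale`, which is not part of this diff. A minimal sketch of the comparison it is assumed to perform (the real helper lives in `raftstore::store::util` and may differ in detail):

```rust
use kvproto::metapb::RegionEpoch;

// Sketch only: an epoch is considered stale relative to `check_epoch` when
// either counter lags behind. `version` is bumped by split/merge, `conf_ver`
// by membership (conf) changes.
fn is_epoch_stale(epoch: &RegionEpoch, check_epoch: &RegionEpoch) -> bool {
    epoch.get_version() < check_epoch.get_version()
        || epoch.get_conf_ver() < check_epoch.get_conf_ver()
}
```

Under this rule, the new write RPC later in the patch maps a stale request epoch to `RequestTooOld` (the client should rescan the region from PD) and a stale local epoch to `RequestTooNew` (the client should retry the write once the local peer catches up), which matches the error strings asserted in the new tests.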
- if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { - let task = CleanupSstTask::ValidateSst { - ssts: validate_ssts, - }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to validate ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a33ca0e476ea..a92e5169549d 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,7 +10,6 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, - import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -824,10 +823,6 @@ where { RaftMessage(InspectedRaftMessage), - ValidateSstResult { - invalid_ssts: Vec, - }, - // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -884,7 +879,6 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), - StoreMsg::ValidateSstResult { .. } => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 632e85f40cc3..726b7abe5ceb 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,7 +3,6 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; -use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -11,7 +10,6 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; -use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -29,29 +27,26 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, - S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -60,12 +55,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 8174b872f4b7..44f188e6f8fb 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,62 +1,30 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; +use std::{fmt, sync::Arc}; -use engine_traits::KvEngine; -use kvproto::{import_sstpb::SstMeta, metapb::Region}; -use pd_client::PdClient; +use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; -use tikv_util::{error, worker::Runnable}; - -use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; - -type Result = std::result::Result>; +use tikv_util::worker::Runnable; pub enum Task { DeleteSst { ssts: Vec }, - ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), - Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner -where - EK: KvEngine, - S: StoreRouter, -{ - store_id: u64, - store_router: S, +pub struct Runner { importer: Arc, - pd_client: Arc, - _engine: PhantomData, } -impl Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ - pub fn new( - store_id: u64, - store_router: S, - importer: Arc, - pd_client: Arc, - ) -> Runner { - Runner { - store_id, - store_router, - importer, - pd_client, - _engine: PhantomData, - } +impl Runner { + pub fn new(importer: Arc) -> Runner { + Runner { importer } } /// Deletes SST files from the importer. @@ -65,78 +33,9 @@ where let _ = self.importer.delete(sst); } } - - fn get_region_by_meta(&self, sst: &SstMeta) -> Result { - // The SST meta has been delivered with a range, use it directly. - // For now, no case will reach this. But this still could be a guard for - // reducing the superise in the future... - if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { - return self - .pd_client - .get_region(sst.get_range().get_start()) - .map_err(Into::into); - } - // Once there isn't range provided. - let query_by_start_key_of_full_meta = || { - let start_key = self - .importer - .load_start_key_by_meta::(sst)? - .ok_or_else(|| -> Box { - "failed to load start key from sst, the sst might be empty".into() - })?; - let region = self.pd_client.get_region(&start_key)?; - Result::Ok(region) - }; - query_by_start_key_of_full_meta() - .map_err(|err| - format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() - ) - } - - /// Validates whether the SST is stale or not. - fn handle_validate_sst(&self, ssts: Vec) { - let store_id = self.store_id; - let mut invalid_ssts = Vec::new(); - for sst in ssts { - match self.get_region_by_meta(&sst) { - Ok(r) => { - // The region id may or may not be the same as the - // SST file, but it doesn't matter, because the - // epoch of a range will not decrease anyway. - if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { - // Region has not been updated. - continue; - } - if r.get_id() == sst.get_region_id() - && r.get_peers().iter().any(|p| p.get_store_id() == store_id) - { - // The SST still belongs to this store. - continue; - } - invalid_ssts.push(sst); - } - Err(e) => { - error!("get region failed"; "err" => %e); - } - } - } - - // We need to send back the result to check for the stale - // peer, which may ingest the stale SST before it is - // destroyed. 
- let msg = StoreMsg::ValidateSstResult { invalid_ssts }; - if let Err(e) = self.store_router.send(msg) { - error!(%e; "send validate sst result failed"); - } - } } -impl Runnable for Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { @@ -144,9 +43,6 @@ where Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } - Task::ValidateSst { ssts } => { - self.handle_validate_sst(ssts); - } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8d44890e5a63..a4b6276a5878 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,6 +366,7 @@ where router.clone(), config.coprocessor.clone(), )); + let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1080,6 +1081,7 @@ where servers.importer.clone(), None, self.resource_manager.clone(), + Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 2593035618da..65d02f58c088 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,6 +948,7 @@ where backup_worker.start(backup_endpoint); // Import SST service. + let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -956,6 +957,7 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), + Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index acca7523427d..e03288bb3e12 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -118,6 +118,12 @@ pub enum Error { #[error("Importing a SST file with imcompatible api version")] IncompatibleApiVersion, + #[error("{0}, please retry write later")] + RequestTooNew(String), + + #[error("{0}, please rescan region later")] + RequestTooOld(String), + #[error("Key mode mismatched with the request mode, writer: {:?}, storage: {:?}, key: {}", .writer, .storage_api_version, .key)] InvalidKeyMode { writer: SstWriterType, @@ -213,6 +219,8 @@ impl ErrorCodeExt for Error { Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, Error::Suspended { .. } => error_code::sst_importer::SUSPENDED, + Error::RequestTooNew(_) => error_code::sst_importer::REQUEST_TOO_NEW, + Error::RequestTooOld(_) => error_code::sst_importer::REQUEST_TOO_OLD, } } } diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b270d26a4111..b3b7c051ce44 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -6,6 +6,7 @@ use std::{ io::{self, Write}, path::{Path, PathBuf}, sync::Arc, + time::SystemTime, }; use api_version::api_v2::TIDB_RANGES_COMPLEMENT; @@ -440,7 +441,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? 
{ let e = e?; @@ -449,7 +450,10 @@ impl ImportDir { } let path = e.path(); match parse_meta_from_path(&path) { - Ok(sst) => ssts.push(sst), + Ok(sst) => { + let last_modify = e.metadata()?.modified()?; + ssts.push((sst.0, sst.1, last_modify)) + } Err(e) => error!(%e; "path_to_sst_meta failed"; "path" => %path.display(),), } } @@ -458,20 +462,28 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; - +// version 2: compared to version 1 which is the default version, we will check +// epoch of request and local region in write API. +pub const API_VERSION_2: i32 = 2; + +/// sst_meta_to_path will encode the filepath with default api version (current +/// is 2). So when the SstMeta is created in old version of TiKV and filepath +/// will not correspond to the real file, in the deletion logic we can't remove +/// these files. pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), + API_VERSION_2, SST_SUFFIX, ))) } -pub fn parse_meta_from_path>(path: P) -> Result { +pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -500,7 +512,11 @@ pub fn parse_meta_from_path>(path: P) -> Result { // cf_name to path. meta.set_cf_name(elems[4].to_owned()); } - Ok(meta) + let mut api_version = 1; + if elems.len() > 5 { + api_version = elems[5].parse()?; + } + Ok((meta, api_version)) } #[cfg(test)] @@ -520,11 +536,12 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default.sst", uuid); + let expected_path = format!("{}_1_2_3_default_2.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.0); + assert_eq!(2, meta_with_ver.1); } #[test] @@ -543,8 +560,9 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.0); + assert_eq!(1, meta_with_ver.1); } #[cfg(feature = "test-engines-rocksdb")] @@ -596,14 +614,20 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta| { + ssts.iter_mut().for_each(|meta_with_ver| { + let meta = &mut meta_with_ver.0; let start = dir .load_start_key_by_meta::(meta, arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!(ssts, vec![meta]); + assert_eq!( + ssts.iter() + .map(|meta_with_ver| { meta_with_ver.0.clone() }) + .collect(), + vec![meta] + ); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 0cfc3bab774c..ff137005b09b 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::sst_meta_to_path, + import_file::{sst_meta_to_path, API_VERSION_2}, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, 
TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5cf9f1c6573b..358bc0545ded 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -11,7 +11,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, }, - time::Duration, + time::{Duration, SystemTime}, }; use collections::HashSet; @@ -1383,9 +1383,9 @@ impl SstImporter { } /// List the basic information of the current SST files. - /// The information contains UUID, region ID, region Epoch. - /// Other fields may be left blank. - pub fn list_ssts(&self) -> Result> { + /// The information contains UUID, region ID, region Epoch, api version, + /// last modified time. Other fields may be left blank. + pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1585,9 +1585,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.get_uuid()) + .find(|s| s.get_uuid() == sst.0.get_uuid()) .unwrap(); - dir.delete(sst, key_manager.as_deref()).unwrap(); + dir.delete(&sst.0, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 299e93eb7461..5073304e17a6 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,6 +561,7 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0002f36d647e..f5c64fa86e91 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,6 +451,7 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. 
diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 68403e226f8b..92e73ca9f8f1 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -27,6 +27,12 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, + metapb::RegionEpoch, +}; +use raftstore::{ + coprocessor::{RegionInfo, RegionInfoProvider}, + store::util::is_epoch_stale, + RegionInfoAccessor, }; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; @@ -39,7 +45,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::create_stream_with_buffer, + future::{create_stream_with_buffer, paired_future_callback}, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -124,6 +130,7 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, + region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -318,6 +325,7 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, + region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -365,6 +373,7 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, + region_info_accessor, writer, store_meta, resource_manager, @@ -675,6 +684,45 @@ impl ImportSstService { } } +fn check_local_region_stale( + region_id: u64, + epoch: &RegionEpoch, + local_region_info: Option, +) -> Result<()> { + match local_region_info { + Some(local_region_info) => { + let local_region_epoch = local_region_info.region.region_epoch.unwrap(); + + // when local region epoch is stale, client can retry write later + if is_epoch_stale(&local_region_epoch, epoch) { + return Err(Error::RequestTooNew(format!( + "request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", + region_id, local_region_epoch, epoch + ))); + } + // when local region epoch is ahead, client need to rescan region from PD to get + // latest region later + if is_epoch_stale(epoch, &local_region_epoch) { + return Err(Error::RequestTooOld(format!( + "request region {} is staler than local region, local epoch {:?}, request epoch {:?}", + region_id, local_region_epoch, epoch + ))); + } + + // not match means to rescan + Ok(()) + } + None => { + // when region not found, we can't tell whether it's stale or ahead, so we just + // return the safest case + Err(Error::RequestTooOld(format!( + "region {} is not found", + region_id + ))) + } + } +} + #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -686,6 +734,7 @@ macro_rules! impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); + let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -694,8 +743,11 @@ macro_rules! 
impl_write { let label = stringify!($fn); let resource_manager = self.resource_manager.clone(); let handle_task = async move { - let res = async move { - let first_req = rx.try_next().await?; + let (res, rx) = async move { + let first_req = match rx.try_next().await { + Ok(r) => r, + Err(e) => return (Err(e), Some(rx)), + }; let (meta, resource_limiter) = match first_req { Some(r) => { let limiter = resource_manager.as_ref().and_then(|m| { @@ -708,18 +760,49 @@ macro_rules! impl_write { }); match r.chunk { Some($chunk_ty::Meta(m)) => (m, limiter), - _ => return Err(Error::InvalidChunk), + _ => return (Err(Error::InvalidChunk), Some(rx)), } } - _ => return Err(Error::InvalidChunk), + _ => return (Err(Error::InvalidChunk), Some(rx)), }; + // wait the region epoch on this TiKV to catch up with the epoch + // in request, which comes from PD and represents the majority + // peers' status. let region_id = meta.get_region_id(); + let (cb, f) = paired_future_callback(); + if let Err(e) = region_info_accessor + .find_region_by_id(region_id, cb) + .map_err(|e| { + // when region not found, we can't tell whether it's stale or ahead, so + // we just return the safest case + Error::RequestTooOld(format!( + "failed to find region {} err {:?}", + region_id, e + )) + }) + { + return (Err(e), Some(rx)); + }; + let res = match f.await { + Ok(r) => r, + Err(e) => return (Err(From::from(e)), Some(rx)), + }; + if let Err(e) = + check_local_region_stale(region_id, meta.get_region_epoch(), res) + { + return (Err(e), Some(rx)); + }; + let tablet = match tablets.get(region_id) { Some(t) => t, None => { - return Err(Error::Engine( - format!("region {} not found", region_id).into(), - )); + return ( + Err(Error::RequestTooOld(format!( + "region {} not found", + region_id + ))), + Some(rx), + ); } }; @@ -727,10 +810,10 @@ macro_rules! impl_write { Ok(w) => w, Err(e) => { error!("build writer failed {:?}", e); - return Err(Error::InvalidChunk); + return (Err(Error::InvalidChunk), Some(rx)); } }; - let (writer, resource_limiter) = rx + let result = rx .try_fold( (writer, resource_limiter), |(mut writer, limiter), req| async move { @@ -747,7 +830,11 @@ macro_rules! impl_write { .map(|w| (w, limiter)) }, ) - .await?; + .await; + let (writer, resource_limiter) = match result { + Ok(r) => r, + Err(e) => return (Err(e), None), + }; let finish_fn = async { let metas = writer.finish()?; @@ -756,13 +843,18 @@ macro_rules! 
impl_write { }; let metas: Result<_> = with_resource_limiter(finish_fn, resource_limiter).await; - let metas = metas?; + let metas = match metas { + Ok(r) => r, + Err(e) => return (Err(e), None), + }; let mut resp = $resp_ty::default(); resp.set_metas(metas.into()); - Ok(resp) + (Ok(resp), None) } .await; $crate::send_rpc_response!(res, sink, label, timer); + // don't drop rx before send response + _ = rx; }; self.threads.spawn(buf_driver); @@ -1392,14 +1484,19 @@ mod test { use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::RegionEpoch, + metapb::{Region, RegionEpoch}, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::Message; + use protobuf::{Message, SingularPtrField}; + use raft::StateRole::Follower; + use raftstore::RegionInfo; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{import::sst_service::RequestCollector, server::raftkv}; + use crate::{ + import::sst_service::{check_local_region_stale, RequestCollector}, + server::raftkv, + }; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1683,4 +1780,72 @@ mod test { } assert_eq!(total, 100); } + + #[test] + fn test_write_rpc_check_region_epoch() { + let mut req_epoch = RegionEpoch { + conf_ver: 10, + version: 10, + ..Default::default() + }; + // test for region not found + let result = check_local_region_stale(1, &req_epoch, None); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + let mut local_region_info = RegionInfo { + region: Region { + id: 1, + region_epoch: SingularPtrField::some(req_epoch.clone()), + ..Default::default() + }, + role: Follower, + buckets: 1, + }; + // test the local region epoch is same as request + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + result.unwrap(); + + // test the local region epoch is ahead of request + local_region_info + .region + .region_epoch + .as_mut() + .unwrap() + .conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + req_epoch.conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + result.unwrap(); + + // test the local region epoch is staler than request + req_epoch.version = 12; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info)); + assert!(result.is_err()); + // check error message contains "retry write later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("retry write later") + ); + } } From f5361da27d6f74070b0641eb72e1f09bd47d5b65 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 27 Oct 2023 14:41:04 +0800 Subject: [PATCH 112/203] doc: Add CPU and heap profiling HTTP API doc (#15852) ref tikv/tikv#15732 Add CPU and heap profiling HTTP API doc Signed-off-by: Connor1996 Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- doc/http.md | 142 
++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 doc/http.md

diff --git a/doc/http.md b/doc/http.md
new file mode 100644
index 000000000000..5aff02e75eae
--- /dev/null
+++ b/doc/http.md
@@ -0,0 +1,142 @@
+# HTTP API
+
+In the context of the following line: `TIKV_ADDRESS=$TIKV_IP:$TIKV_STATUS_PORT`
+
+By default:
+
+- `TIKV_IP` should be set to `127.0.0.1`
+- `TIKV_STATUS_PORT` should be set to `20180`
+
+## CPU Profiling
+
+Collect and export CPU profiling data within a specified time range.
+
+```bash
+curl -H 'Content-Type: <type>' -X GET 'http://$TIKV_ADDRESS/debug/pprof/profile?seconds=<seconds>&frequency=<frequency>'
+```
+
+#### Parameters
+
+- **seconds** (optional): Specifies the number of seconds to collect CPU profiling data.
+  - Default: 10
+  - Example: `?seconds=20`
+
+- **frequency** (optional): Specifies the sampling frequency for CPU profiling data.
+  - Default: 99
+  - Example: `?frequency=100`
+
+- **type** (optional): Specifies the Content-Type of the response.
+  - Options: `application/protobuf` for raw profile data, any other type for a flame graph.
+  - Default: `N/A`
+  - Example: `-H "Content-Type:application/protobuf"`
+
+#### Response
+
+The server will return CPU profiling data. The response format is determined by the Content-Type in the request header and can be either raw profile data in protobuf format or a flame graph in SVG format.
+
+The raw profile data can be handled by the `pprof` tool. For example, use `go tool pprof --http=0.0.0.0:1234 xxx.proto` to open an interactive web UI.
+
+## Activate Heap Profiling
+
+Activate heap profiling of jemalloc. When activated, jemalloc collects memory usage at malloc, dealloc, etc., walking the call stack to capture a backtrace, so it affects performance to some extent.
+
+```bash
+curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_activate?interval=<interval>'
+```
+
+#### Parameters
+
+- **interval** (optional): Specifies the interval (in seconds) for dumping heap profiles in a temporary directory under the TiKV data directory. If set to 0, periodic dumping is disabled. You can still dump a heap profile manually with the `heap` API described below.
+  - Default: 0
+  - Example: `?interval=60`
+
+#### Response
+
+A confirmation message indicating whether heap profiling activation was successful. If it has already been activated, an error message is returned without any side effect.
+
+## Deactivate Heap Profiling
+
+Deactivate the currently running heap profiling.
+
+```bash
+curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_deactivate'
+```
+
+#### Response
+
+If heap profiling is active, it will be stopped. The server will return a message indicating whether the deactivation was successful.
+If heap profiling is not currently active, the server will return a message indicating that no heap profiling is running.
+
+## List Heap Profiles
+
+List the available heap profiles, which are dumped periodically when profiling is activated by the `heap_activate` API with `interval` specified.
+
+Note that once deactivation is performed, all existing profiles will be deleted.
+
+```bash
+curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_list'
+```
+
+#### Response
+
+It will return a list of profiles, each represented as a file name and its last modification timestamp, in plain text format. The profiles are sorted in reverse order based on their modification timestamps.
+
+If there are no available heap profiles or heap profiling is inactive, the server will return an empty list.
+
+## Retrieve Heap Profile
+
+Collect and export heap profiling data.
+
+Note that a heap profile is not like a CPU profile, which is collected within the specified time range right after the request. Instead, a heap profile is just a snapshot of the accumulated memory usage at the time of the request, as memory usage is continuously collected once profiling is activated.
+
+```bash
+curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?name=<name>&jeprof=<bool>'
+```
+
+#### Parameters
+
+- **name** (optional): Specifies the name of the heap profile to retrieve. If not specified, a new heap profile is dumped and retrieved.
+  - Default: ``
+  - Example: `?name=000001.heap`
+
+- **jeprof** (optional): Indicates whether to use jeprof to process the heap profile and generate a call graph. It requires `perl` to be installed.
+  - Default: false
+  - Example: `?jeprof=true`
+
+#### Response
+
+The server will return heap profiling data. The response format is determined by the `jeprof` parameter. If true, the response will be a call graph in SVG format. Otherwise, the response will be raw profile data in jemalloc's dedicated format.
+
+## Heap Profile Symbolization
+
+The heap profile retrieved by the `heap` API is, by default, raw profile data in jemalloc's dedicated format, which should be processed by `jeprof` for visualization.
+
+There are two ways to generate a call graph in SVG format from the raw profile data:
+
+- local: use the provided profile and the TiKV binary to resolve symbols
+
+```bash
+jeprof <tikv binary> <profile> --svg
+```
+
+- remote: use the latest heap profile retrieved over HTTP and the symbolization service provided by TiKV to resolve symbols
+
+```bash
+jeprof --svg http://$TIKV_ADDRESS/debug/pprof/heap
+```
+
+To support the remote way, TiKV provides a symbolization service to resolve symbols from memory addresses. jeprof implicitly calls `.../debug/pprof/symbol` to map the call stack's addresses to the corresponding function names. For most cases, you don't need to call
+it explicitly. But if you want to use it for other purposes, you can call it as follows.
+
+```bash
+curl -X POST -d '<address_list>' 'http://$TIKV_ADDRESS/debug/pprof/symbol'
+```
+
+#### Parameters
+
+- **address_list** (required): A list of memory addresses to be resolved. The addresses should be provided in hexadecimal format (with or without the '0x' prefix), separated by a '+' character.
+
+#### Response
+
+A list of resolved symbols in plain text. Each line consists of a hexadecimal address followed by the corresponding function name. If a memory address cannot be resolved, it will be marked with "??".
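To make the request and response formats concrete, posting a body such as `55a0c2d3f1a0+7f3b9e21c050` (two made-up addresses) would produce a response of the form:

```
55a0c2d3f1a0 tikv_util::time::Instant::now_coarse
7f3b9e21c050 ??
```

where the second address could not be resolved. The addresses and the symbol name here are purely illustrative, not real output.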
+ From 913f783a62b5940186cca6239e4129b2cb3094ea Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 27 Oct 2023 17:06:34 +0800 Subject: [PATCH 113/203] metrics: fix TiKV Detail command regex escape issue (#15858) ref tikv/tikv#15832 Signed-off-by: Neil Shen --- metrics/grafana/tikv_details.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 4a72d3c204a9..8a43cb245549 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -48858,7 +48858,7 @@ "refId": "StandardVariableQuery" }, "refresh": 1, - "regex": "/\btype=\"([^\"]+)\"/", + "regex": "/\\btype=\"([^\"]+)\"/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", @@ -48956,4 +48956,4 @@ "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", "version": 1 -} \ No newline at end of file +} From 36ff6881787f66a5fb234fd1795983910522c2fa Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 27 Oct 2023 17:50:05 +0800 Subject: [PATCH 114/203] metrics: fix 2 incorrect grafana expression (#15860) close tikv/tikv#15859 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/alertmanager/tikv.accelerate.rules.yml | 4 ++-- metrics/alertmanager/tikv.rules.yml | 4 ++-- metrics/grafana/performance_read.json | 4 ++-- metrics/grafana/tikv_details.json | 10 +++++----- metrics/grafana/tikv_fast_tune.json | 10 +++++----- metrics/grafana/tikv_trouble_shooting.json | 8 ++++---- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/metrics/alertmanager/tikv.accelerate.rules.yml b/metrics/alertmanager/tikv.accelerate.rules.yml index 4bc48336c60e..e5ad2daa8cfa 100644 --- a/metrics/alertmanager/tikv.accelerate.rules.yml +++ b/metrics/alertmanager/tikv.accelerate.rules.yml @@ -32,7 +32,7 @@ groups: - record: tikv_pd_request_duration_seconds:avg:1m expr: sum(rate(tikv_pd_request_duration_seconds_sum{instance=~".*"}[1m])) by (type) / sum(rate(tikv_pd_request_duration_seconds_count{instance=~".*"}[1m])) by (type) - record: tikv_coprocessor_request_wait_seconds:p95:1m - expr: histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{instance=~".*"}[1m])) by (le, instance,req)) + expr: histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{instance=~".*", type="all"}[1m])) by (le, instance,req)) - record: tikv_grpc_msg_duration_seconds:avg:1m expr: sum(rate(tikv_grpc_msg_duration_seconds_sum{instance=~".*"}[1m])) by (type) / sum(rate(tikv_grpc_msg_duration_seconds_count[1m])) by (type) - record: tikv_raftstore_apply_wait_time_duration_secs:p99:1m @@ -48,7 +48,7 @@ groups: - record: tikv_coprocessor_request_duration_seconds:1m expr: sum(rate(tikv_coprocessor_request_duration_seconds_bucket{instance=~".*"}[1m])) by (le) - record: tikv_futurepool_pending_task:1m - expr: sum(rate(tikv_futurepool_pending_task_total{instance=~".*"}[1m])) by (name) + expr: sum(avg_over_time(tikv_futurepool_pending_task_total{instance=~".*"}[1m])) by (name) - record: tikv_storage_engine_async_request:1m expr: sum(rate(tikv_storage_engine_async_request_total{instance=~".*", status!~"all|success"}[1m])) by (status) - record: tikv_thread_cpu_seconds_nogrpc:1m diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index e43ca401d42e..bc092562773a 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -98,12 +98,12 @@ groups: summary: TiKV async request write duration seconds more 
than 1s - alert: TiKV_coprocessor_request_wait_seconds - expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, req)) > 10 + expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{type="all"}[1m])) by (le, instance, req)) > 10 for: 1m labels: env: ENV_LABELS_ENV level: critical - expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, req)) > 10 + expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{type="all"}[1m])) by (le, instance, req)) > 10 annotations: description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' value: '{{ $value }}' diff --git a/metrics/grafana/performance_read.json b/metrics/grafana/performance_read.json index caa2635d34c3..aaf24de396e5 100644 --- a/metrics/grafana/performance_read.json +++ b/metrics/grafana/performance_read.json @@ -2686,14 +2686,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le,req))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-100%", "refId": "D" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 8a43cb245549..cc89e8aeae5a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -297,7 +297,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le))", "hide": false, "interval": "", "legendFormat": "Cop Wait .99", @@ -25117,7 +25117,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}}", @@ -26236,14 +26236,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by 
(le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-100%", "refId": "D" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", @@ -26340,7 +26340,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{req}}", diff --git a/metrics/grafana/tikv_fast_tune.json b/metrics/grafana/tikv_fast_tune.json index 85e9d5c7f02c..f5c3a634c77f 100644 --- a/metrics/grafana/tikv_fast_tune.json +++ b/metrics/grafana/tikv_fast_tune.json @@ -2712,7 +2712,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-.*\"}[1m]))", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-.*\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -5629,7 +5629,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -5645,14 +5645,14 @@ "refId": "A" }, { - "expr": "histogram_quantile(0.999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "duration-999%", "refId": "B" }, { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -5763,7 +5763,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop-normal\"}[1m]))", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop-normal\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, diff --git a/metrics/grafana/tikv_trouble_shooting.json b/metrics/grafana/tikv_trouble_shooting.json index bf1fd5baacfb..f4f5261ad3cc 100644 --- a/metrics/grafana/tikv_trouble_shooting.json +++ b/metrics/grafana/tikv_trouble_shooting.json @@ -3995,14 +3995,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99.99%", "refId": "D" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", @@ -4010,7 +4010,7 @@ "step": 4 }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-95%", @@ -4234,7 +4234,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{req}}", From 0a34c6f4798e6462739152d2364a202996009984 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:32:05 +0800 Subject: [PATCH 115/203] txn: Fix to the prewrite requests retry problem by using TxnStatusCache (#15658) ref tikv/tikv#11187 Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/test_raftstore-v2/src/util.rs | 2 + components/test_raftstore/src/util.rs | 106 +++- src/storage/config.rs | 10 + src/storage/mod.rs | 505 ++++++++++++++++++ src/storage/mvcc/metrics.rs | 16 + .../txn/commands/acquire_pessimistic_lock.rs | 1 + 
.../acquire_pessimistic_lock_resumed.rs | 3 + src/storage/txn/commands/atomic_store.rs | 6 +- .../txn/commands/check_secondary_locks.rs | 14 +- src/storage/txn/commands/check_txn_status.rs | 10 + src/storage/txn/commands/cleanup.rs | 1 + src/storage/txn/commands/commit.rs | 1 + src/storage/txn/commands/compare_and_swap.rs | 8 +- .../txn/commands/flashback_to_version.rs | 1 + src/storage/txn/commands/mod.rs | 12 +- src/storage/txn/commands/pause.rs | 1 + .../txn/commands/pessimistic_rollback.rs | 3 + src/storage/txn/commands/prewrite.rs | 48 +- src/storage/txn/commands/resolve_lock.rs | 10 +- src/storage/txn/commands/resolve_lock_lite.rs | 6 + src/storage/txn/commands/rollback.rs | 1 + src/storage/txn/commands/txn_heart_beat.rs | 8 +- src/storage/txn/scheduler.rs | 26 + src/storage/txn/txn_status_cache.rs | 17 + tests/failpoints/cases/test_kv_service.rs | 113 +++- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 27 files changed, 912 insertions(+), 19 deletions(-) diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index af2bab261837..315150e29c26 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -544,6 +544,7 @@ impl PeerClient { &self.cli, self.ctx.clone(), muts, + vec![], pk, ts, 0, @@ -557,6 +558,7 @@ impl PeerClient { &self.cli, self.ctx.clone(), muts, + vec![], pk, ts, 0, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index e88df1fb0ca1..ff47525ea371 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -958,6 +958,7 @@ pub fn must_kv_prewrite_with( client: &TikvClient, ctx: Context, muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -967,7 +968,7 @@ pub fn must_kv_prewrite_with( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; + prewrite_req.pessimistic_actions = pessimistic_actions; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; @@ -994,6 +995,7 @@ pub fn try_kv_prewrite_with( client: &TikvClient, ctx: Context, muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -1004,6 +1006,7 @@ pub fn try_kv_prewrite_with( client, ctx, muts, + pessimistic_actions, pk, ts, for_update_ts, @@ -1017,6 +1020,7 @@ pub fn try_kv_prewrite_with_impl( client: &TikvClient, ctx: Context, muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -1026,7 +1030,7 @@ pub fn try_kv_prewrite_with_impl( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; + prewrite_req.pessimistic_actions = pessimistic_actions; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; @@ -1046,7 +1050,7 @@ pub fn try_kv_prewrite( pk: Vec, ts: u64, ) -> PrewriteResponse { - try_kv_prewrite_with(client, ctx, muts, pk, ts, 0, false, false) + try_kv_prewrite_with(client, ctx, muts, vec![], pk, ts, 0, false, false) } pub fn try_kv_prewrite_pessimistic( @@ -1056,7 +1060,18 @@ pub fn try_kv_prewrite_pessimistic( pk: Vec, ts: u64, ) -> PrewriteResponse { - try_kv_prewrite_with(client, ctx, muts, pk, ts, ts, false, false) + let len = muts.len(); + try_kv_prewrite_with( + client, + ctx, + muts, + 
vec![DoPessimisticCheck; len], + pk, + ts, + ts, + false, + false, + ) } pub fn must_kv_prewrite( @@ -1066,7 +1081,7 @@ pub fn must_kv_prewrite( pk: Vec, ts: u64, ) { - must_kv_prewrite_with(client, ctx, muts, pk, ts, 0, false, false) + must_kv_prewrite_with(client, ctx, muts, vec![], pk, ts, 0, false, false) } pub fn must_kv_prewrite_pessimistic( @@ -1076,7 +1091,18 @@ pub fn must_kv_prewrite_pessimistic( pk: Vec, ts: u64, ) { - must_kv_prewrite_with(client, ctx, muts, pk, ts, ts, false, false) + let len = muts.len(); + must_kv_prewrite_with( + client, + ctx, + muts, + vec![DoPessimisticCheck; len], + pk, + ts, + ts, + false, + false, + ) } pub fn must_kv_commit( @@ -1232,6 +1258,50 @@ pub fn must_check_txn_status( resp } +pub fn must_kv_have_locks( + client: &TikvClient, + ctx: Context, + ts: u64, + start_key: &[u8], + end_key: &[u8], + expected_locks: &[( + // key + &[u8], + Op, + // start_ts + u64, + // for_update_ts + u64, + )], +) { + let mut req = ScanLockRequest::default(); + req.set_context(ctx); + req.set_limit(100); + req.set_start_key(start_key.to_vec()); + req.set_end_key(end_key.to_vec()); + req.set_max_version(ts); + let resp = client.kv_scan_lock(&req).unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.error.is_none(), "{:?}", resp.get_error()); + + assert_eq!( + resp.locks.len(), + expected_locks.len(), + "lock count not match, expected: {:?}; got: {:?}", + expected_locks, + resp.locks + ); + + for (lock_info, (expected_key, expected_op, expected_start_ts, expected_for_update_ts)) in + resp.locks.into_iter().zip(expected_locks.iter()) + { + assert_eq!(lock_info.get_key(), *expected_key); + assert_eq!(lock_info.get_lock_type(), *expected_op); + assert_eq!(lock_info.get_lock_version(), *expected_start_ts); + assert_eq!(lock_info.get_lock_for_update_ts(), *expected_for_update_ts); + } +} + pub fn get_tso(pd_client: &TestPdClient) -> u64 { block_on(pd_client.get_tso()).unwrap().into_inner() } @@ -1440,11 +1510,31 @@ impl PeerClient { } pub fn must_kv_prewrite_async_commit(&self, muts: Vec, pk: Vec, ts: u64) { - must_kv_prewrite_with(&self.cli, self.ctx.clone(), muts, pk, ts, 0, true, false) + must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + vec![], + pk, + ts, + 0, + true, + false, + ) } pub fn must_kv_prewrite_one_pc(&self, muts: Vec, pk: Vec, ts: u64) { - must_kv_prewrite_with(&self.cli, self.ctx.clone(), muts, pk, ts, 0, false, true) + must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + vec![], + pk, + ts, + 0, + false, + true, + ) } pub fn must_kv_commit(&self, keys: Vec>, start_ts: u64, commit_ts: u64) { diff --git a/src/storage/config.rs b/src/storage/config.rs index a40db2c424b8..91c98ebf57bc 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -31,6 +31,13 @@ const DEFAULT_SCHED_PENDING_WRITE_MB: u64 = 100; const DEFAULT_RESERVED_SPACE_GB: u64 = 5; const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; +// In tests, we've observed 1.2M entries in the TxnStatusCache. We +// conservatively set the limit to 5M entries in total. +// As TxnStatusCache have 128 slots by default. We round it to 5.12M. +// This consumes at most around 300MB memory theoretically, but usually it's +// much less as it's hard to see the capacity being used up. +const DEFAULT_TXN_STATUS_CACHE_CAPACITY: usize = 40_000 * 128; + // Block cache capacity used when TikvConfig isn't validated. It should only // occur in tests. 
const FALLBACK_BLOCK_CACHE_CAPACITY: ReadableSize = ReadableSize::mb(128); @@ -76,6 +83,8 @@ pub struct Config { pub background_error_recovery_window: ReadableDuration, /// Interval to check TTL for all SSTs, pub ttl_check_poll_interval: ReadableDuration, + #[online_config(skip)] + pub txn_status_cache_capacity: usize, #[online_config(submodule)] pub flow_control: FlowControlConfig, #[online_config(submodule)] @@ -105,6 +114,7 @@ impl Default for Config { api_version: 1, enable_ttl: false, ttl_check_poll_interval: ReadableDuration::hours(12), + txn_status_cache_capacity: DEFAULT_TXN_STATUS_CACHE_CAPACITY, flow_control: FlowControlConfig::default(), block_cache: BlockCacheConfig::default(), io_rate_limit: IoRateLimitConfig::default(), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cb4057bfd7e2..cc48d9e36e3b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3853,6 +3853,7 @@ mod tests { commands, commands::{AcquirePessimisticLock, Prewrite}, tests::must_rollback, + txn_status_cache::TxnStatusCache, Error as TxnError, ErrorInner as TxnErrorInner, }, types::{PessimisticLockKeyResult, PessimisticLockResults}, @@ -3884,6 +3885,7 @@ mod tests { statistics: &mut Statistics::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -10869,4 +10871,507 @@ mod tests { // Prewrite still succeeds rx.recv().unwrap().unwrap(); } + + #[test] + fn test_prewrite_cached_committed_transaction_do_not_skip_constraint_check() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let cm = storage.concurrency_manager.clone(); + let k1 = Key::from_raw(b"k1"); + let pk = b"pk"; + // Simulate the case that the current TiKV instance have a non-unique + // index key of a pessimistic transaction. It won't be pessimistic + // locked, and prewrite skips constraint checks. + // Simulate the case that a prewrite is performed twice, with async + // commit enabled, and max_ts changes when the second request arrives. + + // A retrying prewrite request arrives. + cm.update_max_ts(20.into()); + let mut ctx = Context::default(); + ctx.set_is_retry_request(true); + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(k1.clone(), b"v".to_vec()), + SkipPessimisticCheck, + )], + pk.to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + ctx, + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + + let res = rx.recv().unwrap().unwrap(); + assert_eq!(res.min_commit_ts, 21.into()); + + // Commit it. 
+ let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::Commit::new(vec![k1.clone()], 10.into(), 21.into(), Context::default()), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // The txn's status is cached + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 21.into() + ); + + // Check committed; push max_ts to 30 + assert_eq!( + block_on(storage.get(Context::default(), k1.clone(), 30.into())) + .unwrap() + .0, + Some(b"v".to_vec()) + ); + + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(k1.clone(), b"v".to_vec()), + SkipPessimisticCheck, + )], + pk.to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + Context::default(), + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + let res = rx.recv().unwrap().unwrap(); + assert_eq!(res.min_commit_ts, 21.into()); + + // Key must not be locked. + assert_eq!( + block_on(storage.get(Context::default(), k1, 50.into())) + .unwrap() + .0, + Some(b"v".to_vec()) + ); + } + + #[test] + fn test_updating_txn_status_cache() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let cm = storage.concurrency_manager.clone(); + + // Commit + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), + SkipPessimisticCheck, + )], + b"k1".to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .is_none() + ); + + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k1")], + 10.into(), + 20.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 20.into() + ); + + // Unsuccessful commit won't update cache + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k2")], + 30.into(), + 40.into(), + Context::default(), + ), + expect_fail_callback(tx, 0, |_| ()), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(30.into()) + .is_none() + ); + + // 1PC update + let (tx, rx) = channel(); + cm.update_max_ts(59.into()); + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k3"), b"v3".to_vec())], + b"k3".to_vec(), + 50.into(), + 3000, + false, + 1, + 51.into(), + 0.into(), + Some(vec![]), + true, + AssertionLevel::Off, + Context::default(), + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + let res = rx.recv().unwrap().unwrap(); + assert_eq!(res.one_pc_commit_ts, 60.into()); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(50.into()) + .unwrap(), + 60.into() + ); + + // Resolve lock commit + let (tx, rx) = channel(); + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k4"), b"v4".to_vec())], + b"pk".to_vec(), + 70.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + None, + false, + 
AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + storage + .sched_txn_command( + commands::ResolveLockReadPhase::new( + vec![(TimeStamp::from(70), TimeStamp::from(80))] + .into_iter() + .collect(), + None, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(70.into()) + .unwrap(), + 80.into() + ); + + // Resolve lock lite + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k5"), b"v5".to_vec())], + b"pk".to_vec(), + 90.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + storage + .sched_txn_command( + commands::ResolveLockLite::new( + 90.into(), + 100.into(), + vec![Key::from_raw(b"k5")], + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(90.into()) + .unwrap(), + 100.into() + ); + + // CheckTxnStatus: uncommitted transaction + storage + .sched_txn_command( + commands::CheckTxnStatus::new( + Key::from_raw(b"k1"), + 9.into(), + 110.into(), + 110.into(), + true, + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(9.into()) + .is_none() + ); + + // CheckTxnStatus: committed transaction + storage.sched.get_txn_status_cache().remove(10.into()); + storage + .sched_txn_command( + commands::CheckTxnStatus::new( + Key::from_raw(b"k1"), + 10.into(), + 110.into(), + 110.into(), + true, + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 20.into() + ); + + // CheckSecondaryLocks: uncommitted transaction + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k6"), b"v6".to_vec())], + b"pk".to_vec(), + 120.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Lock exists but the transaction status is still unknown + storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k6")], + 120.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(120.into()) + .is_none() + ); + + // One of the lock doesn't exist so the transaction becomes rolled-back status. 
+ storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k6"), Key::from_raw(b"k7")], + 120.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(120.into()) + .is_none() + ); + + // CheckSecondaryLocks: committed transaction + storage + .sched_txn_command( + Prewrite::new( + vec![ + Mutation::make_put(Key::from_raw(b"k8"), b"v8".to_vec()), + Mutation::make_put(Key::from_raw(b"k9"), b"v9".to_vec()), + ], + b"pk".to_vec(), + 130.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + // Commit one of the key + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k9")], + 130.into(), + 140.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .remove(130.into()) + .unwrap(), + 140.into() + ); + + storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k8"), Key::from_raw(b"k9")], + 130.into(), + Context::default(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(130.into()) + .unwrap(), + 140.into() + ); + } } diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index 3c4bda63f7e0..22d2760a7692 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -51,6 +51,13 @@ make_static_metric! { pub struct MvccPrewriteAssertionPerfCounterVec: IntCounter { "type" => MvccPrewriteAssertionPerfKind, } + + pub struct MvccPrewriteRequestAfterCommitCounterVec: IntCounter { + "type" => { + non_retry_req, + retry_req, + }, + } } lazy_static! { @@ -104,4 +111,13 @@ lazy_static! 
{ ) .unwrap() }; + pub static ref MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC: MvccPrewriteRequestAfterCommitCounterVec = { + register_static_int_counter_vec!( + MvccPrewriteRequestAfterCommitCounterVec, + "tikv_storage_mvcc_prewrite_request_after_commit_counter", + "Counter of prewrite requests of already-committed transactions that are determined by checking TxnStatucCache", + &["type"] + ) + .unwrap() + }; } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 58c33706bbce..ceb7957c9260 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -183,6 +183,7 @@ impl WriteCommand for AcquirePessimisticLock new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs index 7640edd7c0c7..a1e2e6fc119b 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -194,6 +194,7 @@ impl WriteCommand for AcquirePessimisticLockR new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, + known_txn_status: vec![], }) } } @@ -239,6 +240,7 @@ mod tests { txn::{ commands::pessimistic_rollback::tests::must_success as must_pessimistic_rollback, tests::{must_commit, must_pessimistic_locked, must_prewrite_put, must_rollback}, + txn_status_cache::TxnStatusCache, }, TestEngineBuilder, }; @@ -275,6 +277,7 @@ mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 9a54895e7e20..4bca5d514c55 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -63,6 +63,7 @@ impl WriteCommand for RawAtomicStore { new_acquired_locks: vec![], lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -77,7 +78,9 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Statistics, TestEngineBuilder, + lock_manager::MockLockManager, + txn::{scheduler::get_raw_ext, txn_status_cache::TxnStatusCache}, + Statistics, TestEngineBuilder, }; #[test] @@ -116,6 +119,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let cmd: Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 92985c4d90d5..ceb169f79b2a 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -201,6 +201,12 @@ impl WriteCommand for CheckSecondaryLocks { } } + let write_result_known_txn_status = + if let SecondaryLocksStatus::Committed(commit_ts) = &result { + vec![(self.start_ts, *commit_ts)] + } else { + vec![] + }; let mut rows = 0; if let SecondaryLocksStatus::RolledBack = &result { // One row is mutated only when a secondary lock is rolled back. 
@@ -220,6 +226,7 @@ impl WriteCommand for CheckSecondaryLocks { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: write_result_known_txn_status, }) } } @@ -235,7 +242,10 @@ pub mod tests { kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, + txn::{ + commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, + }, Engine, }; @@ -265,6 +275,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -303,6 +314,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index dc99ebf3b019..9e9a6cc08952 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -131,6 +131,12 @@ impl WriteCommand for CheckTxnStatus { let mut released_locks = ReleasedLocks::new(); released_locks.push(released); + let write_result_known_txn_status = if let TxnStatus::Committed { commit_ts } = &txn_status + { + vec![(self.lock_ts, *commit_ts)] + } else { + vec![] + }; let pr = ProcessResult::TxnStatus { txn_status }; let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); @@ -145,6 +151,7 @@ impl WriteCommand for CheckTxnStatus { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: write_result_known_txn_status, }) } } @@ -168,6 +175,7 @@ pub mod tests { commands::{pessimistic_rollback, WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, }, types::TxnStatus, ProcessResult, TestEngineBuilder, @@ -211,6 +219,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -259,6 +268,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .map(|r| { diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index 302c4fe1308b..886094a7f34e 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -80,6 +80,7 @@ impl WriteCommand for Cleanup { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 4f05df8fe838..8daff9b2aeec 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -80,6 +80,7 @@ impl WriteCommand for Commit { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![(self.lock_ts, self.commit_ts)], }) } } diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index ca9213b57d36..3725de47273f 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -117,6 +117,7 @@ impl WriteCommand for RawCompareAndSwap { 
new_acquired_locks: vec![], lock_guards, response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -134,8 +135,9 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Engine, Statistics, - TestEngineBuilder, + lock_manager::MockLockManager, + txn::{scheduler::get_raw_ext, txn_status_cache::TxnStatusCache}, + Engine, Statistics, TestEngineBuilder, }; #[test] @@ -215,6 +217,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; match ret.pr { @@ -269,6 +272,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let cmd: Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 37d288fa2665..efbeefa24949 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -185,6 +185,7 @@ impl WriteCommand for FlashbackToVersion { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 5896d6562f12..dabef707e616 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -70,7 +70,7 @@ use crate::storage::{ }, metrics, mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, - txn::{latch, ProcessResult, Result}, + txn::{latch, txn_status_cache::TxnStatusCache, ProcessResult, Result}, types::{ MvccInfo, PessimisticLockParameters, PessimisticLockResults, PrewriteResult, SecondaryLocksStatus, StorageCallbackType, TxnStatus, @@ -422,6 +422,12 @@ pub struct WriteResult { pub new_acquired_locks: Vec, pub lock_guards: Vec, pub response_policy: ResponsePolicy, + /// The txn status that can be inferred by the successful writing. This will + /// be used to update the cache. + /// + /// Currently only commit_ts of committed transactions will be collected. + /// Rolled-back transactions may also be collected in the future. 
+ pub known_txn_status: Vec<(TimeStamp, TimeStamp)>, } pub struct WriteResultLockInfo { @@ -573,6 +579,7 @@ pub struct WriteContext<'a, L: LockManager> { pub statistics: &'a mut Statistics, pub async_apply_prewrite: bool, pub raw_ext: Option, // use for apiv2 + pub txn_status_cache: &'a TxnStatusCache, } pub struct ReaderWithStats<'a, S: Snapshot> { @@ -823,6 +830,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; let res = match ret.pr { @@ -983,6 +991,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; @@ -1008,6 +1017,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 5d3aa7f6d2f1..1f5d40b2d4e3 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -53,6 +53,7 @@ impl WriteCommand for Pause { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 4e0bf8c8c568..531eb256c404 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -96,6 +96,7 @@ impl WriteCommand for PessimisticRollback { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -116,6 +117,7 @@ pub mod tests { commands::{WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, }, TestEngineBuilder, }; @@ -146,6 +148,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let result = command.process_write(snapshot, write_context).unwrap(); write(engine, &ctx, result.to_be_write.modifies); diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 10446db6292b..34c98dab156a 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -24,7 +24,7 @@ use crate::storage::{ kv::WriteData, lock_manager::LockManager, mvcc::{ - has_data_in_range, Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, + has_data_in_range, metrics::*, Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, TxnCommitRecord, }, txn::{ @@ -489,6 +489,36 @@ impl Prewriter { snapshot: impl Snapshot, mut context: WriteContext<'_, impl LockManager>, ) -> Result { + // Handle special cases about retried prewrite requests for pessimistic + // transactions. 
+ if let TransactionKind::Pessimistic(_) = self.kind.txn_kind() { + if let Some(commit_ts) = context.txn_status_cache.get_no_promote(self.start_ts) { + fail_point!("before_prewrite_txn_status_cache_hit"); + if self.ctx.is_retry_request { + MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC + .retry_req + .inc(); + } else { + MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC + .non_retry_req + .inc(); + } + warn!("prewrite request received due to transaction is known to be already committed"; "start_ts" => %self.start_ts, "commit_ts" => %commit_ts); + // In normal cases if the transaction is committed, then the key should have + // been already prewritten successfully. But in order to + // simplify code as well as prevent possible corner cases or + // special cases in the future, we disallow skipping constraint + // check in this case. + // We regard this request as a retried request no matter if it really is (the + // original request may arrive later than retried request due to + // network latency, in which case we'd better handle it like a + // retried request). + self.ctx.is_retry_request = true; + } else { + fail_point!("before_prewrite_txn_status_cache_miss"); + } + } + self.kind .can_skip_constraint_check(&mut self.mutations, &snapshot, &mut context)?; self.check_max_ts_synced(&snapshot)?; @@ -748,6 +778,11 @@ impl Prewriter { new_acquired_locks, lock_guards, response_policy: ResponsePolicy::OnApplied, + known_txn_status: if !one_pc_commit_ts.is_zero() { + vec![(self.start_ts, one_pc_commit_ts)] + } else { + vec![] + }, } } else { // Skip write stage if some keys are locked. @@ -768,6 +803,7 @@ impl Prewriter { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], } }; @@ -1002,6 +1038,7 @@ mod tests { must_acquire_pessimistic_lock, must_acquire_pessimistic_lock_err, must_commit, must_prewrite_put_err_impl, must_prewrite_put_impl, must_rollback, }, + txn_status_cache::TxnStatusCache, Error, ErrorInner, }, types::TxnStatus, @@ -1647,6 +1684,7 @@ mod tests { statistics: &mut Statistics::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), } }; } @@ -1818,6 +1856,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: case.async_apply_prewrite, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let mut engine = TestEngineBuilder::new().build().unwrap(); let snap = engine.snapshot(Default::default()).unwrap(); @@ -1932,6 +1971,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -1960,6 +2000,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2043,6 +2084,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2075,6 +2117,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = 
engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2345,6 +2388,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2369,6 +2413,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2575,6 +2620,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let res = prewrite_cmd.cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index f3d141807e8e..cd01fc60475c 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -83,6 +83,7 @@ impl WriteCommand for ResolveLock { let mut scan_key = self.scan_key.take(); let rows = key_locks.len(); let mut released_locks = ReleasedLocks::new(); + let mut known_txn_status = vec![]; for (current_key, current_lock) in key_locks { txn.start_ts = current_lock.ts; reader.start_ts = current_lock.ts; @@ -103,7 +104,10 @@ impl WriteCommand for ResolveLock { // type. They could be left if the transaction is finally committed and // pessimistic conflict retry happens during execution. match commit(&mut txn, &mut reader, current_key.clone(), commit_ts) { - Ok(res) => res, + Ok(res) => { + known_txn_status.push((current_lock.ts, commit_ts)); + res + } Err(MvccError(box MvccErrorInner::TxnLockNotFound { .. 
})) if current_lock.is_pessimistic_lock() => { @@ -125,6 +129,9 @@ impl WriteCommand for ResolveLock { } } + known_txn_status.sort(); + known_txn_status.dedup(); + let pr = if scan_key.is_none() { ProcessResult::Res } else { @@ -151,6 +158,7 @@ impl WriteCommand for ResolveLock { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status, }) } } diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index 63fe201596d4..318e5d573138 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -63,6 +63,11 @@ impl WriteCommand for ResolveLockLite { }); } + let known_txn_status = if !self.commit_ts.is_zero() { + vec![(self.start_ts, self.commit_ts)] + } else { + vec![] + }; let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -76,6 +81,7 @@ impl WriteCommand for ResolveLockLite { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status, }) } } diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index f3b674f49160..df60767e7167 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -71,6 +71,7 @@ impl WriteCommand for Rollback { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 448395fc4366..c900464099a8 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -96,6 +96,7 @@ impl WriteCommand for TxnHeartBeat { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -111,7 +112,10 @@ pub mod tests { kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, + txn::{ + commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, + }, Engine, }; @@ -143,6 +147,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -185,6 +190,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .is_err() diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3c6a66c3941c..36492f227011 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -83,6 +83,7 @@ use crate::{ flow_controller::FlowController, latch::{Latches, Lock}, sched_pool::{tls_collect_query, tls_collect_scan_details, SchedPool}, + txn_status_cache::TxnStatusCache, Error, ErrorInner, ProcessResult, }, types::StorageCallback, @@ -293,6 +294,8 @@ struct TxnSchedulerInner { quota_limiter: Arc, resource_manager: Option>, feature_gate: FeatureGate, + + txn_status_cache: TxnStatusCache, } #[inline] @@ -484,6 +487,7 @@ impl TxnScheduler { quota_limiter, resource_manager, feature_gate, + txn_status_cache: TxnStatusCache::new(config.txn_status_cache_capacity), }); slow_log!( @@ -815,6 +819,7 @@ impl TxnScheduler { pipelined: bool, async_apply_prewrite: bool, 
new_acquired_locks: Vec, + known_txn_status: Vec<(TimeStamp, TimeStamp)>, tag: CommandKind, metadata: TaskMetadata<'_>, sched_details: &SchedulerDetails, @@ -837,6 +842,17 @@ impl TxnScheduler { debug!("write command finished"; "cid" => cid, "pipelined" => pipelined, "async_apply_prewrite" => async_apply_prewrite); drop(lock_guards); + + if result.is_ok() && !known_txn_status.is_empty() { + // Update cache before calling the callback. + // Reversing the order can lead to test failures as the cache may still + // remain not updated after receiving signal from the callback. + let now = std::time::SystemTime::now(); + for (start_ts, commit_ts) in known_txn_status { + self.inner.txn_status_cache.insert(start_ts, commit_ts, now); + } + } + let tctx = self.inner.dequeue_task_context(cid); let mut do_wake_up = !tctx.woken_up_resumable_lock_requests.is_empty(); @@ -1258,6 +1274,7 @@ impl TxnScheduler { statistics: &mut sched_details.stat, async_apply_prewrite: self.inner.enable_async_apply_prewrite, raw_ext, + txn_status_cache: &self.inner.txn_status_cache, }; let begin_instant = Instant::now(); let res = unsafe { @@ -1328,6 +1345,7 @@ impl TxnScheduler { new_acquired_locks, lock_guards, response_policy, + known_txn_status, } = match deadline .check() .map_err(StorageError::from) @@ -1406,6 +1424,7 @@ impl TxnScheduler { false, false, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1441,6 +1460,7 @@ impl TxnScheduler { false, false, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1636,6 +1656,7 @@ impl TxnScheduler { pipelined, is_async_apply_prewrite, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1879,6 +1900,11 @@ impl TxnScheduler { .push_lock_wait(entry, Default::default()); } } + + #[cfg(test)] + pub fn get_txn_status_cache(&self) -> &TxnStatusCache { + &self.inner.txn_status_cache + } } pub async fn get_raw_ext( diff --git a/src/storage/txn/txn_status_cache.rs b/src/storage/txn/txn_status_cache.rs index 2428bbb99c58..ab50bd0412e3 100644 --- a/src/storage/txn/txn_status_cache.rs +++ b/src/storage/txn/txn_status_cache.rs @@ -371,6 +371,23 @@ impl TxnStatusCache { let mut slot = self.slots[self.slot_index(start_ts)].lock(); slot.get(&start_ts).map(|entry| entry.commit_ts) } + + /// Remove an entry from the cache. We usually don't need to remove anything + /// from the `TxnStatusCache`, but it's useful in tests to construct cache- + /// miss cases. 
+ #[cfg(test)] + pub fn remove(&self, start_ts: TimeStamp) -> Option { + if !self.is_enabled { + return None; + } + + let res = { + let mut slot = self.slots[self.slot_index(start_ts)].lock(); + slot.remove(&start_ts).map(|e| e.commit_ts) + }; + debug_assert!(self.get_no_promote(start_ts).is_none()); + res + } } #[cfg(test)] diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index 00f5c3c778e1..2ec1109edd4d 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -3,10 +3,14 @@ use std::{sync::Arc, time::Duration}; use grpcio::{ChannelBuilder, Environment}; -use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; +use kvproto::{ + kvrpcpb::{PrewriteRequestPessimisticAction::SkipPessimisticCheck, *}, + tikvpb::TikvClient, +}; use test_raftstore::{ - configure_for_lease_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, - must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with_impl, + configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_prewrite, + must_kv_prewrite_with, must_new_cluster_and_kv_client, must_new_cluster_mul, + new_server_cluster, try_kv_prewrite_with, try_kv_prewrite_with_impl, }; use tikv_util::{config::ReadableDuration, HandyRwLock}; @@ -92,6 +96,7 @@ fn test_undetermined_write_err() { &client, ctx, vec![mutation], + vec![], b"k".to_vec(), 10, 0, @@ -156,3 +161,105 @@ fn test_stale_read_on_local_leader() { assert!(resp.region_error.is_none()); assert_eq!(v, resp.get_value()); } + +#[test] +fn test_storage_do_not_update_txn_status_cache_on_write_error() { + let cache_hit_fp = "before_prewrite_txn_status_cache_hit"; + let cache_miss_fp = "before_prewrite_txn_status_cache_miss"; + + let (cluster, leader, ctx) = must_new_cluster_mul(1); + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env) + .connect(&cluster.sim.read().unwrap().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let pk = b"pk".to_vec(); + + // Case 1: Test write successfully. + + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(b"k1".to_vec()); + mutation.set_value(b"v1".to_vec()); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation.clone()], + vec![SkipPessimisticCheck], + pk.clone(), + 10, + 10, + true, + false, + ); + must_kv_commit(&client, ctx.clone(), vec![b"k1".to_vec()], 10, 15, 15); + + // Expect cache hit + fail::cfg(cache_miss_fp, "panic").unwrap(); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![SkipPessimisticCheck], + pk.clone(), + 10, + 10, + true, + false, + ); + // Key not locked. + must_kv_have_locks(&client, ctx.clone(), 19, b"k1", b"k2", &[]); + fail::remove(cache_miss_fp); + + // Case 2: Write failed. 
+ + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(b"k2".to_vec()); + mutation.set_value(b"v2".to_vec()); + + try_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation.clone()], + vec![SkipPessimisticCheck], + pk.clone(), + 20, + 20, + true, + false, + ); + fail::cfg("raftkv_early_error_report", "return").unwrap(); + let mut commit_req = CommitRequest::default(); + commit_req.set_context(ctx.clone()); + commit_req.set_start_version(20); + commit_req.set_commit_version(25); + commit_req.set_keys(vec![b"k2".to_vec()].into()); + let commit_resp = client.kv_commit(&commit_req).unwrap(); + assert!(commit_resp.has_region_error()); + fail::remove("raftkv_early_error_report"); + must_kv_have_locks( + &client, + ctx.clone(), + 29, + b"k2", + b"k3", + &[(b"k2", Op::Put, 20, 20)], + ); + + // Expect cache miss + fail::cfg(cache_hit_fp, "panic").unwrap(); + try_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![SkipPessimisticCheck], + pk, + 20, + 20, + true, + false, + ); + must_kv_have_locks(&client, ctx, 29, b"k2", b"k3", &[(b"k2", Op::Put, 20, 20)]); + fail::remove(cache_hit_fp); +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 1ac6e3840f1a..2f4f5ba76956 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -761,6 +761,7 @@ fn test_serde_custom_tikv_config() { other_priority: IoPriority::Low, }, background_error_recovery_window: ReadableDuration::hours(1), + txn_status_cache_capacity: 1000, }; value.coprocessor = CopConfig { split_region_on_table: false, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index fe1fa066ae8d..1bb52fad5fc5 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -101,6 +101,7 @@ reserve-space = "10GB" reserve-raft-space = "2GB" enable-ttl = true ttl-check-poll-interval = "0s" +txn-status-cache-capacity = 1000 [storage.block-cache] capacity = "40GB" From 4093bda19289c12e201d6d940f18aa5beca0975a Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 1 Nov 2023 01:35:06 +0800 Subject: [PATCH 116/203] tidb_query_expr: fix the behavior of `field` function (#15879) close tikv/tikv#15878 Signed-off-by: Yang Keao --- components/tidb_query_expr/src/impl_string.rs | 52 +++++++++++++++---- components/tidb_query_expr/src/lib.rs | 10 +++- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index f3b9b03c287d..25c9294d533a 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -635,15 +635,22 @@ fn field(args: &[Option<&T>]) -> Result #[rpn_fn(nullable, varg, min_args = 1)] #[inline] -fn field_bytes(args: &[Option]) -> Result> { +fn field_bytes(args: &[Option]) -> Result> { Ok(Some(match args[0] { // As per the MySQL doc, if the first argument is NULL, this function always returns 0. 
None => 0, - Some(val) => args - .iter() - .skip(1) - .position(|&i| i == Some(val)) - .map_or(0, |pos| (pos + 1) as i64), + Some(val) => { + for (pos, arg) in args.iter().enumerate().skip(1) { + if arg.is_none() { + continue; + } + match C::sort_compare(val, arg.unwrap()) { + Ok(Ordering::Equal) => return Ok(Some(pos as i64)), + _ => continue, + } + } + 0 + } })) } @@ -3214,6 +3221,7 @@ mod tests { Some(b"baz".to_vec()), ], Some(1), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3223,6 +3231,7 @@ mod tests { Some(b"hello".to_vec()), ], Some(0), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3232,6 +3241,7 @@ mod tests { Some(b"hello".to_vec()), ], Some(3), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3244,6 +3254,7 @@ mod tests { Some(b"Hello".to_vec()), ], Some(6), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3252,14 +3263,37 @@ mod tests { Some(b"Hello World!".to_vec()), ], Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![None, None, Some(b"Hello World!".to_vec())], + Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![Some(b"Hello World!".to_vec())], + Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![ + Some(b"a".to_vec()), + Some(b"A".to_vec()), + Some(b"a".to_vec()), + ], + Some(1), + Collation::Utf8Mb4GeneralCi, ), - (vec![None, None, Some(b"Hello World!".to_vec())], Some(0)), - (vec![Some(b"Hello World!".to_vec())], Some(0)), ]; - for (args, expect_output) in test_cases { + for (args, expect_output, collation) in test_cases { let output = RpnFnScalarEvaluator::new() .push_params(args) + .return_field_type( + FieldTypeBuilder::new() + .tp(FieldTypeTp::Long) + .collation(collation), + ) .evaluate(ScalarFuncSig::FieldString) .unwrap(); assert_eq!(output, expect_output); diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index c2ef67221486..61fb3612b632 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -409,6 +409,14 @@ fn map_lower_utf8_sig(value: ScalarFuncSig, children: &[Expr]) -> Result Result { + Ok(match_template_collator! { + TT, match ret_field_type.as_accessor().collation().map_err(tidb_query_datatype::codec::Error::from)? 
{ + Collation::TT => field_bytes_fn_meta::() + } + }) +} + #[rustfmt::skip] fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { let value = expr.get_sig(); @@ -787,7 +795,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::Locate3Args => locate_3_args_fn_meta(), ScalarFuncSig::FieldInt => field_fn_meta::(), ScalarFuncSig::FieldReal => field_fn_meta::(), - ScalarFuncSig::FieldString => field_bytes_fn_meta(), + ScalarFuncSig::FieldString => map_field_string_sig(ft)?, ScalarFuncSig::Elt => elt_fn_meta(), ScalarFuncSig::MakeSet => make_set_fn_meta(), ScalarFuncSig::Space => space_fn_meta(), From 2a24cfc4b25de341cf6b93727d11dce0c0648a5b Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Tue, 31 Oct 2023 11:54:36 -0700 Subject: [PATCH 117/203] rafstore, engine_rocks: periodic full compaction (#12729) (#15853) ref tikv/tikv#12729 Signed-off-by: Alex Feinberg Co-authored-by: lucasliang --- Cargo.lock | 2 + components/online_config/Cargo.toml | 1 + components/online_config/src/lib.rs | 7 + components/raftstore/Cargo.toml | 1 + components/raftstore/src/store/config.rs | 16 +- components/raftstore/src/store/fsm/store.rs | 72 ++++- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/msg.rs | 2 + .../raftstore/src/store/worker/compact.rs | 75 +++++- .../raftstore/src/store/worker/metrics.rs | 5 + components/tikv_util/src/config.rs | 248 ++++++++++++++++++ tests/integrations/config/mod.rs | 4 +- 12 files changed, 429 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52408df1ab2d..09459fd9123a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3565,6 +3565,7 @@ checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" name = "online_config" version = "0.1.0" dependencies = [ + "chrono", "online_config_derive", "serde", "serde_derive", @@ -4359,6 +4360,7 @@ dependencies = [ "byteorder", "bytes", "causal_ts", + "chrono", "collections", "concurrency_manager", "crc32fast", diff --git a/components/online_config/Cargo.toml b/components/online_config/Cargo.toml index 9d67f1cf1deb..47e8996391c4 100644 --- a/components/online_config/Cargo.toml +++ b/components/online_config/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" publish = false [dependencies] +chrono = "0.4" online_config_derive = { path = "./online_config_derive" } serde = { version = "1.0", features = ["derive"] } diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 45694305a5f3..5fec0cea9bc2 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -5,9 +5,12 @@ use std::{ fmt::{self, Debug, Display, Formatter}, }; +use chrono::{FixedOffset, NaiveTime}; pub use online_config_derive::*; pub type ConfigChange = HashMap; +pub type OffsetTime = (NaiveTime, FixedOffset); +pub type Schedule = Vec; #[derive(Clone, PartialEq)] pub enum ConfigValue { @@ -21,6 +24,8 @@ pub enum ConfigValue { Bool(bool), String(String), Module(ConfigChange), + OffsetTime(OffsetTime), + Schedule(Schedule), Skip, None, } @@ -38,6 +43,8 @@ impl Display for ConfigValue { ConfigValue::Bool(v) => write!(f, "{}", v), ConfigValue::String(v) => write!(f, "{}", v), ConfigValue::Module(v) => write!(f, "{:?}", v), + ConfigValue::OffsetTime((t, o)) => write!(f, "{} {}", t, o), + ConfigValue::Schedule(v) => write!(f, "{:?}", v), ConfigValue::Skip => write!(f, "ConfigValue::Skip"), ConfigValue::None => write!(f, ""), } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 1933bad6da90..3a8caa421e58 100644 
--- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -29,6 +29,7 @@ bitflags = "1.0.1" byteorder = "1.2" bytes = "1.0" causal_ts = { workspace = true } +chrono = "0.4" collections = { workspace = true } concurrency_manager = { workspace = true } crc32fast = "1.2" diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 95c4aed93499..3d1b58a6e751 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use serde_with::with_prefix; use tikv_util::{ box_err, - config::{ReadableDuration, ReadableSize, VersionTrack}, + config::{ReadableDuration, ReadableSchedule, ReadableSize, VersionTrack}, error, info, sys::SysQuota, warn, @@ -152,6 +152,15 @@ pub struct Config { pub lock_cf_compact_interval: ReadableDuration, pub lock_cf_compact_bytes_threshold: ReadableSize, + /// Hours of the day during which we may execute a periodic full compaction. + /// If not set or empty, periodic full compaction will not run. In toml this + /// should be a list of timesin "HH:MM" format with an optional timezone + /// offset. If no timezone is specified, local timezone is used. E.g., + /// `["23:00 +0000", "03:00 +0700"]` or `["23:00", "03:00"]`. + pub periodic_full_compact_start_times: ReadableSchedule, + /// Do not start a full compaction if cpu utilization exceeds this number. + pub periodic_full_compact_start_max_cpu: f64, + #[online_config(skip)] pub notify_capacity: usize, pub messages_per_tick: usize, @@ -435,6 +444,11 @@ impl Default for Config { region_compact_redundant_rows_percent: None, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), + // Disable periodic full compaction by default. + periodic_full_compact_start_times: ReadableSchedule::default(), + // If periodic full compaction is enabled, do not start a full compaction + // if the CPU utilization is over 10%. + periodic_full_compact_start_max_cpu: 0.1, notify_capacity: 40960, snap_mgr_gc_tick_interval: ReadableDuration::minutes(1), snap_gc_timeout: ReadableDuration::hours(4), diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2efcbf87b09e..950768055e46 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -59,8 +59,11 @@ use tikv_util::{ mpsc::{self, LooseBoundedSender, Receiver}, slow_log, store::{find_peer, region_on_stores}, - sys as sys_util, - sys::disk::{get_disk_status, DiskUsage}, + sys::{ + self as sys_util, + cpu_time::ProcessStat, + disk::{get_disk_status, DiskUsage}, + }, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, timer::SteadyTimer, warn, @@ -117,6 +120,10 @@ pub const PENDING_MSG_CAP: usize = 100; pub const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = Feature::require(6, 1, 0); // it only makes sense for large region +// Every 30 minutes, check if we can run full compaction. This allows the config +// setting `periodic_full_compact_start_max_cpu` to be changed dynamically. 
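// Illustrative sketch, not part of this patch: how one schedule entry such as
// "03:00 +0700" from `periodic-full-compact-start-times` can be reduced to an
// "is it that hour right now?" check with chrono. This is a simplified,
// standalone version of what ReadableSchedule::is_scheduled_this_hour (added
// to tikv_util::config later in this patch) does for every configured entry;
// when no offset is given, the local timezone is used, as the config comment
// above describes.
use chrono::{FixedOffset, Local, NaiveTime, Timelike};

fn entry_matches_current_hour(hh_mm: &str, offset: FixedOffset) -> bool {
    // Parse the wall-clock part of the entry, e.g. "03:00".
    let scheduled = NaiveTime::parse_from_str(hh_mm, "%H:%M").expect("valid HH:MM");
    // Convert "now" into the entry's timezone before comparing hours.
    let now = Local::now().with_timezone(&offset).time();
    now.hour() == scheduled.hour()
}

fn example() -> bool {
    // "03:00 +0700": full compaction may start any time between 03:00 and
    // 03:59 in the UTC+7 timezone.
    entry_matches_current_hour("03:00", FixedOffset::east_opt(7 * 3600).unwrap())
}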
+const PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION: Duration = Duration::from_secs(30 * 60); + pub struct StoreInfo { pub kv_engine: EK, pub raft_engine: ER, @@ -768,6 +775,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::SnapGc => self.on_snap_mgr_gc(), StoreTick::CompactLockCf => self.on_compact_lock_cf(), StoreTick::CompactCheck => self.on_compact_check_tick(), + StoreTick::PeriodicFullCompact => self.on_full_compact_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), } @@ -858,6 +866,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.fsm.store.start_time = Some(time::get_time()); self.register_cleanup_import_sst_tick(); self.register_compact_check_tick(); + self.register_full_compact_tick(); self.register_pd_store_heartbeat_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); @@ -2436,6 +2445,65 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn register_full_compact_tick(&self) { + if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { + self.ctx.schedule_store_tick( + StoreTick::PeriodicFullCompact, + PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION, + ) + } + } + + fn on_full_compact_tick(&mut self) { + self.register_full_compact_tick(); + + let local_time = chrono::Local::now(); + if !self + .ctx + .cfg + .periodic_full_compact_start_times + .is_scheduled_this_hour(&local_time) + { + debug!( + "full compaction may not run at this time"; + "local_time" => ?local_time, + "periodic_full_compact_start_times" => ?self.ctx.cfg.periodic_full_compact_start_times, + ); + return; + } + + if self.ctx.global_stat.stat.is_busy.load(Ordering::SeqCst) { + warn!("full compaction may not run at this time, `is_busy` flag is true",); + return; + } + + let mut proc_stats = ProcessStat::cur_proc_stat().unwrap(); + let cpu_usage = proc_stats.cpu_usage().unwrap(); + let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; + if cpu_usage > max_start_cpu_usage { + warn!( + "full compaction may not run at this time, cpu usage is above max"; + "cpu_usage" => cpu_usage, + "threshold" => max_start_cpu_usage, + ); + return; + } + + // Attempt executing a periodic full compaction. + // Note that full compaction will not run if other compaction tasks are running. + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::Compact(CompactTask::PeriodicFullCompact)) + { + error!( + "failed to schedule a periodic full compaction"; + "store_id" => self.fsm.store.id, + "err" => ?e + ); + } + } + fn register_compact_check_tick(&self) { self.ctx.schedule_store_tick( StoreTick::CompactCheck, diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index a4f2b7820cb0..8f7bc8af2264 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -214,6 +214,7 @@ make_static_metric! 
{ pub label_enum RaftEventDurationType { compact_check, + periodic_full_compact, pd_store_heartbeat, snap_gc, compact_lock_cf, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a92e5169549d..0d703143a08b 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -435,6 +435,7 @@ impl PeerTick { #[derive(Debug, Clone, Copy)] pub enum StoreTick { CompactCheck, + PeriodicFullCompact, PdStoreHeartbeat, SnapGc, CompactLockCf, @@ -447,6 +448,7 @@ impl StoreTick { pub fn tag(self) -> RaftEventDurationType { match self { StoreTick::CompactCheck => RaftEventDurationType::compact_check, + StoreTick::PeriodicFullCompact => RaftEventDurationType::periodic_full_compact, StoreTick::PdStoreHeartbeat => RaftEventDurationType::pd_store_heartbeat, StoreTick::SnapGc => RaftEventDurationType::snap_gc, StoreTick::CompactLockCf => RaftEventDurationType::compact_lock_cf, diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 3b2a2ec0404f..abdbaf5e938d 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -11,11 +11,13 @@ use fail::fail_point; use thiserror::Error; use tikv_util::{box_try, error, info, time::Instant, warn, worker::Runnable}; -use super::metrics::COMPACT_RANGE_CF; +use super::metrics::{COMPACT_RANGE_CF, FULL_COMPACT}; type Key = Vec; pub enum Task { + PeriodicFullCompact, + Compact { cf_name: String, start_key: Option, // None means smallest key @@ -58,6 +60,7 @@ impl CompactThreshold { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { + Task::PeriodicFullCompact => f.debug_struct("FullCompact").finish(), Task::Compact { ref cf_name, ref start_key, @@ -127,6 +130,31 @@ where Runner { engine } } + /// Periodic full compaction. + /// + /// NOTE this is a highly experimental feature! + /// + /// TODO: Do not start if there is heavy I/O. + /// TODO: Make it possible to rate limit, pause, or abort this by compacting + /// a range at a time. + pub fn full_compact(&mut self) -> Result<(), Error> { + fail_point!("on_full_compact"); + info!("full compaction started"); + let timer = Instant::now(); + let full_compact_timer = FULL_COMPACT.start_coarse_timer(); + box_try!(self.engine.compact_range( + None, None, // Compact the entire key range. + true, // no other compaction will run when this is running + 1, // number of threads threads + )); + full_compact_timer.observe_duration(); + info!( + "full compaction finished"; + "time_takes" => ?timer.saturating_elapsed(), + ); + Ok(()) + } + /// Sends a compact range command to RocksDB to compact the range of the cf. 
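// Sketch of the direction the TODO in full_compact above points at, not what
// this patch implements (the patch issues one engine-wide compact_range):
// compact predetermined sub-ranges one at a time and check an abort flag
// between chunks, so a periodic full compaction could be rate limited, paused,
// or cancelled. The `compact` closure stands in for the real engine call and
// is an assumption of this sketch.
use std::sync::atomic::{AtomicBool, Ordering};

fn full_compact_in_chunks<F>(
    ranges: &[(Option<Vec<u8>>, Option<Vec<u8>>)],
    abort: &AtomicBool,
    mut compact: F,
) -> Result<(), String>
where
    F: FnMut(Option<&[u8]>, Option<&[u8]>) -> Result<(), String>,
{
    for (start, end) in ranges {
        if abort.load(Ordering::Relaxed) {
            // Stop between chunks; ranges already compacted stay compacted.
            return Ok(());
        }
        compact(start.as_deref(), end.as_deref())?;
    }
    Ok(())
}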
pub fn compact_range_cf( &mut self, @@ -163,6 +191,11 @@ where fn run(&mut self, task: Task) { match task { + Task::PeriodicFullCompact => { + if let Err(e) = self.full_compact() { + error!("periodic full compaction failed"; "err" => %e); + } + } Task::Compact { cf_name, start_key, @@ -456,4 +489,44 @@ mod tests { .unwrap(); assert_eq!(ranges_need_to_compact, expected_ranges); } + + #[test] + fn test_full_compact_deletes() { + let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); + let engine = open_db(tmp_dir.path().to_str().unwrap()); + let mut runner = Runner::new(engine.clone()); + + // mvcc_put 0..5 + for i in 0..5 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let (start, end) = (data_key(b"k0"), data_key(b"k5")); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries, stats.num_versions); + + for i in 0..5 { + let k = format!("k{}", i); + delete(&engine, k.as_bytes(), 3.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 5); + + runner.run(Task::PeriodicFullCompact); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 0); + } } diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 8dca3bcfd443..bdf244590112 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -160,6 +160,11 @@ lazy_static! { &["cf"] ) .unwrap(); + pub static ref FULL_COMPACT: Histogram = register_histogram!( + "tikv_storage_full_compact_duration_seconds", + "Bucketed histogram of full compaction for the storage." 
+ ) + .unwrap(); pub static ref REGION_HASH_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_hash_duration_seconds", "Bucketed histogram of raftstore hash computation duration" diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index c3d240d3c4f7..39e143fc04ce 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -15,6 +15,10 @@ use std::{ time::Duration, }; +use chrono::{ + format::{self, Fixed, Item, Parsed}, + DateTime, FixedOffset, Local, NaiveTime, TimeZone, Timelike, +}; use online_config::ConfigValue; use serde::{ de::{self, Unexpected, Visitor}, @@ -522,6 +526,166 @@ impl<'de> Deserialize<'de> for ReadableDuration { } } +#[derive(Clone, Debug, Copy, PartialEq)] +pub struct ReadableOffsetTime(pub NaiveTime, pub FixedOffset); + +impl From for ConfigValue { + fn from(ot: ReadableOffsetTime) -> ConfigValue { + ConfigValue::OffsetTime((ot.0, ot.1)) + } +} + +impl From for ReadableOffsetTime { + fn from(c: ConfigValue) -> ReadableOffsetTime { + if let ConfigValue::OffsetTime(ot) = c { + ReadableOffsetTime(ot.0, ot.1) + } else { + panic!("expect: ConfigValue::OffsetTime, got: {:?}", c) + } + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)] +pub struct ReadableSchedule(pub Vec); + +impl From for ConfigValue { + fn from(otv: ReadableSchedule) -> ConfigValue { + ConfigValue::Schedule(otv.0.into_iter().map(|ot| (ot.0, ot.1)).collect::>()) + } +} + +impl From for ReadableSchedule { + fn from(c: ConfigValue) -> ReadableSchedule { + if let ConfigValue::Schedule(otv) = c { + ReadableSchedule( + otv.into_iter() + .map(|(o, t)| ReadableOffsetTime(o, t)) + .collect::>(), + ) + } else { + panic!("expect: ConfigValue::Schedule, got: {:?}", c) + } + } +} + +impl ReadableSchedule { + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn is_scheduled_this_hour(&self, datetime: &DateTime) -> bool { + self.0.iter().any(|time| time.hour_matches(datetime)) + } + + pub fn is_scheduled_this_hour_minute(&self, datetime: &DateTime) -> bool { + self.0 + .iter() + .any(|time| time.hour_minutes_matches(datetime)) + } +} + +impl FromStr for ReadableOffsetTime { + type Err = String; + + fn from_str(ot_str: &str) -> Result { + let (time, offset) = if let Some((time_str, offset_str)) = ot_str.split_once(' ') { + let time = NaiveTime::parse_from_str(time_str, "%H:%M").map_err(|e| e.to_string())?; + let offset = parse_offset(offset_str)?; + (time, offset) + } else { + let time = NaiveTime::parse_from_str(ot_str, "%H:%M").map_err(|e| e.to_string())?; + (time, local_offset()) + }; + Ok(ReadableOffsetTime(time, offset)) + } +} + +/// Returns the `FixedOffset` for the timezone this `tikv` server has been +/// configured to use. +fn local_offset() -> FixedOffset { + let &offset = Local::now().offset(); + offset +} + +/// Parses the offset specified by `str`. +/// Note: `FixedOffset` in latest `chrono` implements `FromStr`. Once we are +/// able to upgrade to it (`components/tidb_query_datatype` requires a large +/// refactoring that is outside the scope of this PR), we can remove this +/// method. 
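// Tiny illustration of the note above, as an aside rather than part of this
// patch: once the workspace can move to a newer chrono, the hand-rolled
// parse_offset helper below could be replaced by FixedOffset's own FromStr
// implementation. The exact accepted formats depend on the chrono version, so
// treat this as an assumption to verify at upgrade time.
use chrono::FixedOffset;

fn parse_with_newer_chrono() -> Result<FixedOffset, chrono::ParseError> {
    // e.g. nine and a half hours east of UTC
    "+09:30".parse::<FixedOffset>()
}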
+fn parse_offset(offset_str: &str) -> Result { + let mut parsed = Parsed::new(); + format::parse( + &mut parsed, + offset_str, + [Item::Fixed(Fixed::TimezoneOffsetZ)].iter(), + ) + .map_err(|e| e.to_string())?; + parsed.to_fixed_offset().map_err(|e| e.to_string()) +} + +impl fmt::Display for ReadableOffsetTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.0, self.1) + } +} + +impl ReadableOffsetTime { + /// Converts `datetime` from `Tz` to the same timezone as this instance and + /// returns `true` if the hour of the day is matches hour of this + /// instance. + pub fn hour_matches(&self, datetime: &DateTime) -> bool { + self.convert_to_this_offset(datetime).hour() == self.0.hour() + } + + /// Converts `datetime` from `Tz` to the same timezone as this instance and + /// returns `true` if hours and minutes match this instance. + pub fn hour_minutes_matches(&self, datetime: &DateTime) -> bool { + let time = self.convert_to_this_offset(datetime); + time.hour() == self.0.hour() && time.minute() == self.0.minute() + } + + fn convert_to_this_offset(&self, datetime: &DateTime) -> NaiveTime { + datetime.with_timezone(&self.1).time() + } +} + +impl Serialize for ReadableOffsetTime { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut buffer = String::new(); + write!(buffer, "{}", self).unwrap(); + serializer.serialize_str(&buffer) + } +} + +impl<'de> Deserialize<'de> for ReadableOffsetTime { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct OffTimeVisitor; + + impl<'de> Visitor<'de> for OffTimeVisitor { + type Value = ReadableOffsetTime; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("valid duration") + } + + fn visit_str(self, off_time_str: &str) -> Result + where + E: de::Error, + { + off_time_str.parse().map_err(E::custom) + } + } + + deserializer.deserialize_str(OffTimeVisitor) + } +} + pub fn normalize_path>(path: P) -> PathBuf { use std::path::Component; let mut components = path.as_ref().components().peekable(); @@ -1771,6 +1935,90 @@ mod tests { assert!(toml::from_str::("d = 23").is_err()); } + #[test] + fn test_readable_offset_time() { + let decode_cases = vec![ + ( + "23:00 +0000", + ReadableOffsetTime( + NaiveTime::from_hms_opt(23, 00, 00).unwrap(), + FixedOffset::east_opt(0).unwrap(), + ), + ), + ( + "03:00", + ReadableOffsetTime(NaiveTime::from_hms_opt(3, 00, 00).unwrap(), local_offset()), + ), + ( + "13:23 +09:30", + ReadableOffsetTime( + NaiveTime::from_hms_opt(13, 23, 00).unwrap(), + FixedOffset::east_opt(3600 * 9 + 1800).unwrap(), + ), + ), + ( + "09:30 -08:00", + ReadableOffsetTime( + NaiveTime::from_hms_opt(9, 30, 00).unwrap(), + FixedOffset::west_opt(3600 * 8).unwrap(), + ), + ), + ]; + for (encoded, expected) in decode_cases { + let actual = encoded.parse::().unwrap_or_else(|e| { + panic!( + "error parsing encoded={} expected={} error={}", + encoded, expected, e + ) + }); + assert_eq!(actual, expected); + } + let time = ReadableOffsetTime( + NaiveTime::from_hms_opt(9, 30, 00).unwrap(), + FixedOffset::west_opt(0).unwrap(), + ); + assert_eq!(format!("{}", time), "09:30:00 +00:00"); + let dt = DateTime::parse_from_rfc3339("2023-10-27T09:39:57-00:00").unwrap(); + assert!(time.hour_matches(&dt)); + assert!(!time.hour_minutes_matches(&dt)); + let dt = DateTime::parse_from_rfc3339("2023-10-27T09:30:57-00:00").unwrap(); + assert!(time.hour_minutes_matches(&dt)); + } + + #[test] + fn test_readable_schedule() { + let 
schedule = ReadableSchedule( + vec!["09:30 +00:00", "23:00 +00:00"] + .into_iter() + .flat_map(ReadableOffsetTime::from_str) + .collect::>(), + ); + + let time_a = DateTime::parse_from_rfc3339("2023-10-27T09:30:57-00:00").unwrap(); + let time_b = DateTime::parse_from_rfc3339("2023-10-28T09:00:57-00:00").unwrap(); + let time_c = DateTime::parse_from_rfc3339("2023-10-27T23:15:00-00:00").unwrap(); + let time_d = DateTime::parse_from_rfc3339("2023-10-27T23:00:00-00:00").unwrap(); + let time_e = DateTime::parse_from_rfc3339("2023-10-27T20:00:00-00:00").unwrap(); + + // positives for schedule by hour + assert!(schedule.is_scheduled_this_hour(&time_a)); + assert!(schedule.is_scheduled_this_hour(&time_b)); + assert!(schedule.is_scheduled_this_hour(&time_c)); + assert!(schedule.is_scheduled_this_hour(&time_d)); + + // negatives for schedule by hour + assert!(!schedule.is_scheduled_this_hour(&time_e)); + + // positives for schedule by hour and minute + assert!(schedule.is_scheduled_this_hour_minute(&time_a)); + assert!(schedule.is_scheduled_this_hour_minute(&time_d)); + + // negatives for schedule by hour and minute + assert!(!schedule.is_scheduled_this_hour_minute(&time_b)); + assert!(!schedule.is_scheduled_this_hour_minute(&time_c)); + assert!(!schedule.is_scheduled_this_hour_minute(&time_e)); + } + #[test] fn test_canonicalize_path() { let tmp = Builder::new() diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2f4f5ba76956..dc61cb1b5aa0 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -37,7 +37,7 @@ use tikv::{ BlockCacheConfig, Config as StorageConfig, EngineType, FlowControlConfig, IoRateLimitConfig, }, }; -use tikv_util::config::{LogFormat, ReadableDuration, ReadableSize}; +use tikv_util::config::{LogFormat, ReadableDuration, ReadableSchedule, ReadableSize}; mod dynamic; mod test_config_client; @@ -270,6 +270,8 @@ fn test_serde_custom_tikv_config() { slow_trend_unsensitive_result: 0.5, enable_v2_compatible_learner: false, unsafe_disable_check_quorum: false, + periodic_full_compact_start_times: ReadableSchedule::default(), + periodic_full_compact_start_max_cpu: 0.1, }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { From b2300932ccde2347104c674632d2e473d89d6fa3 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 1 Nov 2023 14:54:37 +0800 Subject: [PATCH 118/203] cdc: limit cdc event fetching speed to reduce RocksDB read load (#15849) close tikv/tikv#11390 None Signed-off-by: qupeng --- components/cdc/src/endpoint.rs | 50 ++++++++++++++++++++-- components/cdc/src/initializer.rs | 28 +++++++----- src/config/mod.rs | 5 +++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 5 files changed, 71 insertions(+), 14 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index a5f00a08028f..e62650c77c6d 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -384,6 +384,7 @@ pub struct Endpoint { workers: Runtime, scan_concurrency_semaphore: Arc, scan_speed_limiter: Limiter, + fetch_speed_limiter: Limiter, max_scan_batch_bytes: usize, max_scan_batch_size: usize, sink_memory_quota: Arc, @@ -439,11 +440,16 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint 0 { + let scan_speed_limiter = Limiter::new(if config.incremental_scan_speed_limit.0 > 0 { config.incremental_scan_speed_limit.0 as f64 } else { f64::INFINITY }); + let fetch_speed_limiter = Limiter::new(if 
config.incremental_fetch_speed_limit.0 > 0 { + config.incremental_fetch_speed_limit.0 as f64 + } else { + f64::INFINITY + }); CDC_SINK_CAP.set(sink_memory_quota.capacity() as i64); // For scan efficiency, the scan batch bytes should be around 1MB. @@ -469,7 +475,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint 0 { + self.config.incremental_fetch_speed_limit.0 as f64 + } else { + f64::INFINITY + }; + + self.fetch_speed_limiter.set_speed_limit(new_speed_limit); + } } pub fn set_max_scan_batch_size(&mut self, max_scan_batch_size: usize) { @@ -793,7 +809,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, - pub(crate) speed_limiter: Limiter, + pub(crate) scan_speed_limiter: Limiter, + pub(crate) fetch_speed_limiter: Limiter, + pub(crate) max_scan_batch_bytes: usize, pub(crate) max_scan_batch_size: usize, @@ -404,16 +406,14 @@ impl Initializer { perf_delta, } = self.do_scan(scanner, old_value_cursors, &mut entries)?; - CDC_SCAN_BYTES.inc_by(emit as _); TLS_CDC_PERF_STATS.with(|x| *x.borrow_mut() += perf_delta); tls_flush_perf_stats(); - let require = if let Some(bytes) = disk_read { + if let Some(bytes) = disk_read { CDC_SCAN_DISK_READ_BYTES.inc_by(bytes as _); - bytes - } else { - perf_delta.block_read_byte as usize - }; - self.speed_limiter.consume(require).await; + self.scan_speed_limiter.consume(bytes).await; + } + CDC_SCAN_BYTES.inc_by(emit as _); + self.fetch_speed_limiter.consume(emit as _).await; if let Some(resolver) = resolver { // Track the locks. @@ -624,7 +624,8 @@ mod tests { } fn mock_initializer( - speed_limit: usize, + scan_limit: usize, + fetch_limit: usize, buffer: usize, engine: Option, kv_api: ChangeDataRequestKvApi, @@ -665,7 +666,8 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), - speed_limiter: Limiter::new(speed_limit as _), + scan_speed_limiter: Limiter::new(scan_limit as _), + fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, max_scan_batch_size: 1024, build_resolver: true, @@ -717,6 +719,7 @@ mod tests { // Buffer must be large enough to unblock async incremental scan. let buffer = 1000; let (mut worker, pool, mut initializer, rx, mut drain) = mock_initializer( + total_bytes, total_bytes, buffer, engine.kv_engine(), @@ -832,6 +835,7 @@ mod tests { // Buffer must be large enough to unblock async incremental scan. let buffer = 1000; let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + total_bytes, total_bytes, buffer, engine.kv_engine(), @@ -914,6 +918,7 @@ mod tests { // Do incremental scan with different `hint_min_ts` values. 
for checkpoint_ts in [200, 100, 150] { let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + usize::MAX, usize::MAX, 1000, engine.kv_engine(), @@ -979,6 +984,7 @@ mod tests { let total_bytes = 1; let buffer = 1; let (mut worker, _pool, mut initializer, rx, _drain) = mock_initializer( + total_bytes, total_bytes, buffer, None, @@ -1034,7 +1040,7 @@ mod tests { let total_bytes = 1; let buffer = 1; let (mut worker, pool, mut initializer, _rx, _drain) = - mock_initializer(total_bytes, buffer, None, kv_api, false); + mock_initializer(total_bytes, total_bytes, buffer, None, kv_api, false); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); diff --git a/src/config/mod.rs b/src/config/mod.rs index d1fb1e4f8d8f..237ac3c7a725 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2940,7 +2940,11 @@ pub struct CdcConfig { #[online_config(skip)] pub incremental_scan_threads: usize, pub incremental_scan_concurrency: usize, + /// Limit scan speed based on disk I/O traffic. pub incremental_scan_speed_limit: ReadableSize, + /// Limit scan speed based on memory accesing traffic. + #[doc(hidden)] + pub incremental_fetch_speed_limit: ReadableSize, /// `TsFilter` can increase speed and decrease resource usage when /// incremental content is much less than total content. However in /// other cases, `TsFilter` can make performance worse because it needs @@ -2979,6 +2983,7 @@ impl Default for CdcConfig { // TiCDC requires a SSD, the typical write speed of SSD // is more than 500MB/s, so 128MB/s is enough. incremental_scan_speed_limit: ReadableSize::mb(128), + incremental_fetch_speed_limit: ReadableSize::mb(512), incremental_scan_ts_filter_ratio: 0.2, tso_worker_threads: 1, // 512MB memory for CDC sink. 
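// Illustrative sketch, not part of this patch, of how the two limits above are
// applied: each backs a tikv_util::time::Limiter token bucket, and the scan
// loop awaits consume(bytes) before continuing, so the 128MB/s of disk reads
// and 512MB/s of emitted bytes are throttled independently. A zero or unset
// limit maps to f64::INFINITY, which effectively disables that limiter.
use tikv_util::time::Limiter;

fn limiter_from_config(limit_bytes_per_sec: u64) -> Limiter {
    let rate = if limit_bytes_per_sec > 0 {
        limit_bytes_per_sec as f64
    } else {
        f64::INFINITY
    };
    Limiter::new(rate)
}

async fn throttled_emit(fetch_limiter: &Limiter, batch_bytes: usize) {
    // Waits (asynchronously) until the token bucket can cover this batch.
    fetch_limiter.consume(batch_bytes).await;
    // ... emit the batch downstream ...
}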
diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index dc61cb1b5aa0..1239aa53fb80 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -851,6 +851,7 @@ fn test_serde_custom_tikv_config() { incremental_scan_threads: 3, incremental_scan_concurrency: 4, incremental_scan_speed_limit: ReadableSize(7), + incremental_fetch_speed_limit: ReadableSize(8), incremental_scan_ts_filter_ratio: 0.7, tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 1bb52fad5fc5..ef7a4809168f 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -699,6 +699,7 @@ hibernate-regions-compatible = false incremental-scan-threads = 3 incremental-scan-concurrency = 4 incremental-scan-speed-limit = 7 +incremental-fetch-speed-limit = 8 incremental-scan-ts-filter-ratio = 0.7 tso-worker-threads = 2 old-value-cache-memory-quota = "14MB" From 78d835d91b07fb5c18e1158c21841fd43116bc02 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Wed, 1 Nov 2023 16:29:07 +0800 Subject: [PATCH 119/203] makefile: update cargo sort installation to remove dependency on rust-toolchain (#15854) close tikv/tikv#15818 update cargo sort installation to remove dependency on rust-toolchain Signed-off-by: Smityz Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ce8d4e8b793d..103c502036e8 100644 --- a/Makefile +++ b/Makefile @@ -331,7 +331,7 @@ unset-override: pre-format: unset-override @rustup component add rustfmt - @which cargo-sort &> /dev/null || cargo install -q cargo-sort + @which cargo-sort &> /dev/null || cargo +nightly install -q cargo-sort format: pre-format @cargo fmt From 4c369d2cdc19acd336226e49cdd6c903f3a47ab5 Mon Sep 17 00:00:00 2001 From: qupeng Date: Thu, 2 Nov 2023 14:24:37 +0800 Subject: [PATCH 120/203] cdc: incremental scans acquire snapshots before semaphores to avoid useless queue (#15865) close tikv/tikv#15866 cdc: incremental scans acquire snapshots before semaphores to avoid useless queue Signed-off-by: qupeng --- components/cdc/src/delegate.rs | 7 ++- components/cdc/src/endpoint.rs | 7 +-- components/cdc/src/initializer.rs | 93 ++++++++++--------------------- 3 files changed, 37 insertions(+), 70 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index c82c4cb6f13e..780cfe8dea66 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -423,10 +423,15 @@ impl Delegate { downstream.state.store(DownstreamState::Stopped); let error_event = error.clone(); if let Err(err) = downstream.sink_error_event(region_id, error_event) { - warn!("cdc broadcast error failed"; + warn!("cdc send region error failed"; "region_id" => region_id, "error" => ?err, "origin_error" => ?error, "downstream_id" => ?downstream.id, "downstream" => ?downstream.peer, "request_id" => downstream.req_id, "conn_id" => ?downstream.conn_id); + } else { + info!("cdc send region error success"; + "region_id" => region_id, "origin_error" => ?error, + "downstream_id" => ?downstream.id, "downstream" => ?downstream.peer, + "request_id" => downstream.req_id, "conn_id" => ?downstream.conn_id); } Ok(()) }; diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e62650c77c6d..82233af8f145 100644 --- 
a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -809,6 +809,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint { CDC_SCAN_TASKS.with_label_values(&["finish"]).inc(); } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index bd8f5e4e6374..2882d2e975e0 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -35,7 +35,7 @@ use tikv_kv::Iterator; use tikv_util::{ box_err, codec::number, - debug, error, info, + debug, defer, error, info, memory::MemoryQuota, sys::inspector::{self_thread_inspector, ThreadInspector}, time::{Instant, Limiter}, @@ -90,6 +90,7 @@ pub(crate) struct Initializer { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, + pub(crate) scan_concurrency_semaphore: Arc, pub(crate) scan_speed_limiter: Limiter, pub(crate) fetch_speed_limiter: Limiter, @@ -109,30 +110,9 @@ impl Initializer { &mut self, change_observer: ChangeObserver, cdc_handle: T, - concurrency_semaphore: Arc, memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); - let _permit = concurrency_semaphore.acquire().await; - - // When downstream_state is Stopped, it means the corresponding delegate - // is stopped. The initialization can be safely canceled. - // - // Acquiring a permit may take some time, it is possible that - // initialization can be canceled. - if self.downstream_state.load() == DownstreamState::Stopped { - info!("cdc async incremental scan canceled"; - "region_id" => self.region_id, - "downstream_id" => ?self.downstream_id, - "observe_id" => ?self.observe_id, - "conn_id" => ?self.conn_id); - return Err(box_err!("scan canceled")); - } - - CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); - tikv_util::defer!({ - CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec(); - }); // To avoid holding too many snapshots and holding them too long, // we need to acquire scan concurrency permit before taking snapshot. 
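// Sketch of the ordering this patch moves to, with a plain tokio Semaphore
// standing in for the real cdc plumbing (illustrative only): the region
// snapshot is obtained first, and the scan-concurrency permit is awaited only
// right before the heavy scan runs, so a task never sits in the semaphore
// queue before it even has a snapshot to work on -- the "useless queue" the
// commit title refers to.
use std::sync::Arc;
use tokio::sync::Semaphore;

async fn scan_with_late_permit<S, F>(take_snapshot: F, semaphore: Arc<Semaphore>)
where
    F: FnOnce() -> S,
{
    // 1. Take the snapshot immediately; this part must not wait behind other
    //    scans.
    let snapshot = take_snapshot();
    // 2. Only now wait for a slot among the concurrently running scans.
    let _permit = semaphore.acquire().await.expect("semaphore never closed");
    // 3. Run the expensive incremental scan while holding the permit.
    run_scan(snapshot).await;
}

async fn run_scan<S>(_snapshot: S) {
    // placeholder for the actual scan loop
}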
@@ -187,8 +167,8 @@ impl Initializer { memory_quota: Arc, ) -> Result<()> { if let Some(region_snapshot) = resp.snapshot { - assert_eq!(self.region_id, region_snapshot.get_region().get_id()); let region = region_snapshot.get_region().clone(); + assert_eq!(self.region_id, region.get_id()); self.async_incremental_scan(region_snapshot, region, memory_quota) .await } else { @@ -208,10 +188,29 @@ impl Initializer { region: Region, memory_quota: Arc, ) -> Result<()> { - let downstream_id = self.downstream_id; + let scan_concurrency_semaphore = self.scan_concurrency_semaphore.clone(); + let _permit = scan_concurrency_semaphore.acquire().await; + CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); + defer!(CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec()); + let region_id = region.get_id(); + let downstream_id = self.downstream_id; let observe_id = self.observe_id; + let conn_id = self.conn_id; let kv_api = self.kv_api; + let on_cancel = || -> Result<()> { + info!("cdc async incremental scan canceled"; + "region_id" => region_id, + "downstream_id" => ?downstream_id, + "observe_id" => ?observe_id, + "conn_id" => ?conn_id); + Err(box_err!("scan canceled")) + }; + + if self.downstream_state.load() == DownstreamState::Stopped { + return on_cancel(); + } + self.observed_range.update_region_key_range(®ion); debug!("cdc async incremental scan"; "region_id" => region_id, @@ -260,7 +259,6 @@ impl Initializer { }; fail_point!("cdc_incremental_scan_start"); - let conn_id = self.conn_id; let mut done = false; let start = Instant::now_coarse(); @@ -270,15 +268,6 @@ impl Initializer { DownstreamState::Initializing | DownstreamState::Stopped )); - let on_cancel = || -> Result<()> { - info!("cdc async incremental scan canceled"; - "region_id" => region_id, - "downstream_id" => ?downstream_id, - "observe_id" => ?observe_id, - "conn_id" => ?conn_id); - Err(box_err!("scan canceled")) - }; - while !done { // When downstream_state is Stopped, it means the corresponding // delegate is stopped. The initialization can be safely canceled. @@ -666,6 +655,7 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), + scan_concurrency_semaphore: Arc::new(Semaphore::new(1)), scan_speed_limiter: Limiter::new(scan_limit as _), fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, @@ -1044,51 +1034,26 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); - let concurrency_semaphore = Arc::new(Semaphore::new(1)); let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.initialize( - change_cmd, - raft_router.clone(), - concurrency_semaphore.clone(), - memory_quota.clone(), - )) - .unwrap_err(); - - let (tx, rx) = sync_channel(1); - let concurrency_semaphore_ = concurrency_semaphore.clone(); - pool.spawn(async move { - let _permit = concurrency_semaphore_.acquire().await; - tx.send(()).unwrap(); - tx.send(()).unwrap(); - tx.send(()).unwrap(); - }); - rx.recv_timeout(Duration::from_millis(200)).unwrap(); + block_on(initializer.initialize(change_cmd, raft_router.clone(), memory_quota.clone())) + .unwrap_err(); let (tx1, rx1) = sync_channel(1); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); pool.spawn(async move { // Migrated to 2021 migration. 
This let statement is probably not needed, see // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html - let _ = ( - &initializer, - &change_cmd, - &raft_router, - &concurrency_semaphore, - ); let res = initializer - .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) + .initialize(change_cmd, raft_router, memory_quota) .await; tx1.send(res).unwrap(); }); - // Must timeout because there is no enough permit. - rx1.recv_timeout(Duration::from_millis(200)).unwrap_err(); - // Release the permit - rx.recv_timeout(Duration::from_millis(200)).unwrap(); + // Shouldn't timeout, gets an error instead. let res = rx1.recv_timeout(Duration::from_millis(200)).unwrap(); - res.unwrap_err(); + assert!(res.is_err()); worker.stop(); } From f0ce447adf4e9d1bde7c7b1f9560a34d9fe77705 Mon Sep 17 00:00:00 2001 From: Xiaoya Wei Date: Thu, 2 Nov 2023 14:39:37 +0800 Subject: [PATCH 121/203] fuzz: Bump afl version (#15848) close tikv/tikv#15847 bump afl version Bump afl to a new version that can successfully builds on MacOS. close tikv/tikv#15847 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 93 ++++++++++++++++++++++++++++++++++---- fuzz/fuzzer-afl/Cargo.toml | 2 +- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09459fd9123a..f56c8ff13958 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,13 +31,13 @@ checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" [[package]] name = "afl" -version = "0.6.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59206260f98d163b3ca42fb29fe551dbcda1d43cf70a244066b2a0666a8fb2a9" +checksum = "330d7251127b228cb4187ac2373dc37f615d65199f93b5443edeeed839fff5df" dependencies = [ - "cc", - "clap 2.33.0", - "rustc_version 0.2.3", + "home", + "libc 0.2.146", + "rustc_version 0.4.0", "xdg", ] @@ -2515,6 +2515,15 @@ dependencies = [ "digest 0.10.6", ] +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "honggfuzz" version = "0.5.47" @@ -7555,21 +7564,51 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -7582,6 +7621,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -7594,6 +7639,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -7606,6 +7657,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -7618,12 +7675,24 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -7636,6 +7705,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winreg" version = "0.7.0" @@ -7672,9 +7747,9 @@ dependencies = [ [[package]] name = "xdg" -version = "2.2.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" [[package]] name = "xml-rs" diff --git a/fuzz/fuzzer-afl/Cargo.toml b/fuzz/fuzzer-afl/Cargo.toml index 6c97305a2536..5e9894fba3e1 100644 --- 
a/fuzz/fuzzer-afl/Cargo.toml +++ b/fuzz/fuzzer-afl/Cargo.toml @@ -8,4 +8,4 @@ fuzz-targets = { path = "../targets" } # AFL only works for x86 targets [target.'cfg(all(not(target_os = "windows"), target_arch = "x86_64"))'.dependencies] -afl = "0.6" +afl = "0.14" From 9f46cdc90756f63e76320c2f51c7ed4a2ab5e7d2 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Fri, 3 Nov 2023 11:42:39 +0800 Subject: [PATCH 122/203] test: fix the flaky test for `test_gen_split_check_bucket_ranges ` (#15876) close tikv/tikv#15862 Signed-off-by: bufferflies <1045931706@qq.com> Signed-off-by: tongjian <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 5 ++ components/test_raftstore/src/cluster.rs | 2 +- .../raftstore/test_split_region.rs | 48 +++++++++---------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 14ad09dbde80..5a7223dcaa3b 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6206,6 +6206,11 @@ where cb(peer_stat); } } + + // only check the suspect buckets, not split region. + if source == "bucket" { + return; + } let task = SplitCheckTask::split_check_key_range( region.clone(), start_key, diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 2a4082893e77..a08f858c0316 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1938,7 +1938,7 @@ impl Cluster { start_key: None, end_key: None, policy: CheckPolicy::Scan, - source: "test", + source: "bucket", cb, }, ) diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 35ee18388659..5439e5c8ba25 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1162,9 +1162,9 @@ fn test_refresh_region_bucket_keys() { #[test] fn test_gen_split_check_bucket_ranges() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); + let mut cluster = new_server_cluster(0, 1); + let region_bucket_size = ReadableSize::kb(1); + cluster.cfg.coprocessor.region_bucket_size = region_bucket_size; cluster.cfg.coprocessor.enable_region_bucket = Some(true); // disable report buckets; as it will reset the user traffic stats to randomize // the test result @@ -1174,14 +1174,15 @@ fn test_gen_split_check_bucket_ranges() { cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); - cluster.must_put(b"k11", b"v1"); - let region = pd_client.get_region(b"k11").unwrap(); + let mut range = 1..; + let mid_key = put_till_size(&mut cluster, region_bucket_size.0, &mut range); + let second_key = put_till_size(&mut cluster, region_bucket_size.0, &mut range); + let region = pd_client.get_region(&second_key).unwrap(); let bucket = Bucket { - keys: vec![b"k11".to_vec()], - size: 1024 * 1024 * 200, + keys: vec![mid_key.clone()], + size: region_bucket_size.0 * 2, }; - let mut expected_buckets = metapb::Buckets::default(); expected_buckets.set_keys(bucket.clone().keys.into()); expected_buckets @@ -1197,32 +1198,28 @@ fn test_gen_split_check_bucket_ranges() { Option::None, Some(expected_buckets.clone()), ); - cluster.must_put(b"k10", b"v1"); - cluster.must_put(b"k12", b"v1"); - let expected_bucket_ranges = vec![ - BucketRange(vec![], 
b"k11".to_vec()), - BucketRange(b"k11".to_vec(), vec![]), - ]; + // put some data into the right buckets, so the bucket range will be check by + // split check. + let latest_key = put_till_size(&mut cluster, region_bucket_size.0 + 100, &mut range); + let expected_bucket_ranges = vec![BucketRange(mid_key.clone(), vec![])]; cluster.send_half_split_region_message(®ion, Some(expected_bucket_ranges)); - // set fsm.peer.last_bucket_regions + // reset bucket stats. cluster.refresh_region_bucket_keys( ®ion, buckets, Option::None, Some(expected_buckets.clone()), ); - // because the diff between last_bucket_regions and bucket_regions is zero, - // bucket range for split check should be empty. - let expected_bucket_ranges = vec![]; - cluster.send_half_split_region_message(®ion, Some(expected_bucket_ranges)); - // split the region - pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![b"k11".to_vec()]); + thread::sleep(Duration::from_millis(100)); + cluster.send_half_split_region_message(®ion, Some(vec![])); - let left = pd_client.get_region(b"k10").unwrap(); - let right = pd_client.get_region(b"k12").unwrap(); + // split the region + pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![second_key]); + let left = pd_client.get_region(&mid_key).unwrap(); + let right = pd_client.get_region(&latest_key).unwrap(); if right.get_id() == 1 { // the bucket_ranges should be None to refresh the bucket cluster.send_half_split_region_message(&right, None); @@ -1230,11 +1227,10 @@ fn test_gen_split_check_bucket_ranges() { // the bucket_ranges should be None to refresh the bucket cluster.send_half_split_region_message(&left, None); } - + thread::sleep(Duration::from_millis(300)); // merge the region pd_client.must_merge(left.get_id(), right.get_id()); - let region = pd_client.get_region(b"k10").unwrap(); - // the bucket_ranges should be None to refresh the bucket + let region = pd_client.get_region(&mid_key).unwrap(); cluster.send_half_split_region_message(®ion, None); } From e0fe14d57136f645457bcf14ee5ae1a478be04b8 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 3 Nov 2023 16:14:09 +0800 Subject: [PATCH 123/203] titan: update titan to avoid manifest io mutex (#15914) close tikv/tikv#15351 titan: update titan to avoid manifest io mutex Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f56c8ff13958..fba26935d1f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2950,7 +2950,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2969,7 +2969,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "bzip2-sys", "cc", @@ -4890,7 +4890,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "libc 0.2.146", "librocksdb_sys", From 07141aad5a591306cacbe18aafc2c755d35a70bc Mon Sep 17 00:00:00 
2001 From: tongjian <1045931706@qq.com> Date: Mon, 6 Nov 2023 10:27:39 +0800 Subject: [PATCH 124/203] server: make grpc metrics consistent (#15906) ref tikv/tikv#15803 the grpc duration should not include the stream sink. Signed-off-by: bufferflies <1045931706@qq.com> --- src/server/service/kv.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 77f92d33d955..8426143d502f 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -491,8 +491,8 @@ impl Tikv for Service { let future = future_copr(&self.copr, Some(ctx.peer()), req); let task = async move { let resp = future.await?.consume(); - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .coprocessor .observe(elapsed.as_secs_f64()); @@ -529,8 +529,8 @@ impl Tikv for Service { let future = future_raw_coprocessor(&self.copr_v2, &self.storage, req); let task = async move { let resp = future.await?; - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .raw_coprocessor .observe(elapsed.as_secs_f64()); @@ -580,8 +580,8 @@ impl Tikv for Service { if let Err(e) = res { resp.set_error(format!("{}", e)); } - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .unsafe_destroy_range .observe(elapsed.as_secs_f64()); @@ -863,10 +863,10 @@ impl Tikv for Service { } } } - sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .split_region .observe(begin_instant.saturating_elapsed().as_secs_f64()); + sink.success(resp).await?; ServerResult::Ok(()) } .map_err(|e| { @@ -1015,6 +1015,9 @@ impl Tikv for Service { .schedule(CheckLeaderTask::CheckLeader { leaders, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; let regions = resp.await?; + GRPC_MSG_HISTOGRAM_STATIC + .check_leader + .observe(begin_instant.saturating_elapsed().as_secs_f64()); let mut resp = CheckLeaderResponse::default(); resp.set_ts(ts); resp.set_regions(regions); @@ -1026,10 +1029,6 @@ impl Tikv for Service { } return Err(Error::from(e)); } - let elapsed = begin_instant.saturating_elapsed(); - GRPC_MSG_HISTOGRAM_STATIC - .check_leader - .observe(elapsed.as_secs_f64()); ServerResult::Ok(()) } .map_err(move |e| { From 1a726454bcaa3f9087e02fe92bcf3ad7f779f07f Mon Sep 17 00:00:00 2001 From: lance6716 Date: Mon, 6 Nov 2023 10:47:10 +0800 Subject: [PATCH 125/203] sst_importer: join can fallback to version 1 filename (#15913) close tikv/tikv#15912 Signed-off-by: lance6716 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/sst_importer/src/import_file.rs | 121 ++++++++++++-------- components/sst_importer/src/sst_importer.rs | 46 +++----- 2 files changed, 89 insertions(+), 78 deletions(-) diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b3b7c051ce44..850df867da86 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -13,8 +13,7 @@ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ - iter_option, EncryptionKeyManager, IterOptions, Iterator, KvEngine, RefIterable, SstExt, - SstMetaInfo, SstReader, + iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, 
SstReader, }; use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; use keys::data_key; @@ -261,17 +260,36 @@ impl ImportDir { }) } - pub fn join(&self, meta: &SstMeta) -> Result { + pub fn join_for_write(&self, meta: &SstMeta) -> Result { let file_name = sst_meta_to_path(meta)?; self.get_import_path(file_name.to_str().unwrap()) } + /// Different with join_for_write, join_for_read will also handle the api + /// version 1 filenames which can be generated by old version TiKV. + pub fn join_for_read(&self, meta: &SstMeta) -> Result { + let file_name = sst_meta_to_path(meta)?; + let files_result = self.get_import_path(file_name.to_str().unwrap()); + // if files does not exists, it means the SstMeta is generated by old version + // TiKV, we try sst_meta_to_path_v1 + match files_result { + Ok(path) => { + if path.save.exists() { + return Ok(path); + } + let file_name = sst_meta_to_path_v1(meta)?; + self.get_import_path(file_name.to_str().unwrap()) + } + Err(e) => Err(e), + } + } + pub fn create( &self, meta: &SstMeta, key_manager: Option>, ) -> Result { - let path = self.join(meta)?; + let path = self.join_for_write(meta)?; if path.save.exists() { return Err(Error::FileExists(path.save, "create SST upload cache")); } @@ -290,7 +308,7 @@ impl ImportDir { } pub fn delete(&self, meta: &SstMeta, manager: Option<&DataKeyManager>) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; self.delete_file(&path.save, manager)?; self.delete_file(&path.temp, manager)?; self.delete_file(&path.clone, manager)?; @@ -298,7 +316,7 @@ impl ImportDir { } pub fn exist(&self, meta: &SstMeta) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; Ok(path.save.exists()) } @@ -307,7 +325,7 @@ impl ImportDir { meta: &SstMeta, key_manager: Option>, ) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager, get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -334,7 +352,7 @@ impl ImportDir { // otherwise we are upgrade/downgrade between V1 and V2 // this can be done if all keys are written by TiDB _ => { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager.clone(), get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -382,7 +400,7 @@ impl ImportDir { let mut paths = HashMap::new(); let mut ingest_bytes = 0; for info in metas { - let path = self.join(&info.meta)?; + let path = self.join_for_read(&info.meta)?; let cf = info.meta.get_cf_name(); super::prepare_sst_for_ingestion(&path.save, &path.clone, key_manager.as_deref())?; ingest_bytes += info.total_bytes; @@ -407,7 +425,7 @@ impl ImportDir { key_manager: Option>, ) -> Result<()> { for meta in metas { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager.clone(), get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -416,31 +434,6 @@ impl ImportDir { Ok(()) } - pub fn load_start_key_by_meta( - &self, - meta: &SstMeta, - km: Option>, - ) -> Result>> { - let path = self.join(meta)?; - let r = match km { - Some(km) => E::SstReader::open_encrypted(&path.save.to_string_lossy(), km)?, - None => E::SstReader::open(&path.save.to_string_lossy())?, - }; - let opts = IterOptions::new(None, None, 
false); - let mut i = r.iter(opts)?; - if !i.seek_to_first()? || !i.valid()? { - return Ok(None); - } - // Should we warn if the key doesn't start with the prefix key? (Is that - // possible?) - // Also note this brings implicit coupling between this and - // RocksEngine. Perhaps it is better to make the engine to provide - // decode functions. Anyway we have directly used the RocksSstReader - // somewhere... This won't make things worse. - let real_key = i.key().strip_prefix(keys::DATA_PREFIX_KEY); - Ok(real_key.map(ToOwned::to_owned)) - } - pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { @@ -483,6 +476,18 @@ pub fn sst_meta_to_path(meta: &SstMeta) -> Result { ))) } +pub fn sst_meta_to_path_v1(meta: &SstMeta) -> Result { + Ok(PathBuf::from(format!( + "{}_{}_{}_{}_{}{}", + UuidBuilder::from_slice(meta.get_uuid())?.build(), + meta.get_region_id(), + meta.get_region_epoch().get_conf_ver(), + meta.get_region_epoch().get_version(), + meta.get_cf_name(), + SST_SUFFIX, + ))) +} + pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { @@ -521,6 +526,8 @@ pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { #[cfg(test)] mod test { + use std::fs; + use engine_traits::CF_DEFAULT; use super::*; @@ -565,6 +572,35 @@ mod test { assert_eq!(1, meta_with_ver.1); } + #[test] + fn test_join_for_rw() { + use tempfile::TempDir; + use uuid::Uuid; + + let tmp = TempDir::new().unwrap(); + let dir = ImportDir::new(tmp.path()).unwrap(); + let mut meta = SstMeta::default(); + meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); + let filename_v1 = sst_meta_to_path_v1(&meta).unwrap(); + let path_v1 = tmp.path().join(filename_v1); + + let got = dir + .join_for_read(&meta) + .expect("fallback to version 1 because version 2 file does not exist"); + assert_eq!(got.save, path_v1); + + let filename_v2 = sst_meta_to_path(&meta).unwrap(); + let path_v2 = tmp.path().join(filename_v2); + fs::File::create(&path_v2).expect("create empty file"); + let got = dir.join_for_read(&meta).expect("read should succeed"); + assert_eq!(got.save, path_v2); + fs::remove_file(path_v2).expect("delete file"); + + fs::File::create(&path_v1).expect("create empty file"); + let got = dir.join_for_read(&meta).expect("read should succeed"); + assert_eq!(got.save, path_v1); + } + #[cfg(feature = "test-engines-rocksdb")] fn test_path_with_range_and_km(km: Option) { use engine_rocks::{RocksEngine, RocksSstWriterBuilder}; @@ -613,21 +649,6 @@ mod test { .unwrap(); w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); - let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta_with_ver| { - let meta = &mut meta_with_ver.0; - let start = dir - .load_start_key_by_meta::(meta, arcmgr.clone()) - .unwrap() - .unwrap(); - meta.mut_range().set_start(start) - }); - assert_eq!( - ssts.iter() - .map(|meta_with_ver| { meta_with_ver.0.clone() }) - .collect(), - vec![meta] - ); } #[test] diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 358bc0545ded..7e1de9cf44e7 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -289,7 +289,7 @@ impl SstImporter { } pub fn get_path(&self, meta: &SstMeta) -> PathBuf { - let path = self.dir.join(meta).unwrap(); + let path = self.dir.join_for_read(meta).unwrap(); path.save } @@ -1114,7 +1114,7 @@ impl SstImporter { engine: 
E, ext: DownloadExt<'_>, ) -> Result> { - let path = self.dir.join(meta)?; + let path = self.dir.join_for_write(meta)?; let file_crypter = crypter.map(|c| FileEncryptionInfo { method: to_engine_encryption_method(c.cipher_type), @@ -1389,20 +1389,10 @@ impl SstImporter { self.dir.list_ssts() } - /// Load the start key by a metadata. - /// This will open the internal SST and try to load the first user key. - /// (For RocksEngine, that is the key without the 'z' prefix.) - /// When the SST is empty or the first key cannot be parsed as user key, - /// return None. - pub fn load_start_key_by_meta(&self, meta: &SstMeta) -> Result>> { - self.dir - .load_start_key_by_meta::(meta, self.key_manager.clone()) - } - pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); - let default_path = self.dir.join(&default_meta)?; + let default_path = self.dir.join_for_write(&default_meta)?; let default = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_DEFAULT) @@ -1412,7 +1402,7 @@ impl SstImporter { let mut write_meta = meta; write_meta.set_cf_name(CF_WRITE.to_owned()); - let write_path = self.dir.join(&write_meta)?; + let write_path = self.dir.join_for_write(&write_meta)?; let write = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_WRITE) @@ -1438,7 +1428,7 @@ impl SstImporter { mut meta: SstMeta, ) -> Result> { meta.set_cf_name(CF_DEFAULT.to_owned()); - let default_path = self.dir.join(&meta)?; + let default_path = self.dir.join_for_write(&meta)?; let default = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_DEFAULT) @@ -1519,7 +1509,7 @@ mod tests { let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); - let path = dir.join(&meta).unwrap(); + let path = dir.join_for_write(&meta).unwrap(); // Test ImportDir::create() { @@ -2333,7 +2323,7 @@ mod tests { assert_eq!(range.get_end(), b"t123_r13"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2393,7 +2383,7 @@ mod tests { assert_eq!(range.get_end(), b"t123_r13"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2443,7 +2433,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2488,7 +2478,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2532,7 +2522,7 @@ mod tests { // verifies that the file is saved to the correct place. 
// (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2674,7 +2664,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size is changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2718,7 +2708,7 @@ mod tests { assert_eq!(range.get_end(), b"t5_r07"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2851,7 +2841,7 @@ mod tests { assert_eq!(range.get_end(), b"d"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2910,7 +2900,7 @@ mod tests { assert_eq!(range.get_end(), b"c\x00"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); @@ -2965,7 +2955,7 @@ mod tests { assert_eq!(range.get_end(), b"c"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); @@ -3011,7 +3001,7 @@ mod tests { .unwrap(); // verifies the SST is compressed using Snappy. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); @@ -3058,7 +3048,7 @@ mod tests { // verifies SST compression algorithm... 
for meta in metas { - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); From 6a2c9733a8873089561d2b05545fbbb85fb96f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:41:11 +0800 Subject: [PATCH 126/203] log-backup: use row-level memory usage statistic for initial scan (#15872) close tikv/tikv#15714 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 8 +- components/backup-stream/src/event_loader.rs | 129 ++++++----- components/backup-stream/src/lib.rs | 2 +- components/backup-stream/src/router.rs | 9 + .../backup-stream/tests/failpoints/mod.rs | 77 +++++-- .../backup-stream/tests/integration/mod.rs | 76 +++---- components/backup-stream/tests/suite.rs | 212 +++++++++++------- components/tikv_util/src/memory.rs | 52 ++++- 8 files changed, 362 insertions(+), 203 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 834a40f8bdd5..6c19edc9f93e 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -29,6 +29,7 @@ use tikv_util::{ box_err, config::ReadableDuration, debug, defer, info, + memory::MemoryQuota, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, warn, @@ -51,7 +52,7 @@ use crate::{ GetCheckpointResult, RegionIdWithVersion, Subscription, }, errors::{Error, Result}, - event_loader::{InitialDataLoader, PendingMemoryQuota}, + event_loader::InitialDataLoader, future, metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, @@ -139,8 +140,9 @@ where pool.spawn(Self::starts_flush_ticks(range_router.clone())); - let initial_scan_memory_quota = - PendingMemoryQuota::new(config.initial_scan_pending_memory_quota.0 as _); + let initial_scan_memory_quota = Arc::new(MemoryQuota::new( + config.initial_scan_pending_memory_quota.0 as _, + )); let limit = if config.initial_scan_rate_limit.0 > 0 { config.initial_scan_rate_limit.0 as f64 } else { diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index bfb88d5cd5f2..0a957ea87ed7 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -12,15 +12,16 @@ use raftstore::{ use tikv::storage::{ kv::StatisticsSummary, mvcc::{DeltaScanner, ScannerBuilder}, - txn::{EntryBatch, TxnEntry, TxnEntryScanner}, + txn::{TxnEntry, TxnEntryScanner}, Snapshot, Statistics, }; use tikv_util::{ box_err, + memory::{MemoryQuota, OwnedAllocated}, time::{Instant, Limiter}, worker::Scheduler, }; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; +use tokio::sync::Semaphore; use txn_types::{Key, Lock, TimeStamp}; use crate::{ @@ -34,41 +35,17 @@ use crate::{ const MAX_GET_SNAPSHOT_RETRY: usize = 5; -#[derive(Clone)] -pub struct PendingMemoryQuota(Arc); - -impl std::fmt::Debug for PendingMemoryQuota { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PendingMemoryQuota") - .field("remain", &self.0.available_permits()) - .field("total", &self.0) - .finish() - } -} - -pub struct PendingMemory(OwnedSemaphorePermit); - -impl PendingMemoryQuota { - pub fn new(quota: usize) -> Self { - 
Self(Arc::new(Semaphore::new(quota))) - } - - pub async fn pending(&self, size: usize) -> PendingMemory { - PendingMemory( - self.0 - .clone() - .acquire_many_owned(size as _) - .await - .expect("BUG: the semaphore is closed unexpectedly."), - ) - } +struct ScanResult { + more: bool, + out_of_memory: bool, + statistics: Statistics, } /// EventLoader transforms data from the snapshot into ApplyEvent. pub struct EventLoader { scanner: DeltaScanner, // pooling the memory. - entry_batch: EntryBatch, + entry_batch: Vec, } const ENTRY_BATCH_SIZE: usize = 1024; @@ -97,20 +74,48 @@ impl EventLoader { Ok(Self { scanner, - entry_batch: EntryBatch::with_capacity(ENTRY_BATCH_SIZE), + entry_batch: Vec::with_capacity(ENTRY_BATCH_SIZE), }) } + fn scan_result(&mut self, more: bool) -> ScanResult { + ScanResult { + more, + out_of_memory: false, + statistics: self.scanner.take_statistics(), + } + } + + fn out_of_memory(&mut self) -> ScanResult { + ScanResult { + more: true, + out_of_memory: true, + statistics: self.scanner.take_statistics(), + } + } + /// Scan a batch of events from the snapshot, and save them into the /// internal buffer. - fn fill_entries(&mut self) -> Result { + fn fill_entries(&mut self, memory_quota: &mut OwnedAllocated) -> Result { assert!( self.entry_batch.is_empty(), - "EventLoader: the entry batch isn't empty when filling entries, which is error-prone, please call `omit_entries` first. (len = {})", + "EventLoader: the entry batch isn't empty when filling entries, which is error-prone, please call `emit_entries_to` first. (len = {})", self.entry_batch.len() ); - self.scanner.scan_entries(&mut self.entry_batch)?; - Ok(self.scanner.take_statistics()) + let batch = &mut self.entry_batch; + while batch.len() < batch.capacity() { + match self.scanner.next_entry()? { + Some(entry) => { + let size = entry.size(); + batch.push(entry); + if memory_quota.alloc(size).is_err() { + return Ok(self.out_of_memory()); + } + } + None => return Ok(self.scan_result(false)), + } + } + Ok(self.scan_result(true)) } /// Drain the internal buffer, converting them to the [`ApplyEvents`], @@ -120,7 +125,7 @@ impl EventLoader { result: &mut ApplyEvents, resolver: &mut TwoPhaseResolver, ) -> Result<()> { - for entry in self.entry_batch.drain() { + for entry in self.entry_batch.drain(..) { match entry { TxnEntry::Prewrite { default: (key, value), @@ -180,7 +185,7 @@ pub struct InitialDataLoader { pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, - pub(crate) quota: PendingMemoryQuota, + pub(crate) quota: Arc, pub(crate) limit: Limiter, // If there are too many concurrent initial scanning, the limit of disk speed or pending memory // quota will probably be triggered. Then the whole scanning will be pretty slow. And when @@ -202,7 +207,7 @@ where sink: Router, tracing: SubscriptionTracer, sched: Scheduler, - quota: PendingMemoryQuota, + quota: Arc, limiter: Limiter, cdc_handle: H, concurrency_limit: Arc, @@ -384,40 +389,44 @@ where let mut events = ApplyEvents::with_capacity(1024, region.id); // Note: the call of `fill_entries` is the only step which would read the disk. // we only need to record the disk throughput of this. - let (stat, disk_read) = - utils::with_record_read_throughput(|| event_loader.fill_entries()); - // We must use the size of entry batch here to check whether we have progress. - // Or we may exit too early if there are only records: - // - can be inlined to `write` CF (hence it won't be written to default CF) - // - are prewritten. 
(hence it will only contains `Prewrite` records). - // In this condition, ALL records generate no ApplyEvent(only lock change), - // and we would exit after the first run of loop :( - let no_progress = event_loader.entry_batch.is_empty(); - let stat = stat?; + let mut allocated = OwnedAllocated::new(Arc::clone(&self.quota)); + let (res, disk_read) = + utils::with_record_read_throughput(|| event_loader.fill_entries(&mut allocated)); + let res = res?; self.with_resolver(region, handle, |r| { event_loader.emit_entries_to(&mut events, r) })?; - if no_progress { - metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); - return Ok(stats.stat); - } - stats.add_statistics(&stat); + stats.add_statistics(&res.statistics); let region_id = region.get_id(); let sink = self.sink.clone(); let event_size = events.size(); let sched = self.scheduler.clone(); - let permit = self.quota.pending(event_size).await; self.limit.consume(disk_read as _).await; debug!("sending events to router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); metrics::INCREMENTAL_SCAN_DISK_READ.inc_by(disk_read as f64); metrics::HEAP_MEMORY.add(event_size as _); + fail::fail_point!("scan_and_async_send::about_to_consume"); join_handles.push(tokio::spawn(async move { utils::handle_on_event_result(&sched, sink.on_events(events).await); metrics::HEAP_MEMORY.sub(event_size as _); + drop(allocated); debug!("apply event done"; "size" => %event_size, "region" => %region_id); - drop(permit); })); + if !res.more { + metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); + return Ok(stats.stat); + } + if res.out_of_memory { + futures::future::try_join_all(join_handles.drain(..)) + .await + .map_err(|err| { + annotate!( + err, + "failed to join tokio runtime during out-of-memory-quota" + ) + })?; + } } } @@ -465,10 +474,13 @@ where #[cfg(test)] mod tests { + use std::sync::Arc; + use futures::executor::block_on; use kvproto::metapb::*; use tikv::storage::{txn::tests::*, TestEngineBuilder}; use tikv_kv::SnapContext; + use tikv_util::memory::{MemoryQuota, OwnedAllocated}; use txn_types::TimeStamp; use super::EventLoader; @@ -498,10 +510,13 @@ mod tests { let snap = block_on(async { tikv_kv::snapshot(&mut engine, SnapContext::default()).await }) .unwrap(); + let quota_inf = Arc::new(MemoryQuota::new(usize::MAX)); let mut loader = EventLoader::load_from(snap, TimeStamp::zero(), TimeStamp::max(), &r).unwrap(); - let (r, data_load) = with_record_read_throughput(|| loader.fill_entries()); + let (r, data_load) = with_record_read_throughput(|| { + loader.fill_entries(&mut OwnedAllocated::new(quota_inf)) + }); r.unwrap(); let mut events = ApplyEvents::with_capacity(1024, 42); let mut res = TwoPhaseResolver::new(42, None); diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index 3d4690d7f484..0402e5d2ee38 100644 --- a/components/backup-stream/src/lib.rs +++ b/components/backup-stream/src/lib.rs @@ -10,7 +10,7 @@ mod endpoint; pub mod errors; mod event_loader; pub mod metadata; -pub(crate) mod metrics; +pub mod metrics; pub mod observer; pub mod router; mod service; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 05f1a0533921..849a503e21b0 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -539,6 +539,15 @@ impl RouterInner { let task_info = self.get_task_info(&task).await?; task_info.on_events(events).await?; let file_size_limit 
= self.temp_file_size_limit.load(Ordering::SeqCst); + #[cfg(features = "failpoints")] + { + let delayed = (|| { + fail::fail_point!("router_on_event_delay_ms", |v| { + v.and_then(|v| v.parse::().ok()).unwrap_or(0) + }) + })(); + tokio::time::sleep(Duration::from_millis(delayed)).await; + } // When this event make the size of temporary files exceeds the size limit, make // a flush. Note that we only flush if the size is less than the limit before diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index ff9b9f82ba1a..8dfc21529e47 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -9,7 +9,13 @@ pub use suite::*; mod all { - use std::time::Duration; + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::Duration, + }; use backup_stream::{ metadata::{ @@ -19,7 +25,7 @@ mod all { GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; use futures::executor::block_on; - use tikv_util::defer; + use tikv_util::{config::ReadableSize, defer}; use super::{ make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, @@ -30,7 +36,7 @@ mod all { let mut suite = SuiteBuilder::new_named("basic").build(); fail::cfg("try_start_observe", "1*return").unwrap(); - run_async_test(async { + let (round1, round2) = run_async_test(async { // write data before the task starting, for testing incremental scanning. let round1 = suite.write_records(0, 128, 1).await; suite.must_register_task(1, "test_basic"); @@ -38,13 +44,13 @@ mod all { let round2 = suite.write_records(256, 128, 1).await; suite.force_flush_files("test_basic"); suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; + (round1, round2) }); + suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ); + suite.cluster.shutdown(); } #[test] @@ -97,10 +103,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("region_failure"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); } #[test] fn initial_scan_failure() { @@ -121,10 +127,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("initial_scan_failure"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); } #[test] fn failed_during_refresh_region() { @@ -147,10 +153,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("fail_to_refresh_region"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); let leader = suite.cluster.leader_of_region(1).unwrap().store_id; let (tx, rx) = std::sync::mpsc::channel(); suite.endpoints[&leader] @@ -212,12 +218,7 @@ mod all { let items = run_async_test(suite.write_records(0, 128, 1)); suite.force_flush_files("retry_abort"); suite.wait_for_flush(); - run_async_test( - suite.check_for_write_records( - suite.flushed_files.path(), - items.iter().map(Vec::as_slice), - ), - ); + 
suite.check_for_write_records(suite.flushed_files.path(), items.iter().map(Vec::as_slice)); } #[test] fn failure_and_split() { @@ -240,12 +241,42 @@ mod all { let round2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("failure_and_split"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(Vec::as_slice), - )); + ); let cp = suite.global_checkpoint(); assert!(cp > 512, "it is {}", cp); suite.cluster.shutdown(); } + + #[test] + fn memory_quota() { + let mut suite = SuiteBuilder::new_named("memory_quota") + .cfg(|cfg| cfg.initial_scan_pending_memory_quota = ReadableSize::kb(2)) + .build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + let failed = Arc::new(AtomicBool::new(false)); + fail::cfg("router_on_event_delay_ms", "6*return(1000)").unwrap(); + fail::cfg_callback("scan_and_async_send::about_to_consume", { + let failed = failed.clone(); + move || { + let v = backup_stream::metrics::HEAP_MEMORY.get(); + // Not greater than max key length * concurrent initial scan number. + if v > 4096 * 6 { + println!("[[ FAILED ]] The memory usage is {v} which exceeds the quota"); + failed.store(true, Ordering::SeqCst); + } + } + }) + .unwrap(); + suite.must_register_task(1, "memory_quota"); + suite.force_flush_files("memory_quota"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys.iter().map(|v| v.as_slice()), + ); + assert!(!failed.load(Ordering::SeqCst)); + } } diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index 79a756f684dd..395159060c14 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -28,20 +28,19 @@ mod all { #[test] fn with_split() { let mut suite = SuiteBuilder::new_named("with_split").build(); - run_async_test(async { + let (round1, round2) = run_async_test(async { let round1 = suite.write_records(0, 128, 1).await; suite.must_split(&make_split_key_at_record(1, 42)); suite.must_register_task(1, "test_with_split"); let round2 = suite.write_records(256, 128, 1).await; - suite.force_flush_files("test_with_split"); - suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; + (round1, round2) }); + suite.force_flush_files("test_with_split"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ); suite.cluster.shutdown(); } @@ -63,7 +62,7 @@ mod all { #[test] fn with_split_txn() { let mut suite = SuiteBuilder::new_named("split_txn").build(); - run_async_test(async { + let (commit_ts, start_ts, keys) = run_async_test(async { let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); suite.must_kv_prewrite( @@ -76,26 +75,25 @@ mod all { start_ts, ); let commit_ts = suite.cluster.pd_client.get_tso().await.unwrap(); - suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); - suite.must_register_task(1, "test_split_txn"); - suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); - suite.force_flush_files("test_split_txn"); - suite.wait_for_flush(); - let keys_encoded = keys - .iter() - .map(|v| { - Key::from_raw(v.as_slice()) - .append_ts(commit_ts) - .into_encoded() - }) - .collect::>(); - suite - 
.check_for_write_records( - suite.flushed_files.path(), - keys_encoded.iter().map(Vec::as_slice), - ) - .await; + (commit_ts, start_ts, keys) }); + suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); + suite.must_register_task(1, "test_split_txn"); + suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); + suite.force_flush_files("test_split_txn"); + suite.wait_for_flush(); + let keys_encoded = keys + .iter() + .map(|v| { + Key::from_raw(v.as_slice()) + .append_ts(commit_ts) + .into_encoded() + }) + .collect::>(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys_encoded.iter().map(Vec::as_slice), + ); suite.cluster.shutdown(); } @@ -111,10 +109,10 @@ mod all { let round2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("test_leader_down"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(Vec::as_slice), - )); + ); suite.cluster.shutdown(); } @@ -346,10 +344,10 @@ mod all { } assert_eq!(items.last().unwrap().end_key, Vec::::default()); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(|x| x.as_slice()), - )); + ); } #[test] @@ -373,18 +371,18 @@ mod all { .unwrap(); suite.sync(); std::thread::sleep(Duration::from_secs(2)); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|x| x.as_slice()), - )); + ); assert!(suite.global_checkpoint() > 256); suite.force_flush_files("r"); suite.wait_for_flush(); assert!(suite.global_checkpoint() > 512); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(|x| x.as_slice()), - )); + ); } #[test] @@ -426,10 +424,10 @@ mod all { ts, cps ); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|k| k.as_slice()), - )) + ) } #[test] diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 41a57f5858b1..0e4038d07a0d 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -2,7 +2,8 @@ use std::{ collections::{HashMap, HashSet}, - path::Path, + fmt::Display, + path::{Path, PathBuf}, sync::Arc, time::Duration, }; @@ -30,7 +31,6 @@ use kvproto::{ tikvpb::*, }; use pd_client::PdClient; -use protobuf::parse_from_bytes; use raftstore::{router::CdcRaftRouter, RegionInfoAccessor}; use resolved_ts::LeadershipResolver; use tempdir::TempDir; @@ -43,13 +43,25 @@ use tikv_util::{ number::NumberEncoder, stream_event::{EventIterator, Iterator}, }, - info, + debug, info, worker::LazyWorker, HandyRwLock, }; use txn_types::{Key, TimeStamp, WriteRef}; use walkdir::WalkDir; +#[derive(Debug)] +pub struct FileSegments { + path: PathBuf, + segments: Vec<(usize, usize)>, +} + +#[derive(Default, Debug)] +pub struct LogFiles { + default_cf: Vec, + write_cf: Vec, +} + pub type TestEndpoint = Endpoint< ErrorStore, RegionInfoAccessor, @@ -453,7 +465,12 @@ impl Suite { for ts in (from..(from + n)).map(|x| x * 2) { let ts = ts as u64; let key = make_record_key(for_table, ts); - let muts = vec![mutation(key.clone(), b"hello, world".to_vec())]; + let value = if ts % 4 == 0 { + b"hello, world".to_vec() + } else { + [0xdd; 4096].to_vec() + }; + let muts = vec![mutation(key.clone(), value)]; let enc_key = 
Key::from_raw(&key).into_encoded(); let region = self.cluster.get_region_id(&enc_key); let start_ts = self.cluster.pd_client.get_tso().await.unwrap(); @@ -510,45 +527,53 @@ impl Suite { } } - pub fn load_metadata_for_write_records( - &self, - path: &Path, - ) -> HashMap> { - let mut meta_map: HashMap> = HashMap::new(); - for entry in WalkDir::new(path) { - let entry = entry.unwrap(); - if entry.file_type().is_file() - && entry - .file_name() - .to_str() - .map_or(false, |s| s.ends_with(".meta")) - { - let content = std::fs::read(entry.path()).unwrap(); - let meta = parse_from_bytes::(content.as_ref()).unwrap(); - for g in meta.file_groups.into_iter() { - let path = g.path.split('/').last().unwrap(); - for f in g.data_files_info.into_iter() { - let file_info = meta_map.get_mut(path); - if let Some(v) = file_info { - v.push(( - f.range_offset as usize, - (f.range_offset + f.range_length) as usize, - )); + pub fn get_files_to_check(&self, path: &Path) -> std::io::Result { + let mut res = LogFiles::default(); + for entry in WalkDir::new(path.join("v1/backupmeta")) { + let entry = entry?; + println!("reading {}", entry.path().display()); + if entry.file_name().to_str().unwrap().ends_with(".meta") { + let content = std::fs::read(entry.path())?; + let meta = protobuf::parse_from_bytes::(&content)?; + for fg in meta.get_file_groups() { + let mut default_segs = vec![]; + let mut write_segs = vec![]; + for file in fg.get_data_files_info() { + let v = if file.cf == "default" || file.cf.is_empty() { + Some(&mut default_segs) + } else if file.cf == "write" { + Some(&mut write_segs) } else { - let v = vec![( - f.range_offset as usize, - (f.range_offset + f.range_length) as usize, - )]; - meta_map.insert(String::from(path), v); - } + None + }; + v.into_iter().for_each(|v| { + v.push(( + file.get_range_offset() as usize, + (file.get_range_offset() + file.get_range_length()) as usize, + )) + }); + } + let p = path.join(fg.get_path()); + if !default_segs.is_empty() { + res.default_cf.push(FileSegments { + path: p.clone(), + segments: default_segs, + }) + } + if !write_segs.is_empty() { + res.write_cf.push(FileSegments { + path: p, + segments: write_segs, + }) } } } } - meta_map + Ok(res) } - pub async fn check_for_write_records<'a>( + #[track_caller] + pub fn check_for_write_records<'a>( &self, path: &Path, key_set: impl std::iter::Iterator, @@ -557,45 +582,72 @@ impl Suite { let n = remain_keys.len(); let mut extra_key = 0; let mut extra_len = 0; - let meta_map = self.load_metadata_for_write_records(path); - for entry in WalkDir::new(path) { - let entry = entry.unwrap(); - println!("checking: {:?}", entry); - if entry.file_type().is_file() - && entry - .file_name() - .to_str() - .map_or(false, |s| s.ends_with(".log")) - { - let buf = std::fs::read(entry.path()).unwrap(); - let file_infos = meta_map.get(entry.file_name().to_str().unwrap()).unwrap(); - for &file_info in file_infos { - let mut decoder = ZstdDecoder::new(Vec::new()); - let pbuf: &[u8] = &buf[file_info.0..file_info.1]; - decoder.write_all(pbuf).await.unwrap(); - decoder.flush().await.unwrap(); - decoder.close().await.unwrap(); - let content = decoder.into_inner(); - - let mut iter = EventIterator::new(&content); - loop { - if !iter.valid() { - break; - } - iter.next().unwrap(); - if !remain_keys.remove(iter.key()) { - extra_key += 1; - extra_len += iter.key().len() + iter.value().len(); - } + let files = self.get_files_to_check(path).unwrap_or_default(); + let mut default_keys = HashSet::new(); + let content_of = |buf: &[u8], range: (usize, 
usize)| { + let mut decoder = ZstdDecoder::new(Vec::new()); + let pbuf: &[u8] = &buf[range.0..range.1]; + run_async_test(async { + decoder.write_all(pbuf).await.unwrap(); + decoder.flush().await.unwrap(); + decoder.close().await.unwrap(); + }); + decoder.into_inner() + }; + for entry in files.write_cf { + debug!("checking write: {:?}", entry); + + let buf = std::fs::read(&entry.path).unwrap(); + for &file_info in entry.segments.iter() { + let data = content_of(&buf, file_info); + let mut iter = EventIterator::new(&data); + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + if !remain_keys.remove(iter.key()) { + extra_key += 1; + extra_len += iter.key().len() + iter.value().len(); + } + + let value = iter.value(); + let wf = WriteRef::parse(value).unwrap(); + if wf.short_value.is_none() { + let mut key = Key::from_encoded_slice(iter.key()).truncate_ts().unwrap(); + key.append_ts_inplace(wf.start_ts); - let value = iter.value(); - let wf = WriteRef::parse(value).unwrap(); + default_keys.insert(key.into_encoded()); + } else { assert_eq!(wf.short_value, Some(b"hello, world" as &[u8])); } } } } + for entry in files.default_cf { + debug!("checking default: {:?}", entry); + + let buf = std::fs::read(&entry.path).unwrap(); + for &file_info in entry.segments.iter() { + let data = content_of(&buf, file_info); + let mut iter = EventIterator::new(&data); + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + if !default_keys.remove(iter.key()) { + extra_key += 1; + extra_len += iter.key().len() + iter.value().len(); + } + + let value = iter.value(); + assert_eq!(value, &[0xdd; 4096]); + } + } + } + if extra_key != 0 { println!( "check_for_write_records of “{}”: extra {} keys ({:.02}% of recorded keys), extra {} bytes.", @@ -605,17 +657,19 @@ impl Suite { extra_len ) } - if !remain_keys.is_empty() { - panic!( - "not all keys are recorded: it remains {:?} (total = {})", - remain_keys - .iter() - .take(3) - .map(|v| hex::encode(v)) - .collect::>(), - remain_keys.len() - ); - } + assert_empty(&remain_keys, "not all keys are recorded"); + assert_empty(&default_keys, "some keys don't have default entry"); + } +} + +#[track_caller] +fn assert_empty(v: &HashSet>, msg: impl Display) { + if !v.is_empty() { + panic!( + "{msg}: it remains {:?}... 
(total = {})", + v.iter().take(3).map(|v| hex::encode(v)).collect::>(), + v.len() + ); } } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 291254c5227b..15ffece44253 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -2,7 +2,10 @@ use std::{ mem, - sync::atomic::{AtomicUsize, Ordering}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, }; use kvproto::{ @@ -87,6 +90,32 @@ pub struct MemoryQuota { capacity: AtomicUsize, } +pub struct OwnedAllocated { + allocated: usize, + from: Arc, +} + +impl OwnedAllocated { + pub fn new(target: Arc) -> Self { + Self { + allocated: 0, + from: target, + } + } + + pub fn alloc(&mut self, bytes: usize) -> Result<(), MemoryQuotaExceeded> { + self.from.alloc(bytes)?; + self.allocated += bytes; + Ok(()) + } +} + +impl Drop for OwnedAllocated { + fn drop(&mut self) { + self.from.free(self.allocated) + } +} + impl MemoryQuota { pub fn new(capacity: usize) -> MemoryQuota { MemoryQuota { @@ -182,4 +211,25 @@ mod tests { quota.alloc(40).unwrap(); assert_eq!(quota.in_use(), 50); } + + #[test] + fn test_allocated() { + let quota = Arc::new(MemoryQuota::new(100)); + let mut allocated = OwnedAllocated::new(Arc::clone("a)); + allocated.alloc(42).unwrap(); + assert_eq!(quota.in_use(), 42); + quota.alloc(59).unwrap_err(); + allocated.alloc(16).unwrap(); + assert_eq!(quota.in_use(), 58); + let mut allocated2 = OwnedAllocated::new(Arc::clone("a)); + allocated2.alloc(8).unwrap(); + allocated2.alloc(40).unwrap_err(); + assert_eq!(quota.in_use(), 66); + quota.alloc(4).unwrap(); + assert_eq!(quota.in_use(), 70); + drop(allocated); + assert_eq!(quota.in_use(), 12); + drop(allocated2); + assert_eq!(quota.in_use(), 4); + } } From 7fc3684c91f9a40ca351fc8a1c894871ad926f92 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 7 Nov 2023 14:54:41 -0600 Subject: [PATCH 127/203] raftstore: enhance split check (#15900) close tikv/tikv#15863 Signed-off-by: tonyxuqqi --- components/raftstore-v2/src/router/imp.rs | 14 +- .../raftstore/src/coprocessor/dispatcher.rs | 26 ++- .../src/coprocessor/split_check/keys.rs | 56 +++++-- .../src/coprocessor/split_check/size.rs | 43 ++++- components/raftstore/src/router.rs | 8 +- components/raftstore/src/store/fsm/peer.rs | 109 +++++++------ components/raftstore/src/store/msg.rs | 22 ++- components/raftstore/src/store/peer.rs | 150 ++++++++++++++---- .../raftstore/src/store/worker/split_check.rs | 32 +++- components/test_raftstore/src/util.rs | 40 ++++- components/tikv_util/src/log.rs | 12 ++ tests/failpoints/cases/test_split_region.rs | 62 ++++++++ .../raftstore/test_split_region.rs | 2 +- 13 files changed, 457 insertions(+), 119 deletions(-) diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 23a8a3c7d4e9..e7a63f6d48f9 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -43,12 +43,18 @@ impl AsyncReadNotifier for StoreRouter { } impl raftstore::coprocessor::StoreHandle for StoreRouter { - fn update_approximate_size(&self, region_id: u64, size: u64) { - let _ = self.send(region_id, PeerMsg::UpdateRegionSize { size }); + // TODO: add splitable logic in raftstore-v2 + fn update_approximate_size(&self, region_id: u64, size: Option, _may_split: Option) { + if let Some(size) = size { + let _ = self.send(region_id, PeerMsg::UpdateRegionSize { size }); + } } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { - let _ = self.send(region_id, 
PeerMsg::UpdateRegionKeys { keys }); + // TODO: add splitable logic in raftstore-v2 + fn update_approximate_keys(&self, region_id: u64, keys: Option, _may_split: Option) { + if let Some(keys) = keys { + let _ = self.send(region_id, PeerMsg::UpdateRegionKeys { keys }); + } } fn ask_split( diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d082013cd2c5..c7d6731d3e9a 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -19,8 +19,8 @@ use crate::store::BucketRange; /// A handle for coprocessor to schedule some command back to raftstore. pub trait StoreHandle: Clone + Send { - fn update_approximate_size(&self, region_id: u64, size: u64); - fn update_approximate_keys(&self, region_id: u64, keys: u64); + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option); + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option); fn ask_split( &self, region_id: u64, @@ -48,11 +48,13 @@ pub trait StoreHandle: Clone + Send { pub enum SchedTask { UpdateApproximateSize { region_id: u64, - size: u64, + splitable: Option, + size: Option, }, UpdateApproximateKeys { region_id: u64, - keys: u64, + splitable: Option, + keys: Option, }, AskSplit { region_id: u64, @@ -75,12 +77,20 @@ pub enum SchedTask { } impl StoreHandle for std::sync::mpsc::SyncSender { - fn update_approximate_size(&self, region_id: u64, size: u64) { - let _ = self.try_send(SchedTask::UpdateApproximateSize { region_id, size }); + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option) { + let _ = self.try_send(SchedTask::UpdateApproximateSize { + region_id, + splitable, + size, + }); } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { - let _ = self.try_send(SchedTask::UpdateApproximateKeys { region_id, keys }); + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option) { + let _ = self.try_send(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + keys, + }); } fn ask_split( diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 2c0e71dd8cbd..d6a49175441e 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -157,9 +157,11 @@ impl SplitCheckObserver for KeysCheckObserver } }; - self.router.update_approximate_keys(region_id, region_keys); + self.router + .update_approximate_keys(region_id, Some(region_keys), None); REGION_KEYS_HISTOGRAM.observe(region_keys as f64); + // if bucket checker using scan is added, to utilize the scan, // add keys checker as well for free // It has the assumption that the size's checker is before the keys's check in @@ -299,12 +301,28 @@ mod tests { None, )); // keys has not reached the max_keys 100 yet. - match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { - assert_eq!(region_id, region.get_id()); + let mut recv_cnt = 0; + loop { + match rx.try_recv() { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. 
+ }) => { + assert_eq!(region_id, region.get_id()); + assert!(splitable.is_none()); + recv_cnt += 1; + if recv_cnt == 2 { + break; + } + } + others => panic!("expect recv empty, but got {:?}", others), } - others => panic!("expect recv empty, but got {:?}", others), } put_data(&engine, 90, 160, true); @@ -403,12 +421,28 @@ mod tests { None, )); // keys has not reached the max_keys 100 yet. - match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { - assert_eq!(region_id, region.get_id()); + let mut recv_cnt = 0; + loop { + match rx.try_recv() { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. + }) => { + assert_eq!(region_id, region.get_id()); + assert!(splitable.is_none()); + recv_cnt += 1; + if recv_cnt == 2 { + break; + } + } + others => panic!("expect recv empty, but got {:?}", others), } - others => panic!("expect recv empty, but got {:?}", others), } put_data(&engine, 90, 160, true); diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 4b320bef1b64..e5048a838263 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -158,13 +158,14 @@ impl SplitCheckObserver for SizeCheckObserver }; // send it to raftstore to update region approximate size - self.router.update_approximate_size(region_id, region_size); + self.router + .update_approximate_size(region_id, Some(region_size), None); + let need_split_region = region_size >= host.cfg.region_max_size().0; let need_bucket_checker = host.cfg.enable_region_bucket() && region_size >= 2 * host.cfg.region_bucket_size.0; REGION_SIZE_HISTOGRAM.observe(region_size as f64); - let need_split_region = region_size >= host.cfg.region_max_size().0; if need_split_region || need_bucket_checker { // when it's a large region use approximate way to produce split keys if need_split_region { @@ -265,11 +266,23 @@ pub mod tests { exp_split_keys: Vec>, ignore_split_keys: bool, ) { + let mut split = false; loop { match rx.try_recv() { - Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) - | Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { + Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) => { + assert_eq!(region_id, exp_region.get_id()); + split = split || splitable.unwrap_or(false); + } + Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { assert_eq!(region_id, exp_region.get_id()); } Ok(SchedTask::AskSplit { @@ -283,6 +296,7 @@ pub mod tests { if !ignore_split_keys { assert_eq!(split_keys, exp_split_keys); } + assert!(split); break; } others => panic!("expect split check result, but got {:?}", others), @@ -303,11 +317,23 @@ pub mod tests { exp_region: &Region, exp_split_keys_count: usize, ) { + let mut split = false; loop { match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) - | Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. 
+ }) => { + assert_eq!(region_id, exp_region.get_id()); + split = split || splitable.unwrap_or(false); + } + Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { assert_eq!(region_id, exp_region.get_id()); } Ok(SchedTask::AskSplit { @@ -319,6 +345,7 @@ pub mod tests { assert_eq!(region_id, exp_region.get_id()); assert_eq!(®ion_epoch, exp_region.get_region_epoch()); assert_eq!(split_keys.len(), exp_split_keys_count); + assert!(split); break; } others => panic!("expect split check result, but got {:?}", others), diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 77d3a35e3068..fd50357fa38d 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -289,11 +289,11 @@ impl RaftStoreRouter for RaftRouter { // duplicated codes. impl crate::coprocessor::StoreHandle for RaftRouter { - fn update_approximate_size(&self, region_id: u64, size: u64) { + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option) { if let Err(e) = CasualRouter::send( self, region_id, - CasualMessage::RegionApproximateSize { size }, + CasualMessage::RegionApproximateSize { size, splitable }, ) { warn!( "failed to send approximate region size"; @@ -304,11 +304,11 @@ impl crate::coprocessor::StoreHandle for RaftRoute } } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option) { if let Err(e) = CasualRouter::send( self, region_id, - CasualMessage::RegionApproximateKeys { keys }, + CasualMessage::RegionApproximateKeys { keys, splitable }, ) { warn!( "failed to send approximate region keys"; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 5a7223dcaa3b..03e31938aa02 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -51,7 +51,7 @@ use raft::{ use smallvec::SmallVec; use tikv_alloc::trace::TraceEvent; use tikv_util::{ - box_err, debug, defer, error, escape, info, is_zero_duration, + box_err, debug, defer, error, escape, info, info_or_debug, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, store::{find_peer, find_peer_by_id, is_learner, region_on_same_stores}, sys::disk::DiskUsage, @@ -1085,11 +1085,11 @@ where } => { self.on_hash_computed(index, context, hash); } - CasualMessage::RegionApproximateSize { size } => { - self.on_approximate_region_size(size); + CasualMessage::RegionApproximateSize { size, splitable } => { + self.on_approximate_region_size(size, splitable); } - CasualMessage::RegionApproximateKeys { keys } => { - self.on_approximate_region_keys(keys); + CasualMessage::RegionApproximateKeys { keys, splitable } => { + self.on_approximate_region_keys(keys, splitable); } CasualMessage::RefreshRegionBuckets { region_epoch, @@ -1367,9 +1367,7 @@ where } fn on_clear_region_size(&mut self) { - self.fsm.peer.approximate_size = None; - self.fsm.peer.approximate_keys = None; - self.fsm.peer.may_skip_split_check = false; + self.fsm.peer.split_check_trigger.on_clear_region_size(); self.register_split_region_check_tick(); } @@ -4113,8 +4111,18 @@ where // if share_source_region_size is true, it means the new region contains any // data from the origin region if share_source_region_size { - share_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); - share_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + share_size = self + .fsm + .peer + .split_check_trigger + 
.approximate_size + .map(|v| v / new_region_count); + share_keys = self + .fsm + .peer + .split_check_trigger + .approximate_keys + .map(|v| v / new_region_count); } let mut meta = self.ctx.store_meta.lock().unwrap(); @@ -4126,14 +4134,11 @@ where ); self.fsm.peer.post_split(); - // It's not correct anymore, so set it to false to schedule a split check task. - self.fsm.peer.may_skip_split_check = false; - let is_leader = self.fsm.peer.is_leader(); if is_leader { if share_source_region_size { - self.fsm.peer.approximate_size = share_size; - self.fsm.peer.approximate_keys = share_keys; + self.fsm.peer.split_check_trigger.approximate_size = share_size; + self.fsm.peer.split_check_trigger.approximate_keys = share_keys; } self.fsm.peer.heartbeat_pd(self.ctx); // Notify pd immediately to let it update the region meta. @@ -4162,7 +4167,6 @@ where if meta.region_ranges.remove(&last_key).is_none() { panic!("{} original region should exist", self.fsm.peer.tag); } - let last_region_id = regions.last().unwrap().get_id(); for (new_region, locks) in regions.into_iter().zip(region_locks) { let new_region_id = new_region.get_id(); @@ -4269,8 +4273,8 @@ where new_peer.has_ready |= campaigned; if is_leader { - new_peer.peer.approximate_size = share_size; - new_peer.peer.approximate_keys = share_keys; + new_peer.peer.split_check_trigger.approximate_size = share_size; + new_peer.peer.split_check_trigger.approximate_keys = share_keys; *new_peer.peer.txn_ext.pessimistic_locks.write() = locks; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. @@ -4288,11 +4292,6 @@ where .insert(new_region_id, ReadDelegate::from_peer(new_peer.get_peer())); meta.region_read_progress .insert(new_region_id, new_peer.peer.read_progress.clone()); - if last_region_id == new_region_id { - // To prevent from big region, the right region needs run split - // check again after split. - new_peer.peer.size_diff_hint = self.ctx.cfg.region_split_check_diff().0; - } let mailbox = BasicMailbox::new(sender, new_peer, self.ctx.router.state_cnt().clone()); self.ctx.router.register(new_region_id, mailbox); self.ctx @@ -4787,7 +4786,7 @@ where // make approximate size and keys updated in time. // the reason why follower need to update is that there is a issue that after // merge and then transfer leader, the new leader may have stale size and keys. - self.fsm.peer.size_diff_hint = self.ctx.cfg.region_split_check_diff().0; + self.fsm.peer.split_check_trigger.reset_skip_check(); self.fsm.peer.reset_region_buckets(); if self.fsm.peer.is_leader() { info!( @@ -5248,6 +5247,14 @@ where &mut self, msg: &RaftCmdRequest, ) -> Result> { + // failpoint + fail_point!( + "fail_pre_propose_split", + msg.has_admin_request() + && msg.get_admin_request().get_cmd_type() == AdminCmdType::BatchSplit, + |_| Err(Error::Other(box_err!("fail_point"))) + ); + // Check store_id, make sure that the msg is dispatched to the right place. if let Err(e) = util::check_store_id(msg.get_header(), self.store_id()) { self.ctx @@ -5472,7 +5479,10 @@ where return; } Err(e) => { - debug!( + // log for admin requests + let is_admin_request = msg.has_admin_request(); + info_or_debug!( + is_admin_request; "failed to propose"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), @@ -5840,9 +5850,11 @@ where // whether the region should split. // We assume that `may_skip_split_check` is only set true after the split check // task is scheduled. 
- if self.fsm.peer.may_skip_split_check - && self.fsm.peer.compaction_declined_bytes < self.ctx.cfg.region_split_check_diff().0 - && self.fsm.peer.size_diff_hint < self.ctx.cfg.region_split_check_diff().0 + if self + .fsm + .peer + .split_check_trigger + .should_skip(self.ctx.cfg.region_split_check_diff().0) { return; } @@ -5857,6 +5869,11 @@ where return; } + // To avoid run the check if it's splitting. + if self.fsm.peer.is_splitting() { + return; + } + // When Lightning or BR is importing data to TiKV, their ingest-request may fail // because of region-epoch not matched. So we hope TiKV do not check region size // and split region during importing. @@ -5895,10 +5912,7 @@ where ); return; } - self.fsm.peer.size_diff_hint = 0; - self.fsm.peer.compaction_declined_bytes = 0; - // the task is scheduled, next tick may skip it. - self.fsm.peer.may_skip_split_check = true; + self.fsm.peer.split_check_trigger.post_triggered(); } fn on_prepare_split_region( @@ -5974,15 +5988,21 @@ where } } - fn on_approximate_region_size(&mut self, size: u64) { - self.fsm.peer.approximate_size = Some(size); + fn on_approximate_region_size(&mut self, size: Option, splitable: Option) { + self.fsm + .peer + .split_check_trigger + .on_approximate_region_size(size, splitable); self.register_split_region_check_tick(); self.register_pd_heartbeat_tick(); fail_point!("on_approximate_region_size"); } - fn on_approximate_region_keys(&mut self, keys: u64) { - self.fsm.peer.approximate_keys = Some(keys); + fn on_approximate_region_keys(&mut self, keys: Option, splitable: Option) { + self.fsm + .peer + .split_check_trigger + .on_approximate_region_keys(keys, splitable); self.register_split_region_check_tick(); self.register_pd_heartbeat_tick(); } @@ -6130,8 +6150,10 @@ where } fn on_compaction_declined_bytes(&mut self, declined_bytes: u64) { - self.fsm.peer.compaction_declined_bytes += declined_bytes; - if self.fsm.peer.compaction_declined_bytes >= self.ctx.cfg.region_split_check_diff().0 { + self.fsm.peer.split_check_trigger.compaction_declined_bytes += declined_bytes; + if self.fsm.peer.split_check_trigger.compaction_declined_bytes + >= self.ctx.cfg.region_split_check_diff().0 + { UPDATE_REGION_SIZE_BY_COMPACTION_COUNTER.inc(); } self.register_split_region_check_tick(); @@ -6536,17 +6558,14 @@ where size += sst.total_bytes; keys += sst.total_kvs; } - self.fsm.peer.approximate_size = - Some(self.fsm.peer.approximate_size.unwrap_or_default() + size); - self.fsm.peer.approximate_keys = - Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); + self.fsm + .peer + .split_check_trigger + .on_ingest_sst_result(size, keys); if let Some(buckets) = &mut self.fsm.peer.region_buckets_info_mut().bucket_stat_mut() { buckets.ingest_sst(keys, size); } - // The ingested file may be overlapped with the data in engine, so we need to - // check it again to get the accurate value. - self.fsm.peer.may_skip_split_check = false; if self.fsm.peer.is_leader() { self.on_pd_heartbeat_tick(); self.register_split_region_check_tick(); diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 0d703143a08b..249c550db148 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -559,12 +559,14 @@ pub enum CasualMessage { /// Approximate size of target region. This message can only be sent by /// split-check thread. RegionApproximateSize { - size: u64, + size: Option, + splitable: Option, }, /// Approximate key count of target region. 
RegionApproximateKeys { - keys: u64, + keys: Option, + splitable: Option, }, CompactionDeclinedBytes { bytes: u64, @@ -649,11 +651,19 @@ impl fmt::Debug for CasualMessage { KeysInfoFormatter(split_keys.iter()), source, ), - CasualMessage::RegionApproximateSize { size } => { - write!(fmt, "Region's approximate size [size: {:?}]", size) + CasualMessage::RegionApproximateSize { size, splitable } => { + write!( + fmt, + "Region's approximate size [size: {:?}], [splitable: {:?}]", + size, splitable + ) } - CasualMessage::RegionApproximateKeys { keys } => { - write!(fmt, "Region's approximate keys [keys: {:?}]", keys) + CasualMessage::RegionApproximateKeys { keys, splitable } => { + write!( + fmt, + "Region's approximate keys [keys: {:?}], [splitable: {:?}", + keys, splitable + ) } CasualMessage::CompactionDeclinedBytes { bytes } => { write!(fmt, "compaction declined bytes {}", bytes) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 5511c9760626..da67784e7347 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -571,6 +571,119 @@ pub fn can_amend_read( false } +/// The SplitCheckTrigger maintains the internal status to determine +/// if a split check task should be triggered. +#[derive(Default, Debug)] +pub struct SplitCheckTrigger { + /// An inaccurate difference in region size since last reset. + /// It is used to decide whether split check is needed. + size_diff_hint: u64, + /// An inaccurate difference in region size after compaction. + /// It is used to trigger check split to update approximate size and keys + /// after space reclamation of deleted entries. + pub compaction_declined_bytes: u64, + /// Approximate size of the region. + pub approximate_size: Option, + may_split_size: Option, + /// Approximate keys of the region. + pub approximate_keys: Option, + may_split_keys: Option, + /// Whether this region has scheduled a split check task. If we just + /// splitted the region or ingested one file which may be overlapped + /// with the existed data, reset the flag so that the region can be + /// splitted again. + may_skip_split_check: bool, +} + +impl SplitCheckTrigger { + pub fn should_skip(&self, threshold: u64) -> bool { + self.may_skip_split_check + && self.compaction_declined_bytes < threshold + && self.size_diff_hint < threshold + } + + pub fn post_triggered(&mut self) { + self.size_diff_hint = 0; + self.compaction_declined_bytes = 0; + // The task is scheduled, the next tick may skip it only when the size and keys + // are small. + // If either size or keys are big enough to do a split, + // keep split check tick until split is done + if !matches!(self.may_split_size, Some(true)) && !matches!(self.may_split_keys, Some(true)) + { + self.may_skip_split_check = true; + } + } + + pub fn post_split(&mut self) { + self.size_diff_hint = 0; + self.may_split_keys = None; + self.may_split_size = None; + // It's not correct anymore, so set it to false to schedule a split check task. 
+ self.may_skip_split_check = false; + } + + pub fn add_size_diff(&mut self, size_diff: i64) { + let diff = self.size_diff_hint as i64 + size_diff; + self.size_diff_hint = cmp::max(diff, 0) as u64; + } + + pub fn reset_skip_check(&mut self) { + self.may_skip_split_check = false; + } + + pub fn on_clear_region_size(&mut self) { + self.approximate_size = None; + self.approximate_keys = None; + self.may_split_size = None; + self.may_split_keys = None; + self.may_skip_split_check = false; + } + + pub fn on_approximate_region_size(&mut self, size: Option, splitable: Option) { + // If size is none, it means no estimated size + if size.is_some() { + self.approximate_size = size; + } + + if splitable.is_some() { + self.may_split_size = splitable; + } + + // If the region is truly splitable, + // may_skip_split_check should be false + if matches!(splitable, Some(true)) { + self.may_skip_split_check = false; + } + } + + pub fn on_approximate_region_keys(&mut self, keys: Option, splitable: Option) { + // if keys is none, it means no estimated keys + if keys.is_some() { + self.approximate_keys = keys; + } + + if splitable.is_some() { + self.may_split_keys = splitable; + } + + // If the region is truly splitable, + // may_skip_split_check should be false + if matches!(splitable, Some(true)) { + self.may_skip_split_check = false; + } + } + + pub fn on_ingest_sst_result(&mut self, size: u64, keys: u64) { + self.approximate_size = Some(self.approximate_size.unwrap_or_default() + size); + self.approximate_keys = Some(self.approximate_keys.unwrap_or_default() + keys); + + // The ingested file may be overlapped with the data in engine, so we need to + // check it again to get the accurate value. + self.may_skip_split_check = false; + } +} + #[derive(Getters, MutGetters)] pub struct Peer where @@ -658,25 +771,10 @@ where pub peers_start_pending_time: Vec<(u64, Instant)>, /// A inaccurate cache about which peer is marked as down. down_peer_ids: Vec, - - /// An inaccurate difference in region size since last reset. - /// It is used to decide whether split check is needed. - pub size_diff_hint: u64, + /// the split check trigger + pub split_check_trigger: SplitCheckTrigger, /// The count of deleted keys since last reset. delete_keys_hint: u64, - /// An inaccurate difference in region size after compaction. - /// It is used to trigger check split to update approximate size and keys - /// after space reclamation of deleted entries. - pub compaction_declined_bytes: u64, - /// Approximate size of the region. - pub approximate_size: Option, - /// Approximate keys of the region. - pub approximate_keys: Option, - /// Whether this region has scheduled a split check task. If we just - /// splitted the region or ingested one file which may be overlapped - /// with the existed data, reset the flag so that the region can be - /// splitted again. - pub may_skip_split_check: bool, /// The state for consistency check. 
pub consistency_state: ConsistencyState, @@ -862,12 +960,8 @@ where wait_data_peers: Vec::default(), peers_start_pending_time: vec![], down_peer_ids: vec![], - size_diff_hint: 0, + split_check_trigger: SplitCheckTrigger::default(), delete_keys_hint: 0, - approximate_size: None, - approximate_keys: None, - may_skip_split_check: false, - compaction_declined_bytes: 0, leader_unreachable: false, pending_remove: false, wait_data, @@ -3361,8 +3455,8 @@ where self.peer_stat.written_keys += apply_metrics.written_keys; self.peer_stat.written_bytes += apply_metrics.written_bytes; self.delete_keys_hint += apply_metrics.delete_keys_hint; - let diff = self.size_diff_hint as i64 + apply_metrics.size_diff_hint; - self.size_diff_hint = cmp::max(diff, 0) as u64; + self.split_check_trigger + .add_size_diff(apply_metrics.size_diff_hint); if self.has_pending_snapshot() && self.ready_to_handle_pending_snap() { has_ready = true; @@ -3394,9 +3488,9 @@ where } pub fn post_split(&mut self) { - // Reset delete_keys_hint and size_diff_hint. self.delete_keys_hint = 0; - self.size_diff_hint = 0; + self.split_check_trigger.post_split(); + self.reset_region_buckets(); } @@ -5211,8 +5305,8 @@ where pending_peers: self.collect_pending_peers(ctx), written_bytes: self.peer_stat.written_bytes, written_keys: self.peer_stat.written_keys, - approximate_size: self.approximate_size, - approximate_keys: self.approximate_keys, + approximate_size: self.split_check_trigger.approximate_size, + approximate_keys: self.split_check_trigger.approximate_keys, replication_status: self.region_replication_status(ctx), wait_data_peers: self.wait_data_peers.clone(), }); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 94708e84f7ac..e3c0042acf06 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -695,6 +695,19 @@ impl Runner { }; if !split_keys.is_empty() { + // Notify peer that if the region is truly splitable. + // If it's truly splitable, then skip_split_check should be false; + self.router.update_approximate_size( + region.get_id(), + None, + Some(!split_keys.is_empty()), + ); + self.router.update_approximate_keys( + region.get_id(), + None, + Some(!split_keys.is_empty()), + ); + let region_epoch = region.get_region_epoch().clone(); self.router .ask_split(region_id, region_epoch, split_keys, "split checker".into()); @@ -736,6 +749,7 @@ impl Runner { } else { (!host.enable_region_bucket(), &empty_bucket) }; + let mut split_keys = vec![]; MergedIterator::<::Iterator>::new( tablet, LARGE_CFS, start_key, end_key, false, @@ -748,6 +762,7 @@ impl Runner { let mut skip_on_kv = false; while let Some(e) = iter.next() { if skip_on_kv && skip_check_bucket { + split_keys = host.split_keys(); return; } if !skip_on_kv && host.on_kv(region, &e) { @@ -810,6 +825,8 @@ impl Runner { } } + split_keys = host.split_keys(); + // if we scan the whole range, we can update approximate size and keys with // accurate value. 
if is_key_range { @@ -823,8 +840,17 @@ impl Runner { "bucket_count" => buckets.len(), "bucket_size" => bucket_size, ); - self.router.update_approximate_size(region.get_id(), size); - self.router.update_approximate_keys(region.get_id(), keys); + + self.router.update_approximate_size( + region.get_id(), + Some(size), + Some(!split_keys.is_empty()), + ); + self.router.update_approximate_keys( + region.get_id(), + Some(keys), + Some(!split_keys.is_empty()), + ); })?; if host.enable_region_bucket() { @@ -839,7 +865,7 @@ impl Runner { } timer.observe_duration(); - Ok(host.split_keys()) + Ok(split_keys) } fn change_cfg(&mut self, change: ConfigChange) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index ff47525ea371..5eb7d97796eb 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -17,7 +17,7 @@ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatist use engine_test::raft::RaftTestEngine; use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, - CF_DEFAULT, CF_RAFT, + CF_DEFAULT, CF_RAFT, CF_WRITE, }; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; @@ -783,6 +783,14 @@ pub fn put_till_size( put_cf_till_size(cluster, CF_DEFAULT, limit, range) } +pub fn put_till_count( + cluster: &mut Cluster, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + put_cf_till_count(cluster, CF_WRITE, limit, range) +} + pub fn put_cf_till_size( cluster: &mut Cluster, cf: &'static str, @@ -815,6 +823,36 @@ pub fn put_cf_till_size( key.into_bytes() } +pub fn put_cf_till_count( + cluster: &mut Cluster, + cf: &'static str, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + assert!(limit > 0); + let mut len = 0; + let mut rng = rand::thread_rng(); + let mut key = String::new(); + let mut value = vec![0; 64]; + while len < limit { + let batch_size = std::cmp::min(5, limit - len); + let mut reqs = vec![]; + for _ in 0..batch_size { + key.clear(); + let key_id = range.next().unwrap(); + write!(key, "{:09}", key_id).unwrap(); + rng.fill_bytes(&mut value); + reqs.push(new_put_cf_cmd(cf, key.as_bytes(), &value)); + } + len += batch_size; + cluster.batch_put(key.as_bytes(), reqs).unwrap(); + // Approximate size of memtable is inaccurate for small data, + // we flush it to SST so we can use the size properties instead. + cluster.must_flush_cf(cf, true); + } + key.into_bytes() +} + pub fn new_mutation(op: Op, k: &[u8], v: &[u8]) -> Mutation { let mut mutation = Mutation::default(); mutation.set_op(op); diff --git a/components/tikv_util/src/log.rs b/components/tikv_util/src/log.rs index fd351eecbd4d..91bd5013c1ed 100644 --- a/components/tikv_util/src/log.rs +++ b/components/tikv_util/src/log.rs @@ -83,6 +83,18 @@ macro_rules! trace(($($args:tt)+) => { ::slog_global::trace!($($args)+) };); +/// Logs a infor or debug level message using the slog global logger. +#[macro_export] +macro_rules! 
info_or_debug{ + ($cond:expr; $($args:tt)+) => { + if $cond { + info!($($args)+) + } else { + debug!($($args)+) + } + }; +} + use std::fmt::{self, Display, Write}; use slog::{BorrowedKV, OwnedKVList, Record, KV}; diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 65c50793d7a6..2ef3d499d221 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1549,3 +1549,65 @@ fn test_split_region_with_no_valid_split_keys() { rx.recv_timeout(Duration::from_secs(5)).unwrap(); rx.try_recv().unwrap_err(); } + +/// This test case test if a split failed for some reason, +/// it can continue run split check and eventually the split will finish +#[test_case(test_raftstore::new_node_cluster)] +fn test_split_by_split_check_on_size() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_size = 1440; + let region_split_size = 960; + cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // make first split fail + // 1*return means it would run "return" action once + fail::cfg("fail_pre_propose_split", "1*return").unwrap(); + + // Insert region_max_size into the cluster. + // It should trigger the split + let mut range = 1..; + let key = put_till_size(&mut cluster, region_max_size / 2, &mut range); + let region = pd_client.get_region(&key).unwrap(); + put_till_size(&mut cluster, region_max_size / 2 + 100, &mut range); + // waiting the split, + cluster.wait_region_split(®ion); +} + +/// This test case test if a split failed for some reason, +/// it can continue run split check and eventually the split will finish +#[test_case(test_raftstore::new_node_cluster)] +fn test_split_by_split_check_on_keys() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_keys = 15; + let region_split_keys = 10; + cluster.cfg.coprocessor.region_max_keys = Some(region_max_keys); + cluster.cfg.coprocessor.region_split_keys = Some(region_split_keys); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // make first split fail + // 1*return means it would run "return" action once + fail::cfg("fail_pre_propose_split", "1*return").unwrap(); + + // Insert region_max_size into the cluster. 
+ // It should trigger the split + let mut range = 1..; + let key = put_till_count(&mut cluster, region_max_keys / 2, &mut range); + let region = pd_client.get_region(&key).unwrap(); + put_till_count(&mut cluster, region_max_keys / 2 + 3, &mut range); + // waiting the split, + cluster.wait_region_split(®ion); +} diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 5439e5c8ba25..3affbadec4bf 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -609,7 +609,7 @@ fn test_node_split_region_after_reboot_with_config_change() { sleep_ms(200); assert_eq!(pd_client.get_split_count(), 0); - // change the config to make the region splittable + // change the config to make the region splitable cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size / 3)); cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size / 3)); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(region_split_size / 3); From a932082fe406dee928cd1823e2177cc7cc62ed28 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 8 Nov 2023 15:32:12 +0800 Subject: [PATCH 128/203] server: change the log level to debug for cop error response (#15882) ref tikv/tikv#15881 Change the coprocessor error response log level to DEBUG Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/endpoint.rs | 105 +++++++++++++++--------------------- src/read_pool.rs | 4 ++ 2 files changed, 48 insertions(+), 61 deletions(-) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 7a12c7493e53..01cb571e8aa7 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -544,8 +544,9 @@ impl Endpoint { if let Err(busy_err) = self.read_pool.check_busy_threshold(Duration::from_millis( req.get_context().get_busy_threshold_ms() as u64, )) { - let mut resp = coppb::Response::default(); - resp.mut_region_error().set_server_is_busy(busy_err); + let mut pb_error = errorpb::Error::new(); + pb_error.set_server_is_busy(busy_err); + let resp = make_error_response(Error::Region(pb_error)); return Either::Left(async move { resp.into() }); } @@ -810,77 +811,59 @@ impl Endpoint { } } +macro_rules! 
make_error_response_common { + ($resp:expr, $tag:expr, $e:expr) => {{ + match $e { + Error::Region(e) => { + $tag = storage::get_tag_from_header(&e); + $resp.set_region_error(e); + } + Error::Locked(info) => { + $tag = "meet_lock"; + $resp.set_locked(info); + } + Error::DeadlineExceeded => { + $tag = "deadline_exceeded"; + $resp.set_other_error($e.to_string()); + } + Error::MaxPendingTasksExceeded => { + $tag = "max_pending_tasks_exceeded"; + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason($e.to_string()); + let mut errorpb = errorpb::Error::default(); + errorpb.set_message($e.to_string()); + errorpb.set_server_is_busy(server_is_busy_err); + $resp.set_region_error(errorpb); + } + Error::Other(_) => { + $tag = "other"; + warn!("unexpected other error encountered processing coprocessor task"; + "error" => ?&$e, + ); + $resp.set_other_error($e.to_string()); + } + }; + COPR_REQ_ERROR.with_label_values(&[$tag]).inc(); + }}; +} + fn make_error_batch_response(batch_resp: &mut coppb::StoreBatchTaskResponse, e: Error) { - warn!( + debug!( "batch cop task error-response"; "err" => %e ); let tag; - match e { - Error::Region(e) => { - tag = storage::get_tag_from_header(&e); - batch_resp.set_region_error(e); - } - Error::Locked(info) => { - tag = "meet_lock"; - batch_resp.set_locked(info); - } - Error::DeadlineExceeded => { - tag = "deadline_exceeded"; - batch_resp.set_other_error(e.to_string()); - } - Error::MaxPendingTasksExceeded => { - tag = "max_pending_tasks_exceeded"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(e.to_string()); - let mut errorpb = errorpb::Error::default(); - errorpb.set_message(e.to_string()); - errorpb.set_server_is_busy(server_is_busy_err); - batch_resp.set_region_error(errorpb); - } - Error::Other(_) => { - tag = "other"; - batch_resp.set_other_error(e.to_string()); - } - }; - COPR_REQ_ERROR.with_label_values(&[tag]).inc(); + make_error_response_common!(batch_resp, tag, e); } fn make_error_response(e: Error) -> coppb::Response { - warn!( + debug!( "error-response"; "err" => %e ); - let mut resp = coppb::Response::default(); let tag; - match e { - Error::Region(e) => { - tag = storage::get_tag_from_header(&e); - resp.set_region_error(e); - } - Error::Locked(info) => { - tag = "meet_lock"; - resp.set_locked(info); - } - Error::DeadlineExceeded => { - tag = "deadline_exceeded"; - resp.set_other_error(e.to_string()); - } - Error::MaxPendingTasksExceeded => { - tag = "max_pending_tasks_exceeded"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(e.to_string()); - let mut errorpb = errorpb::Error::default(); - errorpb.set_message(e.to_string()); - errorpb.set_server_is_busy(server_is_busy_err); - resp.set_region_error(errorpb); - } - Error::Other(_) => { - tag = "other"; - resp.set_other_error(e.to_string()); - } - }; - COPR_REQ_ERROR.with_label_values(&[tag]).inc(); + let mut resp = coppb::Response::default(); + make_error_response_common!(resp, tag, e); resp } diff --git a/src/read_pool.rs b/src/read_pool.rs index a5898ea4f634..7821f2f946e0 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -312,6 +312,10 @@ impl ReadPoolHandle { let mut busy_err = errorpb::ServerIsBusy::default(); busy_err.set_reason("estimated wait time exceeds threshold".to_owned()); busy_err.estimated_wait_ms = u32::try_from(estimated_wait.as_millis()).unwrap_or(u32::MAX); + warn!("Already many pending tasks in the read queue, task is rejected"; + 
"busy_threshold" => ?&busy_threshold, + "busy_err" => ?&busy_err, + ); Err(busy_err) } } From f574ec0830f88a42290f1721a919b6a293cd7cfa Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 8 Nov 2023 15:56:12 +0800 Subject: [PATCH 129/203] raftstore: gc uninitialized stale peer after merge (#15934) close tikv/tikv#15919 A "stale peer" refers to a peer that still exists on a TiKV node but has been removed from the raft group, typically through a confchange operation. TiKV performs regular checks and validations on its peers to ensure that no such stale peer exists. However, the current stale peer check is not enough when dealing with uninitialized stale peers that its region has been merged. These uninitialized stale peers are left indefinitely, consuming CPU, memory, and blocking resolved ts. This commit introduces an in-memory state for peers whose raft messages create uninitialized stale peers. The stale peer then sends a MsgCheckStalePeer message to the corresponding peer, validating whether it should be removed. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 5 ++ components/raftstore/src/store/fsm/store.rs | 5 ++ components/raftstore/src/store/peer.rs | 13 ++++- components/raftstore/src/store/worker/pd.rs | 10 +++- tests/integrations/raftstore/test_merge.rs | 57 +++++++++++++++++++ .../integrations/raftstore/test_stale_peer.rs | 47 +++++++++++++++ 6 files changed, 134 insertions(+), 3 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 03e31938aa02..942514153c7a 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -286,6 +286,7 @@ where region, meta_peer, wait_data, + None, )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, @@ -316,6 +317,7 @@ where engines: Engines, region_id: u64, peer: metapb::Peer, + create_by_peer: metapb::Peer, ) -> Result> { // We will remove tombstone key when apply snapshot info!( @@ -323,6 +325,8 @@ where "region_id" => region_id, "peer_id" => peer.get_id(), "store_id" => store_id, + "create_by_peer_id" => create_by_peer.get_id(), + "create_by_peer_store_id" => create_by_peer.get_store_id(), ); let mut region = metapb::Region::default(); @@ -342,6 +346,7 @@ where ®ion, peer, false, + Some(create_by_peer), )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 950768055e46..6227e28cd193 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -681,6 +681,8 @@ where "region_id" => region_id, "current_region_epoch" => ?cur_epoch, "msg_type" => ?msg_type, + "to_peer_id" => ?from_peer.get_id(), + "to_peer_store_id" => ?from_peer.get_store_id(), ); self.raft_metrics.message_dropped.stale_msg.inc(); @@ -699,6 +701,8 @@ where error!(?e; "send gc message failed"; "region_id" => region_id, + "to_peer_id" => ?from_peer.get_id(), + "to_peer_store_id" => ?from_peer.get_store_id(), ); } } @@ -2385,6 +2389,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.ctx.engines.clone(), region_id, target.clone(), + msg.get_from_peer().clone(), )?; // WARNING: The checking code must be above this line. 
diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index da67784e7347..90676411bfca 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -708,6 +708,8 @@ where pub peer_heartbeats: HashMap, /// Record the waiting data status of each follower or learner peer. pub wait_data_peers: Vec, + /// This peer is created by a raft message from `create_by_peer`. + create_by_peer: Option, proposals: ProposalQueue>, leader_missing_time: Option, @@ -904,6 +906,7 @@ where region: &metapb::Region, peer: metapb::Peer, wait_data: bool, + create_by_peer: Option, ) -> Result> { let peer_id = peer.get_id(); if peer_id == raft::INVALID_ID { @@ -958,6 +961,7 @@ where peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), wait_data_peers: Vec::default(), + create_by_peer, peers_start_pending_time: vec![], down_peer_ids: vec![], split_check_trigger: SplitCheckTrigger::default(), @@ -5436,9 +5440,16 @@ where &mut self, ctx: &mut PollContext, ) { - if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() { + if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() + || self.region().get_region_epoch().get_conf_ver() == 0 + { self.check_stale_conf_ver = self.region().get_region_epoch().get_conf_ver(); self.check_stale_peers = self.region().get_peers().to_vec(); + if let Some(create_by_peer) = self.create_by_peer.as_ref() { + // Push create_by_peer in case the peer is removed before + // initialization which has no peer in region. + self.check_stale_peers.push(create_by_peer.clone()); + } } for peer in &self.check_stale_peers { if peer.get_id() == self.peer_id() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 606576b22e49..798102896693 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1567,8 +1567,14 @@ where } } Ok(None) => { - // splitted Region has not yet reported to PD. - // TODO: handle merge + // Splitted region has not yet reported to PD. + // + // Or region has been merged. This case is handled by + // message `MsgCheckStalePeer`, stale peers will be + // removed eventually. + PD_VALIDATE_PEER_COUNTER_VEC + .with_label_values(&["region not found"]) + .inc(); } Err(e) => { error!("get region failed"; "err" => ?e); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 8d93d2c5a5c2..7d964c033192 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -407,6 +407,63 @@ fn test_node_check_merged_message() { must_get_none(&engine3, b"v5"); } +/// Test if an uninitialized stale peer will be handled properly after merge. 
+#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_gc_uninitialized_peer_after_merge() { + let mut cluster = new_cluster(0, 4); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::millis(150); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + // test if an uninitialized stale peer before conf removal is destroyed + // automatically + let region = pd_client.get_region(b"k1").unwrap(); + pd_client.must_add_peer(region.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(region.get_id(), new_peer(3, 3)); + + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 4) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + // Add peer (4,4), remove peer (4,4) and then merge regions. + // Peer (4,4) will be an an uninitialized stale peer. + pd_client.must_add_peer(left.get_id(), new_peer(4, 4)); + cluster.must_region_exist(left.get_id(), 4); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + pd_client.must_remove_peer(left.get_id(), new_peer(4, 4)); + pd_client.must_merge(left.get_id(), right.get_id()); + cluster.clear_send_filters(); + + // Wait for the peer (4,4) to be destroyed. + sleep_ms( + 2 * cluster + .cfg + .raft_store + .max_leader_missing_duration + .as_millis(), + ); + cluster.must_region_not_exist(left.get_id(), 4); +} + // Test if a merge handled properly when there is a unfinished slow split before // merge. // No v2, it requires all peers to be available to check trim status. diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index e12584d6c600..f76373756f92 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -6,8 +6,10 @@ use std::{sync::Arc, thread, time::*}; use engine_traits::{Peekable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RegionLocalState}; +use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, HandyRwLock}; /// A helper function for testing the behaviour of the gc of stale peer @@ -310,3 +312,48 @@ fn test_stale_learner_with_read_index() { let state: RegionLocalState = engine3.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); } + +/// Test if an uninitialized stale peer will be removed after restart. 
+#[test_case(test_raftstore::new_node_cluster)]
+// #[test_case(test_raftstore_v2::new_node_cluster)]
+fn test_node_restart_gc_uninitialized_peer_after_merge() {
+    let mut cluster = new_cluster(0, 4);
+    configure_for_merge(&mut cluster.cfg);
+    ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client);
+    cluster.cfg.raft_store.raft_election_timeout_ticks = 5;
+    cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40);
+    cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::millis(150);
+    cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100);
+    cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100);
+
+    let pd_client = Arc::clone(&cluster.pd_client);
+    pd_client.disable_default_operator();
+
+    cluster.run_conf_change();
+
+    cluster.must_put(b"k1", b"v1");
+
+    // test if an uninitialized stale peer before conf removal is destroyed
+    // automatically
+    let region = pd_client.get_region(b"k1").unwrap();
+    pd_client.must_add_peer(region.get_id(), new_peer(2, 2));
+    pd_client.must_add_peer(region.get_id(), new_peer(3, 3));
+
+    // Block snapshot messages, so that new peers will never be initialized.
+    cluster.add_send_filter(CloneFilterFactory(
+        RegionPacketFilter::new(region.get_id(), 4)
+            .msg_type(MessageType::MsgSnapshot)
+            .direction(Direction::Recv),
+    ));
+    // Add peer (4,4), remove peer (4,4) and then merge regions.
+    // Peer (4,4) will be an uninitialized stale peer.
+    pd_client.must_add_peer(region.get_id(), new_peer(4, 4));
+    cluster.must_region_exist(region.get_id(), 4);
+    cluster.add_send_filter(IsolationFilterFactory::new(4));
+    pd_client.must_remove_peer(region.get_id(), new_peer(4, 4));
+
+    // An uninitialized stale peer is removed automatically after restart.
+ cluster.stop_node(4); + cluster.run_node(4).unwrap(); + cluster.must_region_not_exist(region.get_id(), 4); +} From 1819bb8f077aba4f4a28eb0aa89d43fcd3f873d7 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 9 Nov 2023 15:33:43 +0800 Subject: [PATCH 130/203] resource_control: add quota limiter per priority (#15918) ref tikv/tikv#15917 Signed-off-by: glorv --- components/backup/src/endpoint.rs | 2 +- components/resource_control/src/future.rs | 25 +- .../resource_control/src/resource_group.rs | 256 +++++++++++++++++- components/resource_control/src/service.rs | 7 +- components/resource_control/src/worker.rs | 20 +- src/coprocessor/endpoint.rs | 8 + src/import/sst_service.rs | 4 +- src/storage/mod.rs | 18 ++ src/storage/txn/scheduler.rs | 4 + 9 files changed, 319 insertions(+), 25 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index cc4d0bf0e289..5c243a1e8d8f 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -927,7 +927,7 @@ impl Endpoint { let sst_max_size = self.config_manager.0.read().unwrap().sst_max_size.0; let limit = self.softlimit.limit(); let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { - r.get_resource_limiter(&request.resource_group_name, &request.source_tag) + r.get_background_resource_limiter(&request.resource_group_name, &request.source_tag) }); self.pool.borrow_mut().spawn(async move { diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index fd98fc9a092f..a935c3b41fa9 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -16,7 +16,7 @@ use tokio_timer::Delay; use crate::{ resource_group::{ResourceConsumeType, ResourceController}, - resource_limiter::ResourceLimiter, + resource_limiter::{ResourceLimiter, ResourceType}, }; const MAX_WAIT_DURATION: Duration = Duration::from_secs(10); @@ -125,13 +125,24 @@ impl Future for LimitedFuture { if this.res.is_ready() { return std::mem::replace(this.res, Poll::Pending); } - let last_io_bytes = match get_thread_io_bytes_stats() { - Ok(b) => Some(b), - Err(e) => { - warn!("load thread io bytes failed"; "err" => e); - None + // get io stats is very expensive, so we only do so if only io control is + // enabled. 
+ let mut last_io_bytes = None; + if this + .resource_limiter + .get_limiter(ResourceType::Io) + .get_rate_limit() + .is_finite() + { + match get_thread_io_bytes_stats() { + Ok(b) => { + last_io_bytes = Some(b); + } + Err(e) => { + warn!("load thread io bytes failed"; "err" => e); + } } - }; + } let start = Instant::now(); let res = this.f.poll(cx); let dur = start.saturating_elapsed(); diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index a4b30e3d4adf..b7e7ca28705e 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -22,6 +22,7 @@ use kvproto::{ resource_manager::{GroupMode, ResourceGroup as PbResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; +use strum::{EnumCount, EnumIter, IntoEnumIterator}; use tikv_util::{info, time::Instant}; use yatp::queue::priority::TaskPriorityProvider; @@ -56,21 +57,70 @@ pub enum ResourceConsumeType { IoBytes(u64), } +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter)] +#[repr(usize)] +pub enum TaskPriority { + High = 0, + Medium = 1, + Low = 2, +} + +impl TaskPriority { + pub fn as_str(&self) -> &'static str { + match *self { + TaskPriority::High => "high", + TaskPriority::Medium => "medium", + TaskPriority::Low => "low", + } + } +} + +impl From for TaskPriority { + fn from(value: u32) -> Self { + // map the resource group priority value (1,8,16) to (Low,Medium,High) + if value < 6 { + Self::Low + } else if value < 11 { + Self::Medium + } else { + Self::High + } + } +} + /// ResourceGroupManager manages the metadata of each resource group. pub struct ResourceGroupManager { pub(crate) resource_groups: DashMap, + // the count of all groups, a fast path because call `DashMap::len` is a little slower. + group_count: AtomicU64, registry: RwLock>>, // auto incremental version generator used for mark the background // resource limiter has changed. version_generator: AtomicU64, + // the shared resource limiter of each priority + priority_limiters: [Arc; TaskPriority::COUNT], } impl Default for ResourceGroupManager { fn default() -> Self { + let priority_limiters = TaskPriority::iter() + .map(|p| { + Arc::new(ResourceLimiter::new( + p.as_str().to_owned(), + f64::INFINITY, + f64::INFINITY, + 0, + )) + }) + .collect::>() + .try_into() + .unwrap(); let manager = Self { resource_groups: Default::default(), + group_count: AtomicU64::new(0), registry: Default::default(), version_generator: AtomicU64::new(0), + priority_limiters, }; // init the default resource group by default. @@ -90,6 +140,11 @@ impl Default for ResourceGroupManager { } impl ResourceGroupManager { + #[inline] + pub fn get_group_count(&self) -> u64 { + self.group_count.load(Ordering::Relaxed) + } + fn get_ru_setting(rg: &PbResourceGroup, is_read: bool) -> u64 { match (rg.get_mode(), is_read) { // RU mode, read and write use the same setting. 
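A short usage sketch of the `TaskPriority` mapping introduced above (illustrative only, not part of the change): the `From<u32>` impl buckets the conventional resource-group priority values 1, 8 and 16 into the three levels, treating anything below 6 as Low and 6 through 10 as Medium.

    // Hypothetical checks of the mapping; `matches!` is used because the enum
    // above does not derive Debug, so `assert_eq!` would not compile.
    assert!(matches!(TaskPriority::from(1), TaskPriority::Low));
    assert!(matches!(TaskPriority::from(8), TaskPriority::Medium));
    assert!(matches!(TaskPriority::from(16), TaskPriority::High));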
@@ -129,8 +184,13 @@ impl ResourceGroupManager { .and_then(|g| g.limiter.clone()); let limiter = self.build_resource_limiter(&rg, prev_limiter); - self.resource_groups - .insert(group_name, ResourceGroup::new(rg, limiter)); + if self + .resource_groups + .insert(group_name, ResourceGroup::new(rg, limiter)) + .is_none() + { + self.group_count.fetch_add(1, Ordering::Relaxed); + } } fn build_resource_limiter( @@ -161,6 +221,7 @@ impl ResourceGroupManager { if self.resource_groups.remove(&group_name).is_some() { deregister_metrics(name); info!("remove resource group"; "name"=> name); + self.group_count.fetch_sub(1, Ordering::Relaxed); } } @@ -184,6 +245,8 @@ impl ResourceGroupManager { controller.remove_resource_group(name.as_bytes()); } }); + self.group_count + .fetch_sub(removed_names.len() as u64, Ordering::Relaxed); } } @@ -234,24 +297,79 @@ impl ResourceGroupManager { } } + // only enable priority quota limiter when there is at least 1 user-defined + // resource group. + #[inline] + fn enable_priority_limiter(&self) -> bool { + self.get_group_count() > 1 + } + + // Always return the background resource limiter if any; + // Only return the foregroup limiter when priority is enabled. pub fn get_resource_limiter( &self, rg: &str, request_source: &str, + override_priority: u64, + ) -> Option> { + let (limiter, group_priority) = + self.get_background_resource_limiter_with_priority(rg, request_source); + if limiter.is_some() { + return limiter; + } + + // if there is only 1 resource group, priority quota limiter is useless so just + // return None for better performance. + if !self.enable_priority_limiter() { + return None; + } + + // request priority has higher priority, 0 means priority is not set. + let mut task_priority = override_priority as u32; + if task_priority == 0 { + task_priority = group_priority; + } + Some(self.priority_limiters[TaskPriority::from(task_priority) as usize].clone()) + } + + // return a ResourceLimiter for background tasks only. 
+ pub fn get_background_resource_limiter( + &self, + rg: &str, + request_source: &str, ) -> Option> { + self.get_background_resource_limiter_with_priority(rg, request_source) + .0 + } + + fn get_background_resource_limiter_with_priority( + &self, + rg: &str, + request_source: &str, + ) -> (Option>, u32) { fail_point!("only_check_source_task_name", |name| { assert_eq!(&name.unwrap(), request_source); - None + (None, 8) }); + let mut group_priority = None; if let Some(group) = self.resource_groups.get(rg) { + group_priority = Some(group.group.priority); if !group.fallback_default { - return group.get_resource_limiter(request_source); + return ( + group.get_background_resource_limiter(request_source), + group.group.priority, + ); } } - self.resource_groups + let default_group = self + .resource_groups .get(DEFAULT_RESOURCE_GROUP_NAME) - .and_then(|g| g.get_resource_limiter(request_source)) + .unwrap(); + ( + default_group.get_background_resource_limiter(request_source), + group_priority.unwrap_or(default_group.group.priority), + ) } } @@ -286,7 +404,10 @@ impl ResourceGroup { .get_fill_rate() } - fn get_resource_limiter(&self, request_source: &str) -> Option> { + fn get_background_resource_limiter( + &self, + request_source: &str, + ) -> Option> { self.limiter.as_ref().and_then(|limiter| { // the source task name is the last part of `request_source` separated by "_" // the request_source is @@ -871,6 +992,35 @@ pub(crate) mod tests { ); } + #[test] + fn test_resource_group_crud() { + let resource_manager = ResourceGroupManager::default(); + assert_eq!(resource_manager.get_group_count(), 1); + + let group1 = new_resource_group_ru("test1".into(), 100, HIGH_PRIORITY); + resource_manager.add_resource_group(group1); + assert_eq!(resource_manager.get_group_count(), 2); + + let group2 = new_resource_group_ru("test2".into(), 200, LOW_PRIORITY); + resource_manager.add_resource_group(group2); + assert_eq!(resource_manager.get_group_count(), 3); + + let group1 = new_resource_group_ru("test1".into(), 150, HIGH_PRIORITY); + resource_manager.add_resource_group(group1.clone()); + assert_eq!(resource_manager.get_group_count(), 3); + assert_eq!( + resource_manager.get_resource_group("test1").unwrap().group, + group1 + ); + + resource_manager.remove_resource_group("test2"); + assert!(resource_manager.get_resource_group("test2").is_none()); + assert_eq!(resource_manager.get_group_count(), 2); + + resource_manager.remove_resource_group("test2"); + assert_eq!(resource_manager.get_group_count(), 2); + } + #[test] fn test_resource_group_priority() { let resource_manager = ResourceGroupManager::default(); @@ -1165,4 +1315,96 @@ pub(crate) mod tests { assert_eq!(metadata1.group_name(), group_name.as_bytes()); } } + + #[test] + fn test_get_resource_limiter() { + let mgr = ResourceGroupManager::default(); + + let default_group = new_background_resource_group_ru( + "default".into(), + 200, + MEDIUM_PRIORITY, + vec!["br".into(), "stats".into()], + ); + mgr.add_resource_group(default_group); + let default_limiter = mgr + .get_resource_group("default") + .unwrap() + .limiter + .clone() + .unwrap(); + + assert!(mgr.get_resource_limiter("default", "query", 0).is_none()); + assert!( + mgr.get_resource_limiter("default", "query", HIGH_PRIORITY as u64) + .is_none() + ); + + let group1 = new_resource_group("test1".into(), true, 100, 100, HIGH_PRIORITY); + mgr.add_resource_group(group1); + + let bg_group = new_background_resource_group_ru( + "bg".into(), + 50, + LOW_PRIORITY, + vec!["ddl".into(), "stats".into()], + ); + 
mgr.add_resource_group(bg_group); + let bg_limiter = mgr + .get_resource_group("bg") + .unwrap() + .limiter + .clone() + .unwrap(); + + assert!( + mgr.get_background_resource_limiter("test1", "ddl") + .is_none() + ); + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("test1", "stats") + .unwrap(), + &default_limiter + )); + + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("bg", "stats").unwrap(), + &bg_limiter + )); + assert!(mgr.get_background_resource_limiter("bg", "br").is_none()); + assert!( + mgr.get_background_resource_limiter("bg", "invalid") + .is_none() + ); + + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("unknown", "stats") + .unwrap(), + &default_limiter + )); + + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "stats", 0).unwrap(), + &default_limiter + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "query", 0).unwrap(), + &mgr.priority_limiters[0] + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "query", LOW_PRIORITY as u64) + .unwrap(), + &mgr.priority_limiters[2] + )); + + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("default", "query", LOW_PRIORITY as u64) + .unwrap(), + &mgr.priority_limiters[2] + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("unknown", "query", 0).unwrap(), + &mgr.priority_limiters[1] + )); + } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 5ecac9d74c49..2c2bbdc55490 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -565,7 +565,10 @@ pub mod tests { s_clone.report_ru_metrics().await; }); // Mock consume. - let bg_limiter = s.manager.get_resource_limiter("background", "br").unwrap(); + let bg_limiter = s + .manager + .get_background_resource_limiter("background", "br") + .unwrap(); bg_limiter.consume( Duration::from_secs(2), IoBytes { @@ -584,7 +587,7 @@ pub mod tests { s.manager.add_resource_group(background_group); let new_bg_limiter = s .manager - .get_resource_limiter("background", "lightning") + .get_background_resource_limiter("background", "lightning") .unwrap(); new_bg_limiter.consume( Duration::from_secs(5), diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index deb1b2e44dea..7bc76691e1ff 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -340,7 +340,11 @@ mod tests { let resource_ctl = Arc::new(ResourceGroupManager::default()); let rg1 = new_resource_group_ru("test".into(), 1000, 14); resource_ctl.add_resource_group(rg1); - assert!(resource_ctl.get_resource_limiter("test", "br").is_none()); + assert!( + resource_ctl + .get_background_resource_limiter("test", "br") + .is_none() + ); let test_provider = TestResourceStatsProvider::new(8.0, 10000.0); let mut worker = @@ -351,10 +355,12 @@ mod tests { resource_ctl.add_resource_group(default_bg); assert!( resource_ctl - .get_resource_limiter("default", "lightning") + .get_background_resource_limiter("default", "lightning") .is_none() ); - let limiter = resource_ctl.get_resource_limiter("default", "br").unwrap(); + let limiter = resource_ctl + .get_background_resource_limiter("default", "br") + .unwrap(); assert!( limiter .get_limiter(ResourceType::Cpu) @@ -513,13 +519,15 @@ mod tests { let default = new_background_resource_group_ru("default".into(), 2000, 8, vec!["br".into()]); resource_ctl.add_resource_group(default); - let new_limiter = resource_ctl.get_resource_limiter("default", 
"br").unwrap(); + let new_limiter = resource_ctl + .get_background_resource_limiter("default", "br") + .unwrap(); assert_eq!(&*new_limiter as *const _, &*limiter as *const _); let bg = new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); resource_ctl.add_resource_group(bg); let bg_limiter = resource_ctl - .get_resource_limiter("background", "br") + .get_background_resource_limiter("background", "br") .unwrap(); reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); @@ -581,7 +589,7 @@ mod tests { new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); resource_ctl.add_resource_group(new_bg); let new_bg_limiter = resource_ctl - .get_resource_limiter("background", "br") + .get_background_resource_limiter("background", "br") .unwrap(); assert_ne!(&*bg_limiter as *const _, &*new_bg_limiter as *const _); assert!( diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 01cb571e8aa7..005a18938de1 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -511,6 +511,10 @@ impl Endpoint { .get_resource_control_context() .get_resource_group_name(), req_ctx.context.get_request_source(), + req_ctx + .context + .get_resource_control_context() + .get_override_priority(), ) }); // box the tracker so that moving it is cheap. @@ -757,6 +761,10 @@ impl Endpoint { .get_resource_control_context() .get_resource_group_name(), req_ctx.context.get_request_source(), + req_ctx + .context + .get_resource_control_context() + .get_override_priority(), ) }); let key_ranges = req_ctx diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 92e73ca9f8f1..2dc4f76b1944 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -751,7 +751,7 @@ macro_rules! 
impl_write { let (meta, resource_limiter) = match first_req { Some(r) => { let limiter = resource_manager.as_ref().and_then(|m| { - m.get_resource_limiter( + m.get_background_resource_limiter( r.get_context() .get_resource_control_context() .get_resource_group_name(), @@ -1060,7 +1060,7 @@ impl ImportSst for ImportSstService { let tablets = self.tablets.clone(); let start = Instant::now(); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { - r.get_resource_limiter( + r.get_background_resource_limiter( req.get_context() .get_resource_control_context() .get_resource_group_name(), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cc48d9e36e3b..c0d6e6fc4a3b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -609,6 +609,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -782,6 +783,10 @@ impl Storage { .get_resource_control_context() .get_resource_group_name(), requests[0].get_context().get_request_source(), + requests[0] + .get_context() + .get_resource_control_context() + .get_override_priority(), ) }); let concurrency_manager = self.concurrency_manager.clone(); @@ -978,6 +983,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1170,6 +1176,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1346,6 +1353,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1662,6 +1670,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1754,6 +1763,10 @@ impl Storage { .get_resource_control_context() .get_resource_group_name(), gets[0].get_context().get_request_source(), + gets[0] + .get_context() + .get_resource_control_context() + .get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1893,6 +1906,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2399,6 +2413,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2536,6 +2551,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2698,6 +2714,7 @@ impl Storage { r.get_resource_limiter( 
ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2879,6 +2896,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 36492f227011..995c361e163c 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1236,6 +1236,10 @@ impl TxnScheduler { .get_resource_control_context() .get_resource_group_name(), task.cmd.ctx().get_request_source(), + task.cmd + .ctx() + .get_resource_control_context() + .get_override_priority(), ) }); let mut sample = quota_limiter.new_sample(true); From 7be1b17d72870bb95fcb7443b1f945f9c59657fa Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 9 Nov 2023 18:26:12 +0800 Subject: [PATCH 131/203] *: make unified-pool use FuturePool (#15925) close tikv/tikv#15924 *: make unified-pool use FuturePool Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/worker/pool.rs | 34 ++--- .../tikv_util/src/yatp_pool/future_pool.rs | 23 +++- components/tikv_util/src/yatp_pool/mod.rs | 15 ++- src/read_pool.rs | 127 ++++++------------ 4 files changed, 86 insertions(+), 113 deletions(-) diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index c3919e426197..9ef827b007a5 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -7,7 +7,7 @@ use std::{ future::Future, sync::{ atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, Mutex, + Arc, }, time::{Duration, Instant}, }; @@ -20,13 +20,13 @@ use futures::{ stream::StreamExt, }; use prometheus::IntGauge; -use yatp::{Remote, ThreadPool}; +use yatp::Remote; use super::metrics::*; use crate::{ future::{block_on_timeout, poll_future_notify}, timer::GLOBAL_TIMER_HANDLE, - yatp_pool::{DefaultTicker, YatpPoolBuilder}, + yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, }; #[derive(PartialEq)] @@ -222,7 +222,7 @@ impl LazyWorker { } pub fn remote(&self) -> Remote { - self.worker.remote.clone() + self.worker.remote() } } @@ -301,11 +301,8 @@ impl> Builder { let pool = YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(self.name) .thread_count(self.thread_count, self.thread_count, self.thread_count) - .build_single_level_pool(); - let remote = pool.remote().clone(); - let pool = Arc::new(Mutex::new(Some(pool))); + .build_future_pool(); Worker { - remote, stop: Arc::new(AtomicBool::new(false)), pool, counter: Arc::new(AtomicUsize::new(0)), @@ -318,8 +315,7 @@ impl> Builder { /// A worker that can schedule time consuming tasks. 
#[derive(Clone)] pub struct Worker { - pool: Arc>>>, - remote: Remote, + pool: FuturePool, pending_capacity: usize, counter: Arc, stop: Arc, @@ -371,7 +367,7 @@ impl Worker { .interval(std::time::Instant::now(), interval) .compat(); let stop = self.stop.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { while !stop.load(Ordering::Relaxed) && let Some(Ok(_)) = interval.next().await { @@ -389,7 +385,7 @@ impl Worker { .interval(std::time::Instant::now(), interval) .compat(); let stop = self.stop.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { while !stop.load(Ordering::Relaxed) && let Some(Ok(_)) = interval.next().await { @@ -403,7 +399,7 @@ impl Worker { where F: Future + Send + 'static, { - self.remote.spawn(f); + let _ = self.pool.spawn(f); } fn delay_notify(tx: UnboundedSender>, timeout: Duration) { @@ -438,10 +434,8 @@ impl Worker { /// Stops the worker thread. pub fn stop(&self) { - if let Some(pool) = self.pool.lock().unwrap().take() { - self.stop.store(true, Ordering::Release); - pool.shutdown(); - } + self.stop.store(true, Ordering::Release); + self.pool.shutdown(); } /// Checks if underlying worker can't handle task immediately. @@ -451,7 +445,7 @@ impl Worker { } pub fn remote(&self) -> Remote { - self.remote.clone() + self.pool.remote().clone() } fn start_impl( @@ -461,7 +455,7 @@ impl Worker { metrics_pending_task_count: IntGauge, ) { let counter = self.counter.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { let mut handle = RunnableWrapper { inner: runner }; while let Some(msg) = receiver.next().await { match msg { @@ -488,7 +482,7 @@ impl Worker { let counter = self.counter.clone(); let timeout = runner.get_interval(); Self::delay_notify(tx.clone(), timeout); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { let mut handle = RunnableWrapper { inner: runner }; while let Some(msg) = receiver.next().await { match msg { diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 827ffbbdce2e..c6a34b2673be 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -56,7 +56,7 @@ impl FuturePool { pool, env, pool_size: AtomicUsize::new(pool_size), - max_tasks, + max_tasks: AtomicUsize::new(max_tasks), }), } } @@ -119,6 +119,11 @@ impl FuturePool { pub fn shutdown(&self) { self.inner.pool.shutdown(); } + + // Get a remote queue for spawning tasks without owning the thread pool. + pub fn remote(&self) -> &yatp::Remote { + self.inner.pool.remote() + } } struct PoolInner { @@ -126,13 +131,20 @@ struct PoolInner { env: Env, // for accessing pool_size config since yatp doesn't offer such getter. 
pool_size: AtomicUsize, - max_tasks: usize, + max_tasks: AtomicUsize, } impl PoolInner { #[inline] fn scale_pool_size(&self, thread_count: usize) { self.pool.scale_workers(thread_count); + let mut max_tasks = self.max_tasks.load(Ordering::Acquire); + if max_tasks != std::usize::MAX { + max_tasks = max_tasks + .saturating_div(self.pool_size.load(Ordering::Acquire)) + .saturating_mul(thread_count); + self.max_tasks.store(max_tasks, Ordering::Release); + } self.pool_size.store(thread_count, Ordering::Release); } @@ -148,15 +160,16 @@ impl PoolInner { max_tasks: 100, })); - if self.max_tasks == std::usize::MAX { + let max_tasks = self.max_tasks.load(Ordering::Acquire); + if max_tasks == std::usize::MAX { return Ok(()); } let current_tasks = self.get_running_task_count(); - if current_tasks >= self.max_tasks { + if current_tasks >= max_tasks { Err(Full { current_tasks, - max_tasks: self.max_tasks, + max_tasks, }) } else { Ok(()) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index fc80e69cd843..2752f3f3c513 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -369,7 +369,7 @@ impl YatpPoolBuilder { FuturePool::from_pool(pool, &name, size, task) } - pub fn build_single_level_pool(self) -> ThreadPool { + fn build_single_level_pool(self) -> ThreadPool { let (builder, runner) = self.create_builder(); builder.build_with_queue_and_runner( yatp::queue::QueueType::SingleLevel, @@ -377,7 +377,18 @@ impl YatpPoolBuilder { ) } - pub fn build_multi_level_pool(self) -> ThreadPool { + pub fn build_multi_level_future_pool(self) -> FuturePool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); + let size = self.core_thread_count; + let task = self.max_tasks; + let pool = self.build_multi_level_pool(); + FuturePool::from_pool(pool, &name, size, task) + } + + fn build_multi_level_pool(self) -> ThreadPool { let name = self .name_prefix .clone() diff --git a/src/read_pool.rs b/src/read_pool.rs index 7821f2f946e0..8f5a459c5bcc 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -27,9 +27,7 @@ use tikv_util::{ yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; use tracker::TrackedFuture; -use yatp::{ - metrics::MULTILEVEL_LEVEL_ELAPSED, pool::Remote, queue::Extras, task::future::TaskCell, -}; +use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; use self::metrics::*; use crate::{ @@ -55,11 +53,9 @@ pub enum ReadPool { read_pool_low: FuturePool, }, Yatp { - pool: yatp::ThreadPool, + pool: FuturePool, + // deprecated. will remove in the v8.x. 
running_tasks: IntGauge, - running_threads: IntGauge, - max_tasks: usize, - pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -80,17 +76,11 @@ impl ReadPool { ReadPool::Yatp { pool, running_tasks, - running_threads, - max_tasks, - pool_size, resource_ctl, time_slice_inspector, } => ReadPoolHandle::Yatp { - remote: pool.remote().clone(), + remote: pool.clone(), running_tasks: running_tasks.clone(), - running_threads: running_threads.clone(), - max_tasks: *max_tasks, - pool_size: *pool_size, resource_ctl: resource_ctl.clone(), time_slice_inspector: time_slice_inspector.clone(), }, @@ -106,11 +96,8 @@ pub enum ReadPoolHandle { read_pool_low: FuturePool, }, Yatp { - remote: Remote, + remote: FuturePool, running_tasks: IntGauge, - running_threads: IntGauge, - max_tasks: usize, - pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -145,19 +132,10 @@ impl ReadPoolHandle { ReadPoolHandle::Yatp { remote, running_tasks, - max_tasks, resource_ctl, .. } => { let running_tasks = running_tasks.clone(); - // Note that the running task number limit is not strict. - // If several tasks are spawned at the same time while the running task number - // is close to the limit, they may all pass this check and the number of running - // tasks may exceed the limit. - if running_tasks.get() as usize >= *max_tasks { - return Err(ReadPoolError::UnifiedReadPoolFull); - } - running_tasks.inc(); let fixed_level = match priority { CommandPri::High => Some(0), @@ -167,31 +145,26 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); - let task_cell = if let Some(resource_ctl) = resource_ctl { - TaskCell::new( - TrackedFuture::new(with_resource_limiter( - ControlledFuture::new( - async move { - f.await; - running_tasks.dec(); - }, - resource_ctl.clone(), - group_name, - ), - resource_limiter, - )), - extras, - ) + if let Some(resource_ctl) = resource_ctl { + let fut = TrackedFuture::new(with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_name, + ), + resource_limiter, + )); + remote.spawn_with_extras(fut, extras)?; } else { - TaskCell::new( - TrackedFuture::new(async move { - f.await; - running_tasks.dec(); - }), - extras, - ) - }; - remote.spawn(task_cell); + let fut = async move { + f.await; + running_tasks.dec(); + }; + remote.spawn_with_extras(fut, extras)?; + } } } Ok(()) @@ -231,7 +204,7 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { read_pool_normal, .. } => read_pool_normal.get_pool_size(), - ReadPoolHandle::Yatp { pool_size, .. } => *pool_size, + ReadPoolHandle::Yatp { remote, .. } => remote.get_pool_size(), } } @@ -241,10 +214,10 @@ impl ReadPoolHandle { read_pool_normal, .. } => read_pool_normal.get_running_task_count() / read_pool_normal.get_pool_size(), ReadPoolHandle::Yatp { + remote, running_tasks, - pool_size, .. - } => running_tasks.get() as usize / *pool_size, + } => running_tasks.get() as usize / remote.get_pool_size(), } } @@ -253,19 +226,8 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { - remote, - running_threads, - max_tasks, - pool_size, - .. 
- } => {
- remote.scale_workers(max_thread_count);
- *max_tasks = max_tasks
- .saturating_div(*pool_size)
- .saturating_mul(max_thread_count);
- running_threads.set(max_thread_count as i64);
- *pool_size = max_thread_count;
+ } => {
+ remote.scale_pool_size(max_thread_count);
} } }
@@ -469,6 +431,11 @@ pub fn build_yatp_read_pool_with_name( config.max_thread_count, ), )
+ .max_tasks(
+ config
+ .max_tasks_per_worker
+ .saturating_mul(config.max_thread_count),
+ )
.after_start(move || { let engine = raftkv.lock().unwrap().clone(); set_tls_engine(engine);
@@ -478,21 +445,15 @@ pub fn build_yatp_read_pool_with_name( destroy_tls_engine::(); }); let pool = if let Some(ref r) = resource_ctl {
- builder.build_priority_pool(r.clone())
+ builder.build_priority_future_pool(r.clone())
} else {
- builder.build_multi_level_pool()
+ builder.build_multi_level_future_pool()
}; let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS .with_label_values(&[&unified_read_pool_name]),
- running_threads: UNIFIED_READ_POOL_RUNNING_THREADS
- .with_label_values(&[&unified_read_pool_name]),
- max_tasks: config
- .max_tasks_per_worker
- .saturating_mul(config.max_thread_count),
- pool_size: config.max_thread_count,
resource_ctl, time_slice_inspector, }
@@ -761,12 +722,6 @@ mod metrics { &["name"] ) .unwrap();
- pub static ref UNIFIED_READ_POOL_RUNNING_THREADS: IntGaugeVec = register_int_gauge_vec!(
- "tikv_unified_read_pool_thread_count",
- "The number of running threads in the unified read pool",
- &["name"]
- )
- .unwrap();
} }
@@ -826,7 +781,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) {
- Err(ReadPoolError::UnifiedReadPoolFull) => {}
+ Err(ReadPoolError::FuturePoolFull(..)) => {}
_ => panic!("should return full error"), } tx1.send(()).unwrap();
@@ -875,7 +830,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) {
- Err(ReadPoolError::UnifiedReadPoolFull) => {}
+ Err(ReadPoolError::FuturePoolFull(..)) => {}
_ => panic!("should return full error"), }
@@ -888,7 +843,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) {
- Err(ReadPoolError::UnifiedReadPoolFull) => {}
+ Err(ReadPoolError::FuturePoolFull(..)) => {}
_ => panic!("should return full error"), } }
@@ -931,7 +886,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) {
- Err(ReadPoolError::UnifiedReadPoolFull) => {}
+ Err(ReadPoolError::FuturePoolFull(..)) => {}
_ => panic!("should return full error"), }
@@ -948,7 +903,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) {
- Err(ReadPoolError::UnifiedReadPoolFull) => {}
+ Err(ReadPoolError::FuturePoolFull(..)) => {}
_ => panic!("should return full error"), } }
From 58aed5779d75900f5f1d4a429de93a1149da647c Mon Sep 17 00:00:00 2001
From: lucasliang
Date: Thu, 9 Nov 2023 23:18:42 +0800
Subject: [PATCH 132/203] alert: tackle the false-positive case where alerting `gc not work`.
(#15948) close tikv/tikv#15796 Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- metrics/alertmanager/tikv.rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index bc092562773a..aa8530df45fe 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -15,7 +15,7 @@ groups: - alert: TiKV_GC_can_not_work expr: sum(increase(tikv_gcworker_gc_tasks_vec{task="gc"}[1d])) < 1 and (sum(increase(tikv_gc_compaction_filter_perform[1d])) < 1 and sum(increase(tikv_engine_event_total{db="kv", cf="write", type="compaction"}[1d])) >= 1) - for: 1m + for: 5m labels: env: ENV_LABELS_ENV level: emergency From 157e09d1b109a79e3353ca6d32bef1d4ef80dca7 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 10 Nov 2023 11:24:42 +0800 Subject: [PATCH 133/203] server: Introduce heap profiling config (#15883) close tikv/tikv#15958 introduce heap profiling config Signed-off-by: Connor1996 Co-authored-by: tonyxuqqi --- cmd/tikv-server/src/main.rs | 3 + components/raftstore/src/store/config.rs | 23 ++- components/server/src/server.rs | 5 +- components/server/src/server2.rs | 3 +- components/server/src/setup.rs | 2 - components/tikv_alloc/src/default.rs | 9 ++ components/tikv_alloc/src/jemalloc.rs | 69 +++++++-- etc/config-template.toml | 12 ++ src/config/mod.rs | 142 ++++++++++++++++-- src/server/config.rs | 13 +- src/server/status_server/mod.rs | 1 + src/server/status_server/profile.rs | 12 +- tests/integrations/config/mod.rs | 17 +-- .../config/test-cache-compatible.toml | 2 + tests/integrations/config/test-custom.toml | 8 +- tests/integrations/config/test-default.toml | 2 + 16 files changed, 256 insertions(+), 67 deletions(-) diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 9fdcad81c58e..3f4372c32cc1 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -217,6 +217,9 @@ fn main() { process::exit(1) } + // Init memory related settings. + config.memory.init(); + let (service_event_tx, service_event_rx) = tikv_util::mpsc::unbounded(); // pipe for controling service match config.storage.engine { EngineType::RaftKv => server::server::run_tikv(config, service_event_tx, service_event_rx), diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 3d1b58a6e751..facaa1514d84 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -104,12 +104,11 @@ pub struct Config { pub max_manual_flush_rate: f64, // When a peer is not responding for this time, leader will not keep entry cache for it. pub raft_entry_cache_life_time: ReadableDuration, - // Deprecated! The configuration has no effect. - // They are preserved for compatibility check. // When a peer is newly added, reject transferring leader to the peer for a while. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. It has no effect"] pub raft_reject_transfer_leader_duration: ReadableDuration, /// Whether to disable checking quorum for the raft group. This will make @@ -329,27 +328,26 @@ pub struct Config { pub io_reschedule_concurrent_max_count: usize, pub io_reschedule_hotpot_duration: ReadableDuration, - // Deprecated! Batch is done in raft client. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. 
Batch is done in raft client."] pub raft_msg_flush_interval: ReadableDuration, - // Deprecated! These configuration has been moved to Coprocessor. - // They are preserved for compatibility check. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to coprocessor.region_max_size."] pub region_max_size: ReadableSize, #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to coprocessor.region_split_size."] pub region_split_size: ReadableSize, - // Deprecated! The time to clean stale peer safely can be decided based on RocksDB snapshot - // sequence number. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. The time to clean stale peer safely can be decided based on RocksDB snapshot sequence number."] pub clean_stale_peer_delay: ReadableDuration, // Interval to inspect the latency of raftstore for slow store detection. @@ -409,6 +407,7 @@ pub struct Config { } impl Default for Config { + #[allow(deprecated)] fn default() -> Config { Config { prevote: true, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a4b6276a5878..006750fd518a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -75,7 +75,9 @@ use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; use tikv::{ - config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, + config::{ + ConfigController, DbConfigManger, DbType, LogConfigManager, MemoryConfigManager, TikvConfig, + }, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -506,6 +508,7 @@ where ); cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); + cfg_controller.register(tikv::config::Module::Memory, Box::new(MemoryConfigManager)); // Create cdc. 
let mut cdc_worker = Box::new(LazyWorker::new("cdc")); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 65d02f58c088..fdbb18b62054 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -73,7 +73,7 @@ use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ config::{ loop_registry, ConfigController, ConfigurableDb, DbConfigManger, DbType, LogConfigManager, - TikvConfig, + MemoryConfigManager, TikvConfig, }, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, @@ -441,6 +441,7 @@ where ); cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); + cfg_controller.register(tikv::config::Module::Memory, Box::new(MemoryConfigManager)); let lock_mgr = LockManager::new(&self.core.config.pessimistic_txn); cfg_controller.register( diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index b758b9e39df9..b11ffbc45b60 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -245,12 +245,10 @@ pub fn initial_metric(cfg: &MetricConfig) { pub fn overwrite_config_with_cmd_args(config: &mut TikvConfig, matches: &ArgMatches<'_>) { if let Some(level) = matches.value_of("log-level") { config.log.level = logger::get_level_by_string(level).unwrap().into(); - config.log_level = slog::Level::Info.into(); } if let Some(file) = matches.value_of("log-file") { config.log.file.filename = file.to_owned(); - config.log_file = "".to_owned(); } if let Some(addr) = matches.value_of("addr") { diff --git a/components/tikv_alloc/src/default.rs b/components/tikv_alloc/src/default.rs index 2674331c3cd9..5133d76e1728 100644 --- a/components/tikv_alloc/src/default.rs +++ b/components/tikv_alloc/src/default.rs @@ -8,6 +8,7 @@ use crate::AllocStats; pub fn dump_stats() -> String { String::new() } + pub fn dump_prof(_path: &str) -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } @@ -24,6 +25,14 @@ pub fn deactivate_prof() -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } +pub fn set_prof_sample(_rate: u64) -> ProfResult<()> { + Err(ProfError::MemProfilingNotEnabled) +} + +pub fn is_profiling_active() -> bool { + false +} + /// # Safety /// /// It is safe. The unsafe marker is just for matching the function signature. diff --git a/components/tikv_alloc/src/jemalloc.rs b/components/tikv_alloc/src/jemalloc.rs index 876afa9fcd53..245f6280b716 100644 --- a/components/tikv_alloc/src/jemalloc.rs +++ b/components/tikv_alloc/src/jemalloc.rs @@ -133,7 +133,7 @@ pub fn remove_thread_memory_accessor() { use std::thread::ThreadId; -pub use self::profiling::{activate_prof, deactivate_prof, dump_prof}; +pub use self::profiling::*; pub fn dump_stats() -> String { let mut buf = Vec::with_capacity(1024); @@ -311,6 +311,21 @@ mod profiling { // C string should end with a '\0'. 
const PROF_ACTIVE: &[u8] = b"prof.active\0"; const PROF_DUMP: &[u8] = b"prof.dump\0"; + const PROF_RESET: &[u8] = b"prof.reset\0"; + const OPT_PROF: &[u8] = b"opt.prof\0"; + + pub fn set_prof_sample(rate: u64) -> ProfResult<()> { + let rate = (rate as f64).log2().ceil() as usize; + unsafe { + if let Err(e) = tikv_jemalloc_ctl::raw::write(PROF_RESET, rate) { + return Err(ProfError::JemallocError(format!( + "failed to set prof sample: {}", + e + ))); + } + } + Ok(()) + } pub fn activate_prof() -> ProfResult<()> { unsafe { @@ -351,22 +366,44 @@ mod profiling { Ok(()) } + pub fn is_profiling_active() -> bool { + match unsafe { tikv_jemalloc_ctl::raw::read(PROF_ACTIVE) } { + Err(e) => { + panic!("is_profiling_active: {:?}", e); + } + Ok(prof) => prof, + } + } + + pub fn is_profiling_enabled() -> bool { + match unsafe { tikv_jemalloc_ctl::raw::read(OPT_PROF) } { + Err(e) => { + // Shouldn't be possible since mem-profiling is set + panic!("is_profiling_enabled: {:?}", e); + } + Ok(prof) => prof, + } + } + #[cfg(test)] mod tests { use std::fs; use tempfile::Builder; - const OPT_PROF: &[u8] = b"opt.prof\0"; + use super::*; - fn is_profiling_on() -> bool { - match unsafe { tikv_jemalloc_ctl::raw::read(OPT_PROF) } { - Err(e) => { - // Shouldn't be possible since mem-profiling is set - panic!("is_profiling_on: {:?}", e); - } - Ok(prof) => prof, - } + #[test] + #[ignore = "#ifdef MALLOC_CONF"] + fn test_profiling_active() { + // Make sure somebody has turned on profiling + assert!(is_profiling_enabled(), "set MALLOC_CONF=prof:true"); + activate_prof().unwrap(); + assert!(is_profiling_active()); + deactivate_prof().unwrap(); + assert!(!is_profiling_active()); + + super::set_prof_sample(512 * 1024 * 1024).unwrap(); } // Only trigger this test with jemallocs `opt.prof` set to @@ -382,7 +419,7 @@ mod profiling { #[ignore = "#ifdef MALLOC_CONF"] fn test_profiling_memory_ifdef_malloc_conf() { // Make sure somebody has turned on profiling - assert!(is_profiling_on(), "set MALLOC_CONF=prof:true"); + assert!(is_profiling_enabled(), "set MALLOC_CONF=prof:true"); let dir = Builder::new() .prefix("test_profiling_memory") @@ -391,11 +428,11 @@ mod profiling { let os_path = dir.path().to_path_buf().join("test1.dump").into_os_string(); let path = os_path.into_string().unwrap(); - super::dump_prof(&path).unwrap(); + dump_prof(&path).unwrap(); let os_path = dir.path().to_path_buf().join("test2.dump").into_os_string(); let path = os_path.into_string().unwrap(); - super::dump_prof(&path).unwrap(); + dump_prof(&path).unwrap(); let files = fs::read_dir(dir.path()).unwrap().count(); assert_eq!(files, 2); @@ -431,4 +468,10 @@ mod profiling { pub fn deactivate_prof() -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } + pub fn set_prof_sample(_rate: u64) -> ProfResult<()> { + Err(ProfError::MemProfilingNotEnabled) + } + pub fn is_profiling_active() -> bool { + false + } } diff --git a/etc/config-template.toml b/etc/config-template.toml index 3c8a60159105..3e55004feb29 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -83,6 +83,18 @@ ## maximum number of old log files to retain # max-backups = 0 +[memory] +## Whether enable the heap profiling which may have a bit performance overhead about 2% for the +## default sample rate. +# enable-heap-profiling = true + +## Average interval between allocation samples, as measured in bytes of allocation activity. +## Increasing the sampling interval decreases profile fidelity, but also decreases the +## computational overhead. 
+## The default sample interval is 512 KB. It only accepts power of two, otherwise it will be +## rounded up to the next power of two. +# profiling-sample-per-bytes = "512KB" + ## Configurations for the single thread pool serving read requests. [readpool.unified] ## The minimal working thread count of the thread pool. diff --git a/src/config/mod.rs b/src/config/mod.rs index 237ac3c7a725..b192a7ac5f75 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -82,6 +82,7 @@ use crate::{ server::{ gc_worker::{GcConfig, RawCompactionFilterFactory, WriteCompactionFilterFactory}, lock_manager::Config as PessimisticTxnConfig, + status_server::HEAP_PROFILE_ACTIVE, ttl::TtlCompactionFilterFactory, Config as ServerConfig, CONFIG_ROCKSDB_GAUGE, }, @@ -1263,10 +1264,10 @@ pub struct DbConfig { #[serde(with = "rocks_config::rate_limiter_mode_serde")] #[online_config(skip)] pub rate_limiter_mode: DBRateLimiterMode, - // deprecated. use rate_limiter_auto_tuned. - #[online_config(skip)] + #[online_config(hidden)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The configuration has been removed. Use `rate_limiter_auto_tuned` instead"] pub auto_tuned: Option, pub rate_limiter_auto_tuned: bool, pub bytes_per_sync: ReadableSize, @@ -1318,6 +1319,7 @@ pub struct DbResources { } impl Default for DbConfig { + #[allow(deprecated)] fn default() -> DbConfig { DbConfig { wal_recovery_mode: DBRecoveryMode::PointInTime, @@ -2965,13 +2967,15 @@ pub struct CdcConfig { pub old_value_cache_memory_quota: ReadableSize, // Deprecated! preserved for compatibility check. - #[online_config(skip)] + #[online_config(hidden)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The configuration has been removed."] pub old_value_cache_size: usize, } impl Default for CdcConfig { + #[allow(deprecated)] fn default() -> Self { Self { min_ts_interval: ReadableDuration::secs(1), @@ -3211,6 +3215,72 @@ impl ConfigManager for LogConfigManager { } } +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct MemoryConfig { + // Whether enable the heap profiling which may have a bit performance overhead about 2% for the + // default sample rate. + pub enable_heap_profiling: bool, + + // Average interval between allocation samples, as measured in bytes of allocation activity. + // Increasing the sampling interval decreases profile fidelity, but also decreases the + // computational overhead. + // The default sample interval is 512 KB. It only accepts power of two, otherwise it will be + // rounded up to the next power of two. 
+ pub profiling_sample_per_bytes: ReadableSize, +} + +impl Default for MemoryConfig { + fn default() -> Self { + Self { + enable_heap_profiling: true, + profiling_sample_per_bytes: ReadableSize::kb(512), + } + } +} + +impl MemoryConfig { + pub fn init(&self) { + if self.enable_heap_profiling { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + if let Err(e) = tikv_alloc::activate_prof() { + error!("failed to enable heap profiling"; "err" => ?e); + return; + } + *activate = Some(None); + tikv_alloc::set_prof_sample(self.profiling_sample_per_bytes.0).unwrap(); + } + } +} + +pub struct MemoryConfigManager; + +impl ConfigManager for MemoryConfigManager { + fn dispatch(&mut self, changes: ConfigChange) -> CfgResult<()> { + if let Some(ConfigValue::Bool(enable)) = changes.get("enable_heap_profiling") { + if *enable { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + // already enabled by HTTP API, do nothing + if activate.is_none() { + tikv_alloc::activate_prof()?; + *activate = Some(None); + } + } else { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + tikv_alloc::deactivate_prof()?; + *activate = None; + } + } + + if let Some(ConfigValue::Size(sample_rate)) = changes.get("profiling_sample_per_bytes") { + tikv_alloc::set_prof_sample(*sample_rate).unwrap(); + } + info!("update memory config"; "config" => ?changes); + Ok(()) + } +} + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -3261,21 +3331,29 @@ pub struct TikvConfig { #[online_config(hidden)] pub cfg_path: String, - // Deprecated! These configuration has been moved to LogConfig. - // They are preserved for compatibility check. #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.level."] pub log_level: LogLevel, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.filename."] pub log_file: String, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.format."] pub log_format: LogFormat, - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.max_days."] pub log_rotation_timespan: ReadableDuration, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.max_size."] pub log_rotation_size: ReadableSize, #[online_config(skip)] @@ -3306,6 +3384,9 @@ pub struct TikvConfig { #[online_config(submodule)] pub log: LogConfig, + #[online_config(submodule)] + pub memory: MemoryConfig, + #[online_config(submodule)] pub quota: QuotaConfig, @@ -3383,6 +3464,7 @@ pub struct TikvConfig { } impl Default for TikvConfig { + #[allow(deprecated)] fn default() -> TikvConfig { TikvConfig { cfg_path: "".to_owned(), @@ -3399,6 +3481,7 @@ impl Default for TikvConfig { memory_usage_limit: None, memory_usage_high_water: 0.9, log: LogConfig::default(), + memory: MemoryConfig::default(), quota: QuotaConfig::default(), readpool: ReadPoolConfig::default(), server: ServerConfig::default(), @@ -3777,6 +3860,7 @@ impl TikvConfig { // As the init of `logger` is very early, this adjust needs to be separated and // called immediately after parsing the command line. 
+ #[allow(deprecated)] pub fn logger_compatible_adjust(&mut self) { let default_tikv_cfg = TikvConfig::default(); let default_log_cfg = LogConfig::default(); @@ -3828,6 +3912,7 @@ impl TikvConfig { } } + #[allow(deprecated)] pub fn compatible_adjust(&mut self) { let default_raft_store = RaftstoreConfig::default(); let default_coprocessor = CopConfig::default(); @@ -4435,6 +4520,7 @@ pub enum Module { BackupStream, Quota, Log, + Memory, Unknown(String), } @@ -4463,6 +4549,7 @@ impl From<&str> for Module { "resource_metering" => Module::ResourceMetering, "quota" => Module::Quota, "log" => Module::Log, + "memory" => Module::Memory, n => Module::Unknown(n.to_owned()), } } @@ -4766,7 +4853,7 @@ mod tests { assert_eq!(last_cfg_metadata.modified().unwrap(), first_modified); // write to file when config is the inequivalent of last one. - cfg.log_level = slog::Level::Warning.into(); + cfg.log.level = slog::Level::Warning.into(); persist_config(&cfg).unwrap(); last_cfg_metadata = last_cfg_path.metadata().unwrap(); assert_ne!(last_cfg_metadata.modified().unwrap(), first_modified); @@ -5364,7 +5451,7 @@ mod tests { } #[test] - fn test_change_logconfig() { + fn test_change_log_config() { let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); let cfg_controller = ConfigController::new(cfg); @@ -5386,6 +5473,37 @@ mod tests { ); } + #[test] + #[cfg(feature = "mem-profiling")] + fn test_change_memory_config() { + let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); + let cfg_controller = ConfigController::new(cfg); + + cfg_controller.register(Module::Memory, Box::new(MemoryConfigManager)); + cfg_controller + .update_config("memory.enable_heap_profiling", "false") + .unwrap(); + assert_eq!(tikv_alloc::is_profiling_active(), false); + cfg_controller + .update_config("memory.enable_heap_profiling", "true") + .unwrap(); + assert_eq!(tikv_alloc::is_profiling_active(), true); + + cfg_controller + .update_config("memory.profiling_sample_per_bytes", "1MB") + .unwrap(); + assert_eq!( + cfg_controller + .get_current() + .memory + .profiling_sample_per_bytes, + ReadableSize::mb(1), + ); + cfg_controller + .update_config("memory.profiling_sample_per_bytes", "invalid") + .unwrap_err(); + } + #[test] fn test_dispatch_titan_blob_run_mode_config() { let mut cfg = TikvConfig::default(); diff --git a/src/server/config.rs b/src/server/config.rs index 013d1a66238d..4e66e5802c0b 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -187,26 +187,27 @@ pub struct Config { #[online_config(skip)] pub labels: HashMap, - // deprecated. use readpool.coprocessor.xx_concurrency. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.*_concurrency."] pub end_point_concurrency: Option, - // deprecated. use readpool.coprocessor.stack_size. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.stack_size."] pub end_point_stack_size: Option, - // deprecated. use readpool.coprocessor.max_tasks_per_worker_xx. 
#[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.max_tasks_per_worker_*."] pub end_point_max_tasks: Option, } impl Default for Config { + #[allow(deprecated)] fn default() -> Config { let cpu_num = SysQuota::cpu_cores_quota(); let background_thread_count = if cpu_num > 16.0 { 3 } else { 2 }; diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index b76454ffab86..60b267a6d944 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,6 +40,7 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; +pub use profile::HEAP_PROFILE_ACTIVE; use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 3941c6c12b67..dbf819b35fe0 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -37,7 +37,7 @@ lazy_static! { // If it's some it means there are already a CPU profiling. static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. - static ref HEAP_PROFILE_ACTIVE: Mutex>, TempDir)>> = Mutex::new(None); + pub static ref HEAP_PROFILE_ACTIVE: Mutex, TempDir)>>> = Mutex::new(None); // To normalize thread names. static ref THREAD_NAME_RE: Regex = @@ -129,7 +129,7 @@ where let on_start = move || { let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); assert!(activate.is_none()); - *activate = Some((Some(tx), dir)); + *activate = Some(Some((tx, dir))); activate_prof().map_err(|e| format!("activate_prof: {}", e))?; callback(); info!("periodical heap profiling is started"); @@ -168,9 +168,11 @@ where pub fn deactivate_heap_profile() -> bool { let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); match activate.as_mut() { - Some((tx, _)) => { - if let Some(tx) = tx.take() { + Some(tx) => { + if let Some((tx, _)) = tx.take() { let _ = tx.send(()); + } else { + *activate = None; } true } @@ -277,7 +279,7 @@ pub fn heap_profiles_dir() -> Option { .lock() .unwrap() .as_ref() - .map(|(_, dir)| dir.path().to_owned()) + .and_then(|v| v.as_ref().map(|(_, dir)| dir.path().to_owned())) } pub fn list_heap_profiles() -> Result, String> { diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 1239aa53fb80..2ab4ce5cc094 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -65,7 +65,6 @@ fn read_file_in_project_dir(path: &str) -> String { #[test] fn test_serde_custom_tikv_config() { let mut value = TikvConfig::default(); - value.log_rotation_timespan = ReadableDuration::days(1); value.log.level = Level::Critical.into(); value.log.file.filename = "foo".to_owned(); value.log.format = LogFormat::Json; @@ -77,6 +76,8 @@ fn test_serde_custom_tikv_config() { value.abort_on_panic = true; value.memory_usage_limit = Some(ReadableSize::gb(10)); value.memory_usage_high_water = 0.65; + value.memory.enable_heap_profiling = false; + value.memory.profiling_sample_per_bytes = ReadableSize::mb(1); value.server = ServerConfig { cluster_id: 0, // KEEP IT ZERO, it is skipped by serde. 
addr: "example.com:443".to_owned(), @@ -103,9 +104,6 @@ fn test_serde_custom_tikv_config() { grpc_stream_initial_window_size: ReadableSize(12_345), grpc_keepalive_time: ReadableDuration::secs(3), grpc_keepalive_timeout: ReadableDuration::secs(60), - end_point_concurrency: None, - end_point_max_tasks: None, - end_point_stack_size: None, end_point_recursion_limit: 100, end_point_stream_channel_size: 16, end_point_batch_row_limit: 64, @@ -125,6 +123,7 @@ fn test_serde_custom_tikv_config() { forward_max_connections_per_address: 5, reject_messages_on_memory_ratio: 0.8, simplify_metrics: false, + ..Default::default() }; value.readpool = ReadPoolConfig { unified: UnifiedReadPoolConfig { @@ -191,11 +190,9 @@ fn test_serde_custom_tikv_config() { raft_engine_purge_interval: ReadableDuration::minutes(20), max_manual_flush_rate: 5.0, raft_entry_cache_life_time: ReadableDuration::secs(12), - raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(12), region_split_check_diff: Some(ReadableSize::mb(20)), region_compact_check_interval: ReadableDuration::secs(12), - clean_stale_peer_delay: ReadableDuration::secs(0), region_compact_check_step: Some(1_234), region_compact_min_tombstones: 999, region_compact_tombstones_percent: 33, @@ -231,8 +228,6 @@ fn test_serde_custom_tikv_config() { use_delete_range: true, snap_generator_pool_size: 2, cleanup_import_sst_interval: ReadableDuration::minutes(12), - region_max_size: ReadableSize(0), - region_split_size: ReadableSize(0), local_read_batch_size: 33, apply_batch_system, store_batch_system, @@ -253,7 +248,6 @@ fn test_serde_custom_tikv_config() { io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), report_min_resolved_ts_interval: ReadableDuration::millis(233), - raft_msg_flush_interval: ReadableDuration::micros(250), check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), @@ -272,6 +266,7 @@ fn test_serde_custom_tikv_config() { unsafe_disable_check_quorum: false, periodic_full_compact_start_times: ReadableSchedule::default(), periodic_full_compact_start_max_cpu: 0.1, + ..Default::default() }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { @@ -318,7 +313,6 @@ fn test_serde_custom_tikv_config() { rate_bytes_per_sec: ReadableSize::kb(1), rate_limiter_refill_period: ReadableDuration::millis(10), rate_limiter_mode: DBRateLimiterMode::AllIo, - auto_tuned: None, rate_limiter_auto_tuned: false, bytes_per_sync: ReadableSize::mb(1), wal_bytes_per_sync: ReadableSize::kb(32), @@ -616,6 +610,7 @@ fn test_serde_custom_tikv_config() { write_buffer_limit: None, }, titan: titan_db_config.clone(), + ..Default::default() }; value.raftdb = RaftDbConfig { info_log_level: LogLevel::Info, @@ -846,7 +841,6 @@ fn test_serde_custom_tikv_config() { }; value.cdc = CdcConfig { min_ts_interval: ReadableDuration::secs(4), - old_value_cache_size: 0, hibernate_regions_compatible: false, incremental_scan_threads: 3, incremental_scan_concurrency: 4, @@ -856,6 +850,7 @@ fn test_serde_custom_tikv_config() { tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), sink_memory_quota: ReadableSize::mb(7), + ..Default::default() }; value.resolved_ts = ResolvedTsConfig { enable: true, diff --git a/tests/integrations/config/test-cache-compatible.toml 
b/tests/integrations/config/test-cache-compatible.toml index 9fce88833ed4..f91b5cdafc32 100644 --- a/tests/integrations/config/test-cache-compatible.toml +++ b/tests/integrations/config/test-cache-compatible.toml @@ -2,6 +2,8 @@ [log.file] +[memory] + [readpool.coprocessor] [readpool.storage] diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ef7a4809168f..a9772e285af2 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -1,9 +1,5 @@ -log-level = "info" -log-file = "" -log-format = "text" slow-log-file = "slow_foo" slow-log-threshold = "1s" -log-rotation-timespan = "1d" panic-when-unexpected-key-or-data = true abort-on-panic = true memory-usage-limit = "10GB" @@ -19,6 +15,10 @@ max-size = 1 max-backups = 2 max-days = 3 +[memory] +enable-heap-profiling = false +profiling-sample-per-bytes = "1MB" + [readpool.unified] min-thread-count = 5 max-thread-count = 10 diff --git a/tests/integrations/config/test-default.toml b/tests/integrations/config/test-default.toml index 23e53b9daf3f..ca1abc0081b8 100644 --- a/tests/integrations/config/test-default.toml +++ b/tests/integrations/config/test-default.toml @@ -2,6 +2,8 @@ [log.file] +[memory] + [readpool.unified] [readpool.storage] From 5f2f5e7e4d6266e4df891dd6c49f2153b6bcfdff Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 10 Nov 2023 17:26:13 +0800 Subject: [PATCH 134/203] cdc: notify pending tasks if associated regions change (#15947) close tikv/tikv#15910 Signed-off-by: qupeng Signed-off-by: qupeng Co-authored-by: Ping Yu --- components/cdc/src/endpoint.rs | 7 +++- components/cdc/src/initializer.rs | 41 +++++++++++++++---- components/cdc/src/observer.rs | 30 ++++++++------ .../cdc/tests/failpoints/test_endpoint.rs | 26 ++++++++++++ .../cdc/tests/failpoints/test_register.rs | 6 ++- 5 files changed, 86 insertions(+), 24 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 82233af8f145..e62650c77c6d 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -809,7 +809,6 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint { CDC_SCAN_TASKS.with_label_values(&["finish"]).inc(); } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 2882d2e975e0..ef39a693e3e4 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -90,7 +90,6 @@ pub(crate) struct Initializer { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, - pub(crate) scan_concurrency_semaphore: Arc, pub(crate) scan_speed_limiter: Limiter, pub(crate) fetch_speed_limiter: Limiter, @@ -110,9 +109,11 @@ impl Initializer { &mut self, change_observer: ChangeObserver, cdc_handle: T, + concurrency_semaphore: Arc, memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); + let _permit = concurrency_semaphore.acquire().await; // To avoid holding too many snapshots and holding them too long, // we need to acquire scan concurrency permit before taking snapshot. 
@@ -188,8 +189,6 @@ impl Initializer { region: Region, memory_quota: Arc, ) -> Result<()> { - let scan_concurrency_semaphore = self.scan_concurrency_semaphore.clone(); - let _permit = scan_concurrency_semaphore.acquire().await; CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); defer!(CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec()); @@ -655,7 +654,6 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), - scan_concurrency_semaphore: Arc::new(Semaphore::new(1)), scan_speed_limiter: Limiter::new(scan_limit as _), fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, @@ -1034,26 +1032,51 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); + let concurrency_semaphore = Arc::new(Semaphore::new(1)); let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.initialize(change_cmd, raft_router.clone(), memory_quota.clone())) - .unwrap_err(); + block_on(initializer.initialize( + change_cmd, + raft_router.clone(), + concurrency_semaphore.clone(), + memory_quota.clone(), + )) + .unwrap_err(); + + let (tx, rx) = sync_channel(1); + let concurrency_semaphore_ = concurrency_semaphore.clone(); + pool.spawn(async move { + let _permit = concurrency_semaphore_.acquire().await; + tx.send(()).unwrap(); + tx.send(()).unwrap(); + tx.send(()).unwrap(); + }); + rx.recv_timeout(Duration::from_millis(200)).unwrap(); let (tx1, rx1) = sync_channel(1); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); pool.spawn(async move { // Migrated to 2021 migration. This let statement is probably not needed, see // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = ( + &initializer, + &change_cmd, + &raft_router, + &concurrency_semaphore, + ); let res = initializer - .initialize(change_cmd, raft_router, memory_quota) + .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) .await; tx1.send(res).unwrap(); }); + // Must timeout because there is no enough permit. + rx1.recv_timeout(Duration::from_millis(200)).unwrap_err(); - // Shouldn't timeout, gets an error instead. + // Release the permit + rx.recv_timeout(Duration::from_millis(200)).unwrap(); let res = rx1.recv_timeout(Duration::from_millis(200)).unwrap(); - assert!(res.is_err()); + res.unwrap_err(); worker.stop(); } diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index aac2842e4042..cfcedfeb59da 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -177,20 +177,26 @@ impl RegionChangeObserver for CdcObserver { event: RegionChangeEvent, _: StateRole, ) { - if let RegionChangeEvent::Destroy = event { - let region_id = ctx.region().get_id(); - if let Some(observe_id) = self.is_subscribed(region_id) { - // Unregister all downstreams. 
- let store_err = RaftStoreError::RegionNotFound(region_id); - let deregister = Deregister::Delegate { - region_id, - observe_id, - err: CdcError::request(store_err.into()), - }; - if let Err(e) = self.sched.schedule(Task::Deregister(deregister)) { - error!("cdc schedule cdc task failed"; "error" => ?e); + match event { + RegionChangeEvent::Destroy + | RegionChangeEvent::Update( + RegionChangeReason::Split | RegionChangeReason::CommitMerge, + ) => { + let region_id = ctx.region().get_id(); + if let Some(observe_id) = self.is_subscribed(region_id) { + // Unregister all downstreams. + let store_err = RaftStoreError::RegionNotFound(region_id); + let deregister = Deregister::Delegate { + region_id, + observe_id, + err: CdcError::request(store_err.into()), + }; + if let Err(e) = self.sched.schedule(Task::Deregister(deregister)) { + error!("cdc schedule cdc task failed"; "error" => ?e); + } } } + _ => {} } } } diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index f7cc387625dd..42977cc38566 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -569,3 +569,29 @@ fn test_cdc_stream_multiplexing() { } assert!(request_2_ready); } + +// This case tests pending regions can still get region split/merge +// notifications. +#[test] +fn test_cdc_notify_pending_regions() { + let cluster = new_server_cluster(0, 1); + cluster.pd_client.disable_default_operator(); + let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); + let region = suite.cluster.get_region(&[]); + let rid = region.id; + let (mut req_tx, _, receive_event) = new_event_feed_v2(suite.get_region_cdc_client(rid)); + + fail::cfg("cdc_before_initialize", "pause").unwrap(); + let mut req = suite.new_changedata_request(rid); + req.request_id = 1; + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + + thread::sleep(Duration::from_millis(100)); + suite.cluster.must_split(®ion, b"x"); + let event = receive_event(false); + matches!( + event.get_events()[0].event, + Some(Event_oneof_event::Error(ref e)) if e.has_region_not_found(), + ); + fail::remove("cdc_before_initialize"); +} diff --git a/components/cdc/tests/failpoints/test_register.rs b/components/cdc/tests/failpoints/test_register.rs index 4558397f8a96..2b6be3744af4 100644 --- a/components/cdc/tests/failpoints/test_register.rs +++ b/components/cdc/tests/failpoints/test_register.rs @@ -165,7 +165,11 @@ fn test_connections_register_impl() { let mut events = receive_event(false).events.to_vec(); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Error(err) => { - assert!(err.has_epoch_not_match(), "{:?}", err); + assert!( + err.has_epoch_not_match() || err.has_region_not_found(), + "{:?}", + err + ); } other => panic!("unknown event {:?}", other), } From 91b35fb8d3f8507e8fcb4217ce1de5169d202764 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 10 Nov 2023 20:39:42 +0800 Subject: [PATCH 135/203] resource_control: support automatically tuning priority resource limiters (#15929) close tikv/tikv#15917 Signed-off-by: glorv --- components/resource_control/src/future.rs | 8 +- components/resource_control/src/lib.rs | 14 +- components/resource_control/src/metrics.rs | 6 + .../resource_control/src/resource_group.rs | 29 +- .../resource_control/src/resource_limiter.rs | 28 +- components/resource_control/src/worker.rs | 356 +++++++++++++++++- components/server/src/server.rs | 3 +- components/server/src/server2.rs | 3 +- 
components/tikv_util/src/yatp_pool/metrics.rs | 4 +- components/tikv_util/src/yatp_pool/mod.rs | 60 ++- src/read_pool.rs | 42 ++- src/server/service/kv.rs | 62 +-- src/storage/mod.rs | 23 +- src/storage/txn/sched_pool.rs | 30 +- src/storage/txn/scheduler.rs | 29 +- tests/failpoints/cases/test_storage.rs | 4 +- 16 files changed, 603 insertions(+), 98 deletions(-) diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index a935c3b41fa9..53bca48b3019 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -274,7 +274,13 @@ mod tests { .name_prefix("test") .build_future_pool(); - let resource_limiter = Arc::new(ResourceLimiter::new("".into(), f64::INFINITY, 1000.0, 0)); + let resource_limiter = Arc::new(ResourceLimiter::new( + "".into(), + f64::INFINITY, + 1000.0, + 0, + true, + )); fn spawn_and_wait(pool: &FuturePool, f: F, limiter: Arc) where diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 6cfd24914a12..a7b4cf031923 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. #![feature(test)] #![feature(local_key_cell_methods)] +#![feature(array_zip)] use std::sync::Arc; @@ -10,8 +11,8 @@ use serde::{Deserialize, Serialize}; mod resource_group; pub use resource_group::{ - ResourceConsumeType, ResourceController, ResourceGroupManager, TaskMetadata, - MIN_PRIORITY_UPDATE_INTERVAL, + priority_from_task_meta, ResourceConsumeType, ResourceController, ResourceGroupManager, + TaskMetadata, MIN_PRIORITY_UPDATE_INTERVAL, }; mod future; @@ -29,7 +30,9 @@ pub use channel::ResourceMetered; mod resource_limiter; pub use resource_limiter::ResourceLimiter; use tikv_util::worker::Worker; -use worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}; +use worker::{ + GroupQuotaAdjustWorker, PriorityLimiterAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION, +}; mod metrics; pub mod worker; @@ -66,10 +69,13 @@ pub fn start_periodic_tasks( bg_worker.spawn_async_task(async move { resource_mgr_service_clone.watch_resource_groups().await; }); - // spawn a task to auto adjust background quota limiter. + // spawn a task to auto adjust background quota limiter and priority quota + // limiter. let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); + let mut priority_worker = PriorityLimiterAdjustWorker::new(mgr.clone()); bg_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { worker.adjust_quota(); + priority_worker.adjust(); }); // spawn a task to periodically upload resource usage statistics to PD. bg_worker.spawn_async_task(async move { diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs index 16338f41c6c8..c94040925011 100644 --- a/components/resource_control/src/metrics.rs +++ b/components/resource_control/src/metrics.rs @@ -22,6 +22,12 @@ lazy_static! 
{ &["name"] ) .unwrap(); + pub static ref PRIORITY_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( + "tikv_resource_control_priority_quota_limit", + "The quota limiter for each priority in resource control", + &["priority"] + ) + .unwrap(); } pub fn deregister_metrics(name: &str) { diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index b7e7ca28705e..b45a9833bb8e 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -41,7 +41,6 @@ const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; /// The maximum RU quota that can be configured. const MAX_RU_QUOTA: u64 = i32::MAX as u64; -#[cfg(test)] const LOW_PRIORITY: u32 = 1; const MEDIUM_PRIORITY: u32 = 8; #[cfg(test)] @@ -57,7 +56,7 @@ pub enum ResourceConsumeType { IoBytes(u64), } -#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter)] +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] #[repr(usize)] pub enum TaskPriority { High = 0, @@ -110,6 +109,7 @@ impl Default for ResourceGroupManager { f64::INFINITY, f64::INFINITY, 0, + false, )) }) .collect::>() @@ -206,6 +206,7 @@ impl ResourceGroupManager { f64::INFINITY, f64::INFINITY, version, + true, ))) }) } else { @@ -304,6 +305,14 @@ impl ResourceGroupManager { self.get_group_count() > 1 } + /// return the priority of target resource group. + #[inline] + pub fn get_resource_group_priority(&self, group: &str) -> u32 { + self.resource_groups + .get(group) + .map_or(LOW_PRIORITY, |g| g.group.priority) + } + // Always return the background resource limiter if any; // Only return the foregroup limiter when priority is enabled. pub fn get_resource_limiter( @@ -371,6 +380,11 @@ impl ResourceGroupManager { group_priority.unwrap_or(default_group.group.priority), ) } + + #[inline] + pub fn get_priority_resource_limiters(&self) -> [Arc; 3] { + self.priority_limiters.clone() + } } pub(crate) struct ResourceGroup { @@ -708,7 +722,7 @@ impl<'a> TaskMetadata<'a> { self.metadata.into_owned() } - fn override_priority(&self) -> u32 { + pub fn override_priority(&self) -> u32 { if self.metadata.is_empty() { return 0; } @@ -734,6 +748,15 @@ impl<'a> TaskMetadata<'a> { } } +// return the TaskPriority value from task metadata. +// This function is used for handling thread pool task waiting metrics. +pub fn priority_from_task_meta(meta: &[u8]) -> usize { + let priority = TaskMetadata::from_bytes(meta).override_priority(); + // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) + debug_assert!(priority <= 16); + TaskPriority::from(priority) as usize +} + impl TaskPriorityProvider for ResourceController { fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { let metadata = TaskMetadata::from_bytes(extras.metadata()); diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 8898b4eba23c..bce6867ac2ea 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -39,6 +39,8 @@ pub struct ResourceLimiter { name: String, version: u64, limiters: [QuotaLimiter; ResourceType::COUNT], + // whether the resource limiter is a background limiter or priority limiter. 
+ is_background: bool, } impl std::fmt::Debug for ResourceLimiter { @@ -48,16 +50,27 @@ impl std::fmt::Debug for ResourceLimiter { } impl ResourceLimiter { - pub fn new(name: String, cpu_limit: f64, io_limit: f64, version: u64) -> Self { + pub fn new( + name: String, + cpu_limit: f64, + io_limit: f64, + version: u64, + is_background: bool, + ) -> Self { let cpu_limiter = QuotaLimiter::new(cpu_limit); let io_limiter = QuotaLimiter::new(io_limit); Self { name, version, limiters: [cpu_limiter, io_limiter], + is_background, } } + pub fn is_background(&self) -> bool { + self.is_background + } + pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { let cpu_dur = self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); @@ -86,7 +99,7 @@ impl ResourceLimiter { } pub(crate) fn get_limit_statistics(&self, ty: ResourceType) -> GroupStatistics { - let (total_consumed, total_wait_dur_us, read_consumed, write_consumed) = + let (total_consumed, total_wait_dur_us, read_consumed, write_consumed, request_count) = self.limiters[ty as usize].get_statistics(); GroupStatistics { version: self.version, @@ -94,6 +107,7 @@ impl ResourceLimiter { total_wait_dur_us, read_consumed, write_consumed, + request_count, } } } @@ -104,6 +118,7 @@ pub(crate) struct QuotaLimiter { total_wait_dur_us: AtomicU64, read_bytes: AtomicU64, write_bytes: AtomicU64, + req_count: AtomicU64, } impl QuotaLimiter { @@ -113,6 +128,7 @@ impl QuotaLimiter { total_wait_dur_us: AtomicU64::new(0), read_bytes: AtomicU64::new(0), write_bytes: AtomicU64::new(0), + req_count: AtomicU64::new(0), } } @@ -128,12 +144,13 @@ impl QuotaLimiter { self.limiter.set_speed_limit(limit); } - fn get_statistics(&self) -> (u64, u64, u64, u64) { + fn get_statistics(&self) -> (u64, u64, u64, u64, u64) { ( self.limiter.total_bytes_consumed() as u64, self.total_wait_dur_us.load(Ordering::Relaxed), self.read_bytes.load(Ordering::Relaxed), self.write_bytes.load(Ordering::Relaxed), + self.req_count.load(Ordering::Relaxed), ) } @@ -146,6 +163,7 @@ impl QuotaLimiter { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } + self.req_count.fetch_add(1, Ordering::Relaxed); dur } @@ -162,6 +180,7 @@ impl QuotaLimiter { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } + self.req_count.fetch_add(1, Ordering::Relaxed); dur } } @@ -173,6 +192,7 @@ pub struct GroupStatistics { pub total_wait_dur_us: u64, pub read_consumed: u64, pub write_consumed: u64, + pub request_count: u64, } impl std::ops::Sub for GroupStatistics { @@ -184,6 +204,7 @@ impl std::ops::Sub for GroupStatistics { total_wait_dur_us: self.total_wait_dur_us.saturating_sub(rhs.total_wait_dur_us), read_consumed: self.read_consumed.saturating_sub(rhs.read_consumed), write_consumed: self.write_consumed.saturating_sub(rhs.write_consumed), + request_count: self.request_count.saturating_sub(rhs.request_count), } } } @@ -198,6 +219,7 @@ impl std::ops::Div for GroupStatistics { total_wait_dur_us: (self.total_wait_dur_us as f64 / rhs) as u64, read_consumed: (self.read_consumed as f64 / rhs) as u64, write_consumed: (self.write_consumed as f64 / rhs) as u64, + request_count: (self.request_count as f64 / rhs) as u64, } } } diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 7bc76691e1ff..79dea73d0ae2 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -9,16 +9,19 @@ use std::{ }; use file_system::{fetch_io_bytes, IoBytes, 
IoType};
-use strum::EnumCount;
+use prometheus::Histogram;
+use strum::{EnumCount, IntoEnumIterator};
 use tikv_util::{
+    debug,
     sys::{cpu_time::ProcessStat, SysQuota},
     time::Instant,
     warn,
+    yatp_pool::metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC,
 };

 use crate::{
     metrics::*,
-    resource_group::ResourceGroupManager,
+    resource_group::{ResourceGroupManager, TaskPriority},
     resource_limiter::{GroupStatistics, ResourceLimiter, ResourceType},
 };
@@ -39,7 +42,7 @@ pub struct SysQuotaGetter {
     process_stat: ProcessStat,
     prev_io_stats: [IoBytes; IoType::COUNT],
     prev_io_ts: Instant,
-    io_bandwidth: u64,
+    io_bandwidth: f64,
 }

 impl ResourceStatsProvider for SysQuotaGetter {
@@ -55,7 +58,7 @@ impl ResourceStatsProvider for SysQuotaGetter {
             }
             ResourceType::Io => {
                 let mut stats = ResourceUsageStats {
-                    total_quota: self.io_bandwidth as f64,
+                    total_quota: self.io_bandwidth,
                     current_used: 0.0,
                 };
                 let now = Instant::now_coarse();
@@ -97,7 +100,7 @@ impl GroupQuotaAdjustWorker {
             process_stat: ProcessStat::cur_proc_stat().unwrap(),
             prev_io_stats: [IoBytes::default(); IoType::COUNT],
             prev_io_ts: Instant::now_coarse(),
-            io_bandwidth,
+            io_bandwidth: io_bandwidth as f64,
         };
         Self::with_quota_getter(resource_ctl, resource_quota_getter)
     }
@@ -295,6 +298,235 @@ struct GroupStats {
     expect_cost_rate: f64,
 }

+/// PriorityLimiterAdjustWorker automatically adjusts the quota of each
+/// priority limiter based on the statistics collected over a period of time.
+/// In general, the caller should call `adjust` at a fixed interval.
+pub struct PriorityLimiterAdjustWorker<R> {
+    resource_ctl: Arc<ResourceGroupManager>,
+    trackers: [PriorityLimiterStatsTracker; 3],
+    resource_quota_getter: R,
+    last_adjust_time: Instant,
+    is_last_low_cpu: bool,
+    is_last_single_group: bool,
+}
+
+impl PriorityLimiterAdjustWorker<SysQuotaGetter> {
+    pub fn new(resource_ctl: Arc<ResourceGroupManager>) -> Self {
+        let resource_quota_getter = SysQuotaGetter {
+            process_stat: ProcessStat::cur_proc_stat().unwrap(),
+            prev_io_stats: [IoBytes::default(); IoType::COUNT],
+            prev_io_ts: Instant::now_coarse(),
+            io_bandwidth: f64::INFINITY,
+        };
+        Self::with_quota_getter(resource_ctl, resource_quota_getter)
+    }
+}
+
+impl<R: ResourceStatsProvider> PriorityLimiterAdjustWorker<R> {
+    fn with_quota_getter(
+        resource_ctl: Arc<ResourceGroupManager>,
+        resource_quota_getter: R,
+    ) -> Self {
+        let priorities: [_; 3] = TaskPriority::iter().collect::<Vec<_>>().try_into().unwrap();
+        let trackers = resource_ctl
+            .get_priority_resource_limiters()
+            .zip(priorities)
+            .map(|(l, p)| PriorityLimiterStatsTracker::new(l, p.as_str()));
+        Self {
+            resource_ctl,
+            trackers,
+            resource_quota_getter,
+            last_adjust_time: Instant::now_coarse(),
+            is_last_low_cpu: true,
+            is_last_single_group: true,
+        }
+    }
+    pub fn adjust(&mut self) {
+        let now = Instant::now_coarse();
+        let dur = now.saturating_duration_since(self.last_adjust_time);
+        if dur < Duration::from_secs(1) {
+            warn!("adjust duration too small, skip adjustment."; "dur" => ?dur);
+            return;
+        }
+        self.last_adjust_time = now;
+
+        // fast path for only the default resource group, which means resource
+        // control is not used at all.
+        let group_count = self.resource_ctl.get_group_count();
+        if group_count == 1 {
+            if self.is_last_single_group {
+                return;
+            }
+            self.is_last_single_group = true;
+            self.trackers.iter().skip(1).for_each(|t| {
+                t.limiter
+                    .get_limiter(ResourceType::Cpu)
+                    .set_rate_limit(f64::INFINITY)
+            });
+            return;
+        }
+        self.is_last_single_group = false;
+
+        let stats: [_; 3] =
+            std::array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64()));
+
+        let process_cpu_stats = match self
+            .resource_quota_getter
+            .get_current_stats(ResourceType::Cpu)
+        {
+            Ok(s) => s,
+            Err(e) => {
+                warn!("get process total cpu failed; skip adjustment."; "err" => ?e);
+                return;
+            }
+        };
+
+        if process_cpu_stats.current_used < process_cpu_stats.total_quota * 0.5 {
+            if self.is_last_low_cpu {
+                return;
+            }
+            self.is_last_low_cpu = true;
+            self.trackers.iter().skip(1).for_each(|t| {
+                t.limiter
+                    .get_limiter(ResourceType::Cpu)
+                    .set_rate_limit(f64::INFINITY);
+                // 0 represents infinity
+                PRIORITY_QUOTA_LIMIT_VEC
+                    .get_metric_with_label_values(&[t.priority])
+                    .unwrap()
+                    .set(0);
+            });
+            return;
+        }
+        self.is_last_low_cpu = false;
+
+        let total_reqs: u64 = stats.iter().map(|s| s.req_count).sum();
+        let max_reqs = stats.iter().map(|s| s.req_count).max().unwrap();
+        // there is only 1 active priority, do not restrict.
+        if total_reqs * 99 / 100 <= max_reqs {
+            self.trackers
+                .iter()
+                .skip(1)
+                .for_each(|t: &PriorityLimiterStatsTracker| {
+                    t.limiter
+                        .get_limiter(ResourceType::Cpu)
+                        .set_rate_limit(f64::INFINITY)
+                });
+            return;
+        }
+
+        let real_cpu_total: f64 = stats.iter().map(|s| s.cpu_secs).sum();
+        let expect_pool_cpu_total = real_cpu_total * (process_cpu_stats.total_quota * 0.95)
+            / process_cpu_stats.current_used;
+        let mut limits = [0.0; 2];
+        let level_expected: [_; 3] =
+            std::array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs);
+        // subtract the cpu time usage of the high priority.
+        let mut expect_cpu_time_total = expect_pool_cpu_total - level_expected[0];
+
+        // still reserve a minimal cpu quota
+        let minimal_quota = process_cpu_stats.total_quota / MICROS_PER_SEC * 0.05;
+        for i in 1..self.trackers.len() {
+            if expect_cpu_time_total < minimal_quota {
+                expect_cpu_time_total = minimal_quota;
+            }
+            let limit = expect_cpu_time_total * MICROS_PER_SEC;
+            self.trackers[i]
+                .limiter
+                .get_limiter(ResourceType::Cpu)
+                .set_rate_limit(limit);
+            PRIORITY_QUOTA_LIMIT_VEC
+                .get_metric_with_label_values(&[self.trackers[i].priority])
+                .unwrap()
+                .set(limit as i64);
+            limits[i - 1] = limit;
+            expect_cpu_time_total -= level_expected[i];
+        }
+        debug!("adjust cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected,
+            "limits" => ?limits, "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total);
+    }
+}
+
+#[derive(Debug)]
+struct LimiterStats {
+    // QuotaLimiter consumed cpu secs in total
+    cpu_secs: f64,
+    // QuotaLimiter waited secs in total.
+    wait_secs: f64,
+    // the total number of tasks that are scheduled.
+    req_count: u64,
+}
+
+struct HistogramTracker {
+    metrics: Histogram,
+    last_sum: f64,
+    last_count: u64,
+}
+
+impl HistogramTracker {
+    fn new(metrics: Histogram) -> Self {
+        let last_sum = metrics.get_sample_sum();
+        let last_count = metrics.get_sample_count();
+        Self {
+            metrics,
+            last_sum,
+            last_count,
+        }
+    }
+
+    fn get_and_upate_statistics(&mut self) -> (f64, u64) {
+        let cur_sum = self.metrics.get_sample_sum();
+        let cur_count = self.metrics.get_sample_count();
+        let res = (cur_sum - self.last_sum, cur_count - self.last_count);
+        self.last_sum = cur_sum;
+        self.last_count = cur_count;
+        res
+    }
+}
+
+struct PriorityLimiterStatsTracker {
+    priority: &'static str,
+    limiter: Arc<ResourceLimiter>,
+    last_stats: GroupStatistics,
+    // unified-read-pool and schedule-worker-pool wait duration metrics.
+    task_wait_dur_trakcers: [HistogramTracker; 2],
+}
+
+impl PriorityLimiterStatsTracker {
+    fn new(limiter: Arc<ResourceLimiter>, priority: &'static str) -> Self {
+        let task_wait_dur_trakcers =
+            ["unified-read-pool", "sched-worker-priority"].map(|pool_name| {
+                HistogramTracker::new(
+                    YATP_POOL_SCHEDULE_WAIT_DURATION_VEC
+                        .get_metric_with_label_values(&[pool_name, priority])
+                        .unwrap(),
+                )
+            });
+        let last_stats = limiter.get_limit_statistics(ResourceType::Cpu);
+        Self {
+            priority,
+            limiter,
+            last_stats,
+            task_wait_dur_trakcers,
+        }
+    }
+
+    fn get_and_update_last_stats(&mut self, dur_secs: f64) -> LimiterStats {
+        let cur_stats = self.limiter.get_limit_statistics(ResourceType::Cpu);
+        let stats_delta = (cur_stats - self.last_stats) / dur_secs;
+        self.last_stats = cur_stats;
+        let wait_stats: [_; 2] =
+            std::array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics());
+        let schedule_wait_dur_secs = wait_stats.iter().map(|s| s.0).sum::<f64>() / dur_secs;
+        LimiterStats {
+            cpu_secs: stats_delta.total_consumed as f64 / MICROS_PER_SEC,
+            wait_secs: stats_delta.total_wait_dur_us as f64 / MICROS_PER_SEC
+                + schedule_wait_dur_secs,
+            req_count: stats_delta.request_count,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::time::Duration;
@@ -658,4 +890,118 @@ mod tests {
             },
         );
     }
+
+    #[test]
+    fn test_adjust_priority_resource_limiter() {
+        let resource_ctl = Arc::new(ResourceGroupManager::default());
+        let priority_limiters = resource_ctl.get_priority_resource_limiters();
+        let test_provider = TestResourceStatsProvider::new(8.0, f64::INFINITY);
+        let mut worker =
+            PriorityLimiterAdjustWorker::with_quota_getter(resource_ctl.clone(), test_provider);
+
+        let reset_quota = |worker: &mut PriorityLimiterAdjustWorker<TestResourceStatsProvider>,
+                           cpu: f64| {
+            worker.resource_quota_getter.cpu_used = cpu;
+            worker.last_adjust_time = Instant::now_coarse() - Duration::from_secs(10);
+            priority_limiters[1]
+                .get_limiter(ResourceType::Cpu)
+                .set_rate_limit(f64::INFINITY);
+            priority_limiters[2]
+                .get_limiter(ResourceType::Cpu)
+                .set_rate_limit(f64::INFINITY);
+        };
+
+        fn check(val: f64, expected: f64) {
+            assert!(
+                (val.is_infinite() && expected.is_infinite())
+                    || (expected * 0.99 < val && val < expected * 1.01),
+                "actual: {}, expected: {}",
+                val,
+                expected
+            );
+        }
+
+        let check_limiter = |high: f64, medium: f64, low: f64| {
+            check(
+                priority_limiters[0]
+                    .get_limiter(ResourceType::Cpu)
+                    .get_rate_limit(),
+                high * MICROS_PER_SEC,
+            );
+            check(
+                priority_limiters[1]
+                    .get_limiter(ResourceType::Cpu)
+                    .get_rate_limit(),
+                medium * MICROS_PER_SEC,
+            );
+            check(
+                priority_limiters[2]
+                    .get_limiter(ResourceType::Cpu)
+                    .get_rate_limit(),
+                low * MICROS_PER_SEC,
+            );
+        };
+
+        // only default group, always return infinity.
+ reset_quota(&mut worker, 6.4); + priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default()); + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + let rg1 = new_resource_group_ru("test_high".into(), 1000, 16); + resource_ctl.add_resource_group(rg1); + let rg2 = new_resource_group_ru("test_low".into(), 2000, 1); + resource_ctl.add_resource_group(rg2); + + reset_quota(&mut worker, 6.4); + priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default()); + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 1.2); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 2.6, 0.6); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + reset_quota(&mut worker, 8.0); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 2.8); + + reset_quota(&mut worker, 6.0); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 5.2); + + // duration too small, unchanged. 
+ worker.resource_quota_getter.cpu_used = 6.0; + worker.last_adjust_time = Instant::now_coarse() - Duration::from_millis(500); + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 5.2); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 006750fd518a..72e09a9f8d8e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -70,7 +70,7 @@ use raftstore::{ RaftRouterCompactedEventSender, }; use resolved_ts::{LeadershipResolver, Task}; -use resource_control::ResourceGroupManager; +use resource_control::{priority_from_task_meta, ResourceGroupManager}; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; @@ -558,6 +558,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), + Some(Arc::new(priority_from_task_meta)), )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index fdbb18b62054..eab384871e6c 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -67,7 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resolved_ts::Task; -use resource_control::ResourceGroupManager; +use resource_control::{priority_from_task_meta, ResourceGroupManager}; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ @@ -469,6 +469,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), + Some(Arc::new(priority_from_task_meta)), )) } else { None diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index 8ae1aa8910ef..efb1379dcc79 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -19,8 +19,8 @@ lazy_static! { pub static ref YATP_POOL_SCHEDULE_WAIT_DURATION_VEC: HistogramVec = register_histogram_vec!( "tikv_yatp_pool_schedule_wait_duration", "Histogram of yatp pool schedule wait duration.", - &["name"], - exponential_buckets(1e-5, 4.0, 12).unwrap() // 10us ~ 41s + &["name", "priority"], + exponential_buckets(1e-5, 2.0, 18).unwrap() // 10us ~ 2.5s ) .unwrap(); } diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 2752f3f3c513..3cb237bad15d 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -1,14 +1,14 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. mod future_pool; -mod metrics; +pub mod metrics; use std::sync::Arc; use fail::fail_point; pub use future_pool::{Full, FuturePool}; use futures::{compat::Stream01CompatExt, StreamExt}; -use prometheus::{local::LocalHistogram, Histogram}; +use prometheus::{local::LocalHistogram, Histogram, HistogramOpts}; use yatp::{ pool::{CloneRunnerBuilder, Local, Remote, Runner}, queue::{multilevel, priority, Extras, QueueType, TaskCell as _}, @@ -165,7 +165,10 @@ pub struct YatpPoolRunner { before_pause: Option>, // Statistics about the schedule wait duration. - schedule_wait_duration: LocalHistogram, + // local histogram for high,medium,low priority tasks. + schedule_wait_durations: [LocalHistogram; 3], + // return the index of `schedule_wait_durations` from task metadata. 
+ metric_idx_from_task_meta: Arc usize + Send + Sync>, } impl Runner for YatpPoolRunner { @@ -190,12 +193,12 @@ impl Runner for YatpPoolRunner { fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { let extras = task_cell.mut_extras(); if let Some(schedule_time) = extras.schedule_time() { - self.schedule_wait_duration - .observe(schedule_time.elapsed().as_secs_f64()); + let idx = (*self.metric_idx_from_task_meta)(extras.metadata()); + self.schedule_wait_durations[idx].observe(schedule_time.elapsed().as_secs_f64()); } let finished = self.inner.handle(local, task_cell); if self.ticker.try_tick() { - self.schedule_wait_duration.flush(); + self.schedule_wait_durations.iter().for_each(|m| m.flush()); } finished } @@ -229,7 +232,8 @@ impl YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, - schedule_wait_duration: Histogram, + schedule_wait_durations: [Histogram; 3], + metric_idx_from_task_meta: Arc usize + Send + Sync>, ) -> Self { YatpPoolRunner { inner, @@ -238,7 +242,8 @@ impl YatpPoolRunner { after_start, before_stop, before_pause, - schedule_wait_duration: schedule_wait_duration.local(), + schedule_wait_durations: schedule_wait_durations.map(|m| m.local()), + metric_idx_from_task_meta, } } } @@ -256,6 +261,10 @@ pub struct YatpPoolBuilder { max_tasks: usize, cleanup_method: CleanupMethod, + // whether to tracker task scheduling wait duration + enable_task_wait_metrics: bool, + metric_idx_from_task_meta: Option usize + Send + Sync>>, + #[cfg(test)] background_cleanup_hook: Option>, } @@ -275,6 +284,9 @@ impl YatpPoolBuilder { max_tasks: std::usize::MAX, cleanup_method: CleanupMethod::InPlace, + enable_task_wait_metrics: false, + metric_idx_from_task_meta: None, + #[cfg(test)] background_cleanup_hook: None, } @@ -344,6 +356,19 @@ impl YatpPoolBuilder { self } + pub fn enable_task_wait_metrics(mut self) -> Self { + self.enable_task_wait_metrics = true; + self + } + + pub fn metric_idx_from_task_meta( + mut self, + f: Arc usize + Send + Sync>, + ) -> Self { + self.metric_idx_from_task_meta = Some(f); + self + } + pub fn build_future_pool(self) -> FuturePool { let name = self .name_prefix @@ -480,15 +505,24 @@ impl YatpPoolBuilder { let after_start = self.after_start.take(); let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); - let schedule_wait_duration = - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name]); + let schedule_wait_durations = if self.enable_task_wait_metrics { + ["high", "medium", "low"].map(|p| { + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name, p]) + }) + } else { + std::array::from_fn(|_| Histogram::with_opts(HistogramOpts::new("_", "_")).unwrap()) + }; + let metric_idx_from_task_meta = self + .metric_idx_from_task_meta + .unwrap_or_else(|| Arc::new(|_| 0)); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), after_start, before_stop, before_pause, - schedule_wait_duration, + schedule_wait_durations, + metric_idx_from_task_meta, ); (builder, read_pool_runner) } @@ -511,6 +545,7 @@ mod tests { let name = "test_record_schedule_wait_duration"; let pool = YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(name) + .enable_task_wait_metrics() .build_single_level_pool(); let (tx, rx) = mpsc::channel(); for _ in 0..3 { @@ -529,7 +564,8 @@ mod tests { } // Drop the pool so the local metrics are flushed. 
drop(pool); - let histogram = metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); + let histogram = + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "high"]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } diff --git a/src/read_pool.rs b/src/read_pool.rs index 8f5a459c5bcc..32be95698da1 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -395,6 +395,7 @@ pub fn build_yatp_read_pool( engine: E, resource_ctl: Option>, cleanup_method: CleanupMethod, + metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); build_yatp_read_pool_with_name( @@ -404,6 +405,7 @@ pub fn build_yatp_read_pool( resource_ctl, cleanup_method, unified_read_pool_name, + metric_idx_from_task_meta_fn, ) } @@ -414,9 +416,10 @@ pub fn build_yatp_read_pool_with_name( resource_ctl: Option>, cleanup_method: CleanupMethod, unified_read_pool_name: String, + metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, ) -> ReadPool { let raftkv = Arc::new(Mutex::new(engine)); - let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) + let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) .cleanup_method(cleanup_method) .stack_size(config.stack_size.0 as usize) @@ -444,6 +447,12 @@ pub fn build_yatp_read_pool_with_name( .before_stop(|| unsafe { destroy_tls_engine::(); }); + if let Some(metric_idx_from_task_meta_fn) = metric_idx_from_task_meta_fn { + builder = builder + .enable_task_wait_metrics() + .metric_idx_from_task_meta(metric_idx_from_task_meta_fn); + } + let pool = if let Some(ref r) = resource_ctl { builder.build_priority_future_pool(r.clone()) } else { @@ -755,8 +764,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -803,8 +818,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -859,8 +880,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -986,6 +1013,7 @@ mod tests { resource_manager, CleanupMethod::InPlace, name.clone(), + None, ); let gen_task = || { diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8426143d502f..4a7395222f7f 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -192,14 +192,14 @@ macro_rules! 
handle_request { handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); }; ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -229,6 +229,20 @@ macro_rules! handle_request { } } +// consume resource group penalty and set explicit group priority +// We override the override_priority here to make handling tasks easier. +fn consume_penalty_and_set_priority( + resource_manager: &Arc, + resource_control_ctx: &mut ResourceControlContext, +) { + resource_manager.consume_penalty(resource_control_ctx); + if resource_control_ctx.get_override_priority() == 0 { + let prioirty = resource_manager + .get_resource_group_priority(resource_control_ctx.get_resource_group_name()); + resource_control_ctx.override_priority = prioirty as u64; + } +} + macro_rules! set_total_time { ($resp:ident, $duration:expr,no_time_detail) => {}; ($resp:ident, $duration:expr,has_time_detail) => { @@ -476,12 +490,12 @@ impl Tikv for Service { ctx.spawn(task); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -513,13 +527,13 @@ impl Tikv for Service { fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - req: RawCoprocessorRequest, + mut req: RawCoprocessorRequest, sink: UnarySink, ) { let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -602,13 +616,13 @@ impl Tikv for Service { fn coprocessor_stream( &mut self, ctx: RpcContext<'_>, - req: Request, + mut req: Request, mut sink: ServerStreamingSink, ) { let begin_instant = Instant::now(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = 
req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1148,10 +1162,10 @@ fn handle_batch_commands_request( let resp = future::ok(batch_commands_response::Response::default()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::Get(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1169,10 +1183,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1190,10 +1204,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1224,10 +1238,10 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c0d6e6fc4a3b..c89a767a80ba 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3344,7 +3344,8 @@ impl TestStorageBuilder { } else { None }; - + let manager = 
Arc::new(ResourceGroupManager::default()); + let resource_ctl = manager.derive_controller("test".into(), false); Storage::from_engine( self.engine, &self.config, @@ -3362,11 +3363,8 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), ts_provider, - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - false, - ))), - None, + Some(resource_ctl), + Some(manager), ) } @@ -3379,7 +3377,8 @@ impl TestStorageBuilder { &crate::config::StorageReadPoolConfig::default_for_test(), engine.clone(), ); - + let manager = Arc::new(ResourceGroupManager::default()); + let resource_ctl = manager.derive_controller("test".into(), false); Storage::from_engine( engine, &self.config, @@ -3397,16 +3396,14 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), None, - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - false, - ))), - None, + Some(resource_ctl), + Some(manager), ) } pub fn build_for_resource_controller( self, + resource_manager: Arc, resource_controller: Arc, ) -> Result, L, F>> { let engine = TxnTestEngine { @@ -3436,7 +3433,7 @@ impl TestStorageBuilder { latest_feature_gate(), None, Some(resource_controller), - None, + Some(resource_manager), ) } } diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 197363043730..8674a581c725 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -12,7 +12,10 @@ use kvproto::{kvrpcpb::CommandPri, pdpb::QueryKind}; use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; -use resource_control::{ControlledFuture, ResourceController, TaskMetadata}; +use resource_control::{ + priority_from_task_meta, with_resource_limiter, ControlledFuture, ResourceController, + ResourceGroupManager, TaskMetadata, +}; use tikv_util::{ sys::SysQuota, yatp_pool::{Full, FuturePool, PoolTicker, YatpPoolBuilder}, @@ -101,6 +104,7 @@ impl VanillaQueue { struct PriorityQueue { worker_pool: FuturePool, resource_ctl: Arc, + resource_mgr: Arc, } impl PriorityQueue { @@ -118,15 +122,23 @@ impl PriorityQueue { // TODO: maybe use a better way to generate task_id let task_id = rand::random::(); let group_name = metadata.group_name().to_owned(); + let resource_limiter = self.resource_mgr.get_resource_limiter( + unsafe { std::str::from_utf8_unchecked(&group_name) }, + "", + metadata.override_priority() as u64, + ); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); self.worker_pool.spawn_with_extras( - ControlledFuture::new( - async move { - f.await; - }, - self.resource_ctl.clone(), - group_name, + with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + }, + self.resource_ctl.clone(), + group_name, + ), + resource_limiter, ), extras, ) @@ -155,6 +167,7 @@ impl SchedPool { reporter: R, feature_gate: FeatureGate, resource_ctl: Option>, + resource_mgr: Option>, ) -> Self { let builder = |pool_size: usize, name_prefix: &str| { let engine = Arc::new(Mutex::new(engine.clone())); @@ -181,6 +194,8 @@ impl SchedPool { destroy_tls_engine::(); tls_flush(&reporter); }) + .enable_task_wait_metrics() + .metric_idx_from_task_meta(Arc::new(priority_from_task_meta)) }; let vanilla = VanillaQueue { worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), @@ -191,6 +206,7 @@ impl SchedPool { worker_pool: builder(pool_size, "sched-worker-priority") .build_priority_future_pool(r.clone()), resource_ctl: r.clone(), + resource_mgr: 
resource_mgr.unwrap(), }); let queue_type = if resource_ctl.is_some() { QueueType::Dynamic diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 995c361e163c..6d087d894df6 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -472,6 +472,7 @@ impl TxnScheduler { reporter, feature_gate.clone(), resource_ctl, + resource_manager.clone(), ), control_mutex: Arc::new(tokio::sync::Mutex::new(false)), lock_mgr, @@ -1300,10 +1301,14 @@ impl TxnScheduler { // TODO: write bytes can be a bit inaccurate due to error requests or in-memory // pessimistic locks. sample.add_write_bytes(write_bytes); - // estimate the cpu time for write by the schdule cpu time and write bytes - let expected_dur = (sample.cpu_time() + Duration::from_micros(write_bytes as u64)) - * SCHEDULER_CPU_TIME_FACTOR; if let Some(limiter) = resource_limiter { + let expected_dur = if limiter.is_background() { + // estimate the cpu time for write by the schduling cpu time and write bytes + (sample.cpu_time() + Duration::from_micros(write_bytes as u64)) + * SCHEDULER_CPU_TIME_FACTOR + } else { + sample.cpu_time() + }; limiter .async_consume( expected_dur, @@ -2032,6 +2037,8 @@ mod tests { enable_async_apply_prewrite: false, ..Default::default() }; + let resource_manager = Arc::new(ResourceGroupManager::default()); + let controller = resource_manager.derive_controller("test".into(), false); ( TxnScheduler::new( engine.clone(), @@ -2049,11 +2056,8 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), latest_feature_gate(), - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - true, - ))), - None, + Some(controller), + Some(resource_manager), ), engine, ) @@ -2388,6 +2392,8 @@ mod tests { }; let feature_gate = FeatureGate::default(); feature_gate.set_version("6.0.0").unwrap(); + let resource_manager = Arc::new(ResourceGroupManager::default()); + let controller = resource_manager.derive_controller("test".into(), false); let scheduler = TxnScheduler::new( engine, @@ -2405,11 +2411,8 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), feature_gate.clone(), - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - true, - ))), - None, + Some(controller), + Some(resource_manager), ); // Use sync mode if pipelined_pessimistic_lock is false. 
assert_eq!(scheduler.pessimistic_lock_mode(), PessimisticLockMode::Sync); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 533d8d0abd4b..fec1ccc931db 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -352,12 +352,12 @@ fn test_scheduler_pool_auto_switch_for_resource_ctl() { .get(&1) .unwrap() .clone(); - let resource_manager = ResourceGroupManager::default(); + let resource_manager = Arc::new(ResourceGroupManager::default()); let resource_ctl = resource_manager.derive_controller("test".to_string(), true); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .config(cluster.cfg.tikv.storage.clone()) - .build_for_resource_controller(resource_ctl) + .build_for_resource_controller(resource_manager.clone(), resource_ctl) .unwrap(); let region = cluster.get_region(b"k1"); From eb28cf9927017b82d930ade81e1f844790f17ed3 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 13 Nov 2023 15:06:44 +0800 Subject: [PATCH 136/203] test: do not capture test output in make test_with_nextest (#15968) ref tikv/tikv#15967 Signed-off-by: glorv --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 103c502036e8..18ea19e48874 100644 --- a/Makefile +++ b/Makefile @@ -316,7 +316,7 @@ test: # Run tests with nextest. ifndef CUSTOM_TEST_COMMAND -test_with_nextest: export CUSTOM_TEST_COMMAND=nextest run +test_with_nextest: export CUSTOM_TEST_COMMAND=nextest run --nocapture endif test_with_nextest: export RUSTDOCFLAGS="-Z unstable-options --persist-doctests" test_with_nextest: From e29eae2fdc984042671c93541499cb4c20241066 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 13 Nov 2023 16:14:15 +0800 Subject: [PATCH 137/203] *: allow dynamic link openssl library (#15944) close tikv/tikv#15943 Currently, TiKV binaries are statically linked to the OpenSSL library, preventing the use of the host system's OpenSSL. This commit adds an option to build TiKV with dynamic linking of the OpenSSL library, enabling TiKV to utilize the host system's OpenSSL. This is particularly useful in FIPS scenarios where TiKV needs to delegate cryptographic operations to the host FIPS OpenSSL. By default, this option is disabled, and TiKV continues to statically link the OpenSSL library. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .dockerignore | 3 -- Cargo.lock | 6 +-- Cargo.toml | 17 +++++--- Dockerfile.FIPS | 43 +++++++++++++++++++ Makefile | 26 ++++++++--- cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-server/Cargo.toml | 1 + components/backup-stream/Cargo.toml | 2 +- components/encryption/Cargo.toml | 3 ++ components/encryption/export/Cargo.toml | 1 + components/encryption/src/io.rs | 13 +++++- scripts/check-bins.py | 57 +++++++++++++++++++------ src/lib.rs | 8 +++- 13 files changed, 147 insertions(+), 34 deletions(-) create mode 100644 Dockerfile.FIPS diff --git a/.dockerignore b/.dockerignore index b0a83d43c41a..4afd9fdf497d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,3 @@ -# This file is almost the same as .gitignore expect the next line. 
-.git - # OSX leaves these everywhere on SMB shares ._* diff --git a/Cargo.lock b/Cargo.lock index fba26935d1f6..89fa63ed848b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2950,7 +2950,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2969,7 +2969,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "bzip2-sys", "cc", @@ -4890,7 +4890,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index edebbc46f1dd..82846e98acf9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,17 @@ test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] test-engines-panic = ["engine_test/test-engines-panic"] pprof-fp = ["pprof/frame-pointer"] +openssl-vendored = [ + "openssl/vendored", + "hyper-tls/vendored", + # NB: the "openssl" feature does not make grpcio-sys v0.10 depends on + # openssl-sys, and it can not find the static openssl built by openssl-sys. + # Enabling "grpcio/openssl-vendored" explicitly makes grpcio-sys depends on + # openssl-sys and correctly links to the static openssl. + "grpcio/openssl-vendored", + # NB: Enable SM4 support if OpenSSL is built from source and statically linked. + "encryption_export/sm4", +] # for testing configure propegate to other crates # https://stackoverflow.com/questions/41700543/can-we-share-test-utilites-between-crates @@ -358,7 +369,7 @@ tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -grpcio = { version = "0.10.4", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } +grpcio = { version = "0.10.4", default-features = false, features = ["openssl", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10.4", default-features = false, features = ["protobuf-codec"] } tipb = { git = "https://github.com/pingcap/tipb.git" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } @@ -380,10 +391,6 @@ opt-level = 1 debug = false opt-level = 1 -[profile.dev.package.tirocks-sys] -debug = false -opt-level = 1 - [profile.dev.package.tests] debug = 1 opt-level = 1 diff --git a/Dockerfile.FIPS b/Dockerfile.FIPS new file mode 100644 index 000000000000..fe34ab00f656 --- /dev/null +++ b/Dockerfile.FIPS @@ -0,0 +1,43 @@ +# This Docker image contains a minimal build environment for a FIPS compliant TiKV. 
+ +FROM redhat/ubi8-minimal:8.6 as builder + +RUN microdnf install -y openssl-devel + +RUN microdnf install -y \ + gcc \ + gcc-c++ \ + libstdc++-static \ + make \ + cmake \ + perl \ + git \ + findutils \ + curl \ + python3 && \ + microdnf clean all + +# Install Rustup +RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y +ENV PATH /root/.cargo/bin/:$PATH + +# Checkout TiKV source code. +WORKDIR /tikv +COPY .git .git +ARG GIT_HASH +RUN git checkout ${GIT_HASH} && git checkout . + +# Do not static link OpenSSL. +ENV ENABLE_FIPS 1 +RUN make build_dist_release + +# Export to a clean image +FROM redhat/ubi8-minimal:8.6 +COPY --from=builder /tikv/target/release/tikv-server /tikv-server +COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl + +RUN microdnf install -y openssl + +EXPOSE 20160 20180 + +ENTRYPOINT ["/tikv-server"] diff --git a/Makefile b/Makefile index 18ea19e48874..b54d44036691 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,19 @@ ENABLE_FEATURES += cloud-gcp ENABLE_FEATURES += cloud-azure endif +export DOCKER_FILE ?= Dockerfile +export DOCKER_IMAGE_NAME ?= pingcap/tikv +export DOCKER_IMAGE_TAG ?= latest +export DEV_DOCKER_IMAGE_NAME ?= pingcap/tikv_dev +export ENABLE_FIPS ?= 0 + +ifeq ($(ENABLE_FIPS),1) +DOCKER_IMAGE_TAG := ${DOCKER_IMAGE_TAG}-fips +DOCKER_FILE := ${DOCKER_FILE}.FIPS +else +ENABLE_FEATURES += openssl-vendored +endif + PROJECT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) BIN_PATH = $(CURDIR)/bin @@ -135,10 +148,6 @@ export TIKV_BUILD_GIT_HASH ?= $(shell git rev-parse HEAD 2> /dev/null || echo ${ export TIKV_BUILD_GIT_TAG ?= $(shell git describe --tag || echo ${BUILD_INFO_GIT_FALLBACK}) export TIKV_BUILD_GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2> /dev/null || echo ${BUILD_INFO_GIT_FALLBACK}) -export DOCKER_IMAGE_NAME ?= "pingcap/tikv" -export DOCKER_IMAGE_TAG ?= "latest" -export DEV_DOCKER_IMAGE_NAME ?= "pingcap/tikv_dev" - # Turn on cargo pipelining to add more build parallelism. This has shown decent # speedups in TiKV. 
# @@ -155,6 +164,12 @@ ifeq ($(TIKV_BUILD_RUSTC_TARGET),aarch64-unknown-linux-gnu) export RUSTFLAGS := $(RUSTFLAGS) -Ctarget-feature=-outline-atomics endif +ifeq ($(shell basename $(shell which python 2>/dev/null)),python) +PY := python +else +PY := python3 +endif + # Almost all the rules in this Makefile are PHONY # Declaring a rule as PHONY could improve correctness # But probably instead just improves performance by a little bit @@ -248,7 +263,7 @@ dist_release: @mkdir -p ${BIN_PATH} @cp -f ${CARGO_TARGET_DIR}/release/tikv-ctl ${CARGO_TARGET_DIR}/release/tikv-server ${BIN_PATH}/ ifeq ($(shell uname),Linux) # Macs binary isn't elf format - @python scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server + $(PY) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server endif # Build with release flag as if it were for distribution, but without @@ -393,6 +408,7 @@ error-code: etc/error_code.toml docker: docker build \ -t ${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + -f ${DOCKER_FILE} \ --build-arg GIT_HASH=${TIKV_BUILD_GIT_HASH} \ --build-arg GIT_TAG=${TIKV_BUILD_GIT_TAG} \ --build-arg GIT_BRANCH=${TIKV_BUILD_GIT_BRANCH} \ diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 0a630ebc023a..e16fadf0836a 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -24,6 +24,7 @@ cloud-gcp = [ cloud-azure = [ "encryption_export/cloud-azure", ] +openssl-vendored = ["tikv/openssl-vendored"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 409dc84a62d8..ef278854dd75 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -18,6 +18,7 @@ failpoints = ["server/failpoints"] cloud-aws = ["server/cloud-aws"] cloud-gcp = ["server/cloud-gcp"] cloud-azure = ["server/cloud-azure"] +openssl-vendored = ["tikv/openssl-vendored"] test-engine-kv-rocksdb = [ "server/test-engine-kv-rocksdb" ] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 12979eab2123..1308d10966f8 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -41,7 +41,7 @@ engine_traits = { workspace = true } error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. -etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"], optional = true } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl"], optional = true } external_storage = { workspace = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 021c9f230026..336f2e1854f1 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -6,6 +6,9 @@ publish = false [features] failpoints = ["fail/failpoints"] +# openssl/vendored is necssary in order to conditionally building SM4 encryption +# support, as SM4 is disabled on various openssl distributions, such as Rocky Linux 9. 
+sm4 = ["openssl/vendored"] [dependencies] async-trait = "0.1" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index c1891a934806..829e33ae5aaa 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -9,6 +9,7 @@ default = ["cloud-aws", "cloud-gcp", "cloud-azure"] cloud-aws = ["aws"] cloud-gcp = [] cloud-azure = ["azure"] +sm4 = ["encryption/sm4"] [dependencies] async-trait = "0.1" diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index d7b7eb76b8a5..dc326e784272 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -390,7 +390,18 @@ pub fn create_aes_ctr_crypter( EncryptionMethod::Aes128Ctr => OCipher::aes_128_ctr(), EncryptionMethod::Aes192Ctr => OCipher::aes_192_ctr(), EncryptionMethod::Aes256Ctr => OCipher::aes_256_ctr(), - EncryptionMethod::Sm4Ctr => OCipher::sm4_ctr(), + EncryptionMethod::Sm4Ctr => { + #[cfg(feature = "sm4")] + { + OCipher::sm4_ctr() + } + #[cfg(not(feature = "sm4"))] + { + return Err(box_err!( + "sm4-ctr is not supported by dynamically linked openssl" + )); + } + } }; let crypter = OCrypter::new(cipher, mode, key, Some(iv.as_slice()))?; Ok((cipher, crypter)) diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 1255472a76a0..421a4df5ef4b 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -21,6 +21,22 @@ SYS_LIB = ["libstdc++"] +def ensure_link(args, require_static, libs): + p = os.popen("uname") + if "Linux" not in p.readline(): + return + for bin in args: + p = os.popen("ldd " + bin) + requires = set(l.split()[0] for l in p.readlines()) + for lib in libs: + if any(lib in r for r in requires): + if require_static: + pr("error: %s should not requires dynamic library %s\n" % (bin, lib)) + sys.exit(1) + elif not require_static: + pr("error: %s should requires dynamic library %s\n" % (bin, lib)) + sys.exit(1) + def pr(s): if sys.stdout.isatty(): sys.stdout.write("\x1b[2K\r" + s) @@ -72,6 +88,24 @@ def check_sse(executable): print("fix this by building tikv with ROCKSDB_SYS_SSE=1") sys.exit(1) +def is_openssl_vendored_enabled(features): + return "openssl-vendored" in features + +def check_openssl(executable, is_static_link): + openssl_libs = ["libcrypto", "libssl"] + ensure_link([executable], is_static_link, openssl_libs) + if is_static_link: + return + openssl_symbols = ["EVP_", "OPENSSL"] + p = os.popen('nm %s | grep -iE " (t|T) (%s)"' % (executable, "|".join(openssl_symbols))) + lines = p.readlines() + if lines: + pr( + "error: %s contains OpenSSL symbol %s in text section:\n%s\n" + % (executable, openssl_symbols, "".join(lines)) + ) + sys.exit(1) + def check_tests(features): if not is_jemalloc_enabled(features): print("jemalloc not enabled, skip check!") @@ -95,28 +129,22 @@ def check_tests(features): pr("Checking binary %s" % name) check_jemalloc(executable) + check_openssl(executable, True) pr("") print("Done, takes %.2fs." % (time.time() - start)) -def ensure_link(args): - p = os.popen("uname") - if "Linux" not in p.readline(): - return - for bin in args: - p = os.popen("ldd " + bin) - requires = set(l.split()[0] for l in p.readlines()) - for lib in SYS_LIB: - if any(lib in r for r in requires): - pr("error: %s should not requires dynamic library %s\n" % (bin, lib)) - sys.exit(1) - def check_release(enabled_features, args): - ensure_link(args) + # Ensure statically link SYS_LIB. 
+    ensure_link(args, True, SYS_LIB)
     checked_features = []
     if is_jemalloc_enabled(enabled_features):
         checked_features.append("jemalloc")
     if is_sse_enabled(enabled_features):
         checked_features.append("SSE4.2")
+    if is_openssl_vendored_enabled(enabled_features):
+        checked_features.append("static-link-openssl")
+    else:
+        checked_features.append("dynamic-link-openssl")
     if not checked_features:
         print("Both jemalloc and SSE4.2 are disabled, skip check")
         return
@@ -127,7 +155,8 @@ def check_release(enabled_features, args):
             check_jemalloc(arg)
         if is_sse_enabled(enabled_features):
             check_sse(arg)
-        pr("%s %s \033[32menabled\033[0m\n" % (arg, " ".join(checked_features)))
+        check_openssl(arg, is_openssl_vendored_enabled(enabled_features))
+        pr("%s [%s] \033[32menabled\033[0m\n" % (arg, " ".join(checked_features)))

 def main():
     argv = sys.argv
diff --git a/src/lib.rs b/src/lib.rs
index b3e9ebaf8e84..a0ccff3c8cbb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -79,8 +79,12 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String {
 }

 /// return the build version of tikv-server
-pub fn tikv_build_version() -> &'static str {
-    env!("CARGO_PKG_VERSION")
+pub fn tikv_build_version() -> String {
+    if option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") {
+        format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips")
+    } else {
+        env!("CARGO_PKG_VERSION").to_owned()
+    }
 }

 /// Prints the tikv version information to the standard output.

From a50e36052a8c3c470b6ba96ab515c7a7ab067525 Mon Sep 17 00:00:00 2001
From: Neil Shen
Date: Mon, 13 Nov 2023 16:38:14 +0800
Subject: [PATCH 138/203] backup-stream: remove dead code (#15976)

close tikv/tikv#15975

Since v7.1.0, TiKV no longer connects to the PD etcd API directly; instead it
uses the PD meta storage API. The code and crates guarded by the
"metastore-etcd" feature have therefore become dead code.
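
For reference, one way to double-check that nothing still refers to the removed
feature or its etcd-only dependencies (example commands run on a checkout of
this branch; they are illustrative and not part of the patch itself):

    grep -rn "metastore-etcd" components/ Cargo.toml Cargo.lock
    cargo tree -i etcd-client   # expected to report the package is not in the graph
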
Signed-off-by: Neil Shen --- Cargo.lock | 314 +-------- components/backup-stream/Cargo.toml | 7 - components/backup-stream/src/errors.rs | 25 - components/backup-stream/src/metadata/mod.rs | 2 - .../backup-stream/src/metadata/store/etcd.rs | 627 ------------------ .../src/metadata/store/lazy_etcd.rs | 316 --------- .../backup-stream/src/metadata/store/mod.rs | 8 - components/error_code/src/backup_stream.rs | 3 - 8 files changed, 3 insertions(+), 1299 deletions(-) delete mode 100644 components/backup-stream/src/metadata/store/etcd.rs delete mode 100644 components/backup-stream/src/metadata/store/lazy_etcd.rs diff --git a/Cargo.lock b/Cargo.lock index 89fa63ed848b..d629d2ac18a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -184,17 +184,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58982858be7540a465c790b95aaea6710e5139bf8956b1d1344d014fa40100b0" dependencies = [ - "async-stream-impl 0.2.0", - "futures-core", -] - -[[package]] -name = "async-stream" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" -dependencies = [ - "async-stream-impl 0.3.3", + "async-stream-impl", "futures-core", ] @@ -209,17 +199,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "async-stream-impl" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.103", -] - [[package]] name = "async-trait" version = "0.1.58" @@ -299,51 +278,6 @@ dependencies = [ "uuid 0.8.2", ] -[[package]] -name = "axum" -version = "0.5.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acee9fd5073ab6b045a275b3e709c163dd36c90685219cb21804a147b58dba43" -dependencies = [ - "async-trait", - "axum-core", - "bitflags", - "bytes", - "futures-util", - "http", - "http-body", - "hyper", - "itoa 1.0.1", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "serde", - "sync_wrapper", - "tokio", - "tower", - "tower-http", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e5939e02c56fecd5c017c37df4238c0a839fa76b7f97acdd7efb804fd181cc" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "mime", - "tower-layer", - "tower-service", -] - [[package]] name = "azure" version = "0.0.1" @@ -546,7 +480,6 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "cfg-if 1.0.0", "chrono", "concurrency_manager", "crossbeam", @@ -557,7 +490,6 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "etcd-client", "external_storage", "fail", "file_system", @@ -599,7 +531,6 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic", "txn_types", "url", "uuid 0.8.2", @@ -698,7 +629,7 @@ dependencies = [ "lazy_static", "lazycell", "peeking_take_while", - "prettyplease 0.2.6", + "prettyplease", "proc-macro2", "quote", "regex", @@ -1798,25 +1729,6 @@ dependencies = [ "tikv_alloc", ] -[[package]] -name = "etcd-client" -version = "0.10.2" -source = "git+https://github.com/pingcap/etcd-client?rev=41d393c32a7a7c728550cee1d9a138dafe6f3e27#41d393c32a7a7c728550cee1d9a138dafe6f3e27" -dependencies = [ - "http", - "hyper", - "hyper-openssl", - "openssl", - "prost", - "tokio", - "tokio-stream", - "tonic", - 
"tonic-build", - "tower", - "tower-service", - "visible", -] - [[package]] name = "event-listener" version = "2.5.1" @@ -1974,12 +1886,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "flate2" version = "1.0.11" @@ -2557,12 +2463,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" - [[package]] name = "http-types" version = "2.12.0" @@ -2643,18 +2543,6 @@ dependencies = [ "tower-layer", ] -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - [[package]] name = "hyper-tls" version = "0.5.0" @@ -3076,12 +2964,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -[[package]] -name = "matchit" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" - [[package]] name = "md-5" version = "0.9.1" @@ -3829,16 +3711,6 @@ dependencies = [ "ucd-trie", ] -[[package]] -name = "petgraph" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" -dependencies = [ - "fixedbitset", - "indexmap", -] - [[package]] name = "phf" version = "0.9.0" @@ -4002,16 +3874,6 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" -[[package]] -name = "prettyplease" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" -dependencies = [ - "proc-macro2", - "syn 1.0.103", -] - [[package]] name = "prettyplease" version = "0.2.6" @@ -4131,61 +3993,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "prost" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0841812012b2d4a6145fae9a6af1534873c32aa67fff26bd09f8fa42c83f95a" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8b442418ea0822409d9e7d047cbf1e7e9e1760b172bf9982cf29d517c93511" -dependencies = [ - "bytes", - "heck 0.4.1", - "itertools", - "lazy_static", - "log", - "multimap", - "petgraph", - "prettyplease 0.1.21", - "prost", - "prost-types", - "regex", - "syn 1.0.103", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164ae68b6587001ca506d3bf7f1000bfa248d0e1217b618108fba4ec1d0cc306" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 1.0.103", -] - -[[package]] -name = "prost-types" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "747761bc3dc48f9a34553bf65605cf6cb6288ba219f3450b4275dbd81539551a" -dependencies = [ - "bytes", - "prost", -] - [[package]] name = "protobuf" version = "2.8.0" @@ -6484,7 +6291,7 @@ version = "7.6.0-alpha" dependencies = [ "anyhow", "api_version", - "async-stream 0.2.0", + "async-stream", "async-trait", "backtrace", "batch-system", @@ -6931,16 +6738,6 @@ dependencies = [ "futures 0.1.31", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" -dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "1.7.0" @@ -7020,90 +6817,6 @@ dependencies = [ "serde", ] -[[package]] -name = "tonic" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55b9af819e54b8f33d453655bef9b9acc171568fb49523078d0cc4e7484200ec" -dependencies = [ - "async-stream 0.3.3", - "async-trait", - "axum", - "base64 0.13.0", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost", - "prost-derive", - "tokio", - "tokio-stream", - "tokio-util", - "tower", - "tower-layer", - "tower-service", - "tracing", - "tracing-futures", -] - -[[package]] -name = "tonic-build" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c6fd7c2581e36d63388a9e04c350c21beb7a8b059580b2e93993c526899ddc" -dependencies = [ - "prettyplease 0.1.21", - "proc-macro2", - "prost-build", - "quote", - "syn 1.0.103", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower-http" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c530c8675c1dbf98facee631536fa116b5fb6382d7dd6dc1b118d970eafe3ba" -dependencies = [ - "bitflags", - "bytes", - "futures-core", - "futures-util", - "http", - "http-body", - "http-range-header", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-layer" version = "0.3.1" @@ -7123,7 +6836,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f" dependencies = [ "cfg-if 1.0.0", - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -7149,16 +6861,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "tracing-futures" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" -dependencies = [ - "pin-project", - "tracing", -] - [[package]] name = "tracker" version = "0.0.1" @@ -7358,16 +7060,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "visible" -version = "0.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a044005fd5c0fc1ebd79c622e5606431c6b879a6a19acafb754be9926a2de73e" -dependencies = [ - 
"quote", - "syn 1.0.103", -] - [[package]] name = "waker-fn" version = "1.1.0" diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 1308d10966f8..141954686c38 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -11,8 +11,6 @@ test-engines-rocksdb = ["tikv/test-engines-rocksdb"] failpoints = ["tikv/failpoints", "fail/failpoints"] backup-stream-debug = [] -metastore-etcd = ["tonic", "etcd-client"] - [[test]] name = "integration" path = "tests/integration/mod.rs" @@ -30,7 +28,6 @@ harness = true async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" -cfg-if = "1" chrono = "0.4" concurrency_manager = { workspace = true } crossbeam = "0.8" @@ -39,9 +36,6 @@ dashmap = "5" engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } -# We cannot update the etcd-client to latest version because of the cyclic requirement. -# Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. -etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl"], optional = true } external_storage = { workspace = true } fail = "0.5" file_system = { workspace = true } @@ -78,7 +72,6 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } -tonic = { version = "0.8", optional = true } txn_types = { workspace = true } uuid = "0.8" yatp = { workspace = true } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index c3cc91da9ff5..67461e2978b2 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -5,8 +5,6 @@ use std::{ }; use error_code::ErrorCodeExt; -#[cfg(feature = "metastore-etcd")] -use etcd_client::Error as EtcdError; use grpcio::Error as GrpcError; use kvproto::{errorpb::Error as StoreError, metapb::*}; use pd_client::Error as PdError; @@ -22,9 +20,6 @@ use crate::{endpoint::Task, metrics}; pub enum Error { #[error("gRPC meet error {0}")] Grpc(#[from] GrpcError), - #[cfg(feature = "metasotre-etcd")] - #[error("Etcd meet error {0}")] - Etcd(#[from] EtcdErrorExt), #[error("Protobuf meet error {0}")] Protobuf(#[from] ProtobufError), #[error("No such task {task_name:?}")] @@ -54,30 +49,10 @@ pub enum Error { Other(#[from] Box), } -#[cfg(feature = "metastore-etcd")] -impl From for Error { - fn from(value: EtcdError) -> Self { - Self::Etcd(value.into()) - } -} - -#[cfg(feature = "metastore-etcd")] -#[derive(ThisError, Debug)] -pub enum EtcdErrorExt { - #[error("{0}")] - Normal(#[from] EtcdError), - #[error("the watch canceled")] - WatchCanceled, - #[error("the required revision has been compacted, current is {current}")] - RevisionCompacted { current: i64 }, -} - impl ErrorCodeExt for Error { fn error_code(&self) -> error_code::ErrorCode { use error_code::backup_stream::*; match self { - #[cfg(feature = "metastore-etcd")] - Error::Etcd(_) => ETCD, Error::Protobuf(_) => PROTO, Error::NoSuchTask { .. 
} => NO_SUCH_TASK, Error::MalformedMetadata(_) => MALFORMED_META, diff --git a/components/backup-stream/src/metadata/mod.rs b/components/backup-stream/src/metadata/mod.rs index a96e2f9bcb67..1150c2932bd7 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -8,5 +8,3 @@ pub mod store; pub mod test; pub use client::{Checkpoint, CheckpointProvider, MetadataClient, MetadataEvent, StreamTask}; -#[cfg(feature = "metastore-etcd")] -pub use store::lazy_etcd::{ConnectionConfig, LazyEtcdClient}; diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs deleted file mode 100644 index 62a246a08ef2..000000000000 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ /dev/null @@ -1,627 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - cmp::Ordering, - collections::{HashMap, HashSet}, - pin::Pin, - sync::{Arc, Weak}, - time::Duration, -}; - -use async_trait::async_trait; -use etcd_client::{ - Client, Compare, CompareOp, DeleteOptions, EventType, GetOptions, Member, PutOptions, - SortOrder, SortTarget, Txn, TxnOp, WatchOptions, -}; -use futures::StreamExt; -use tikv_util::{info, warn}; -use tokio::sync::Mutex; -use tokio_stream::Stream; - -use super::{ - GetExtra, GetResponse, Keys, KvChangeSubscription, KvEventType, MetaStore, Snapshot, - TransactionOp, -}; -use crate::{ - annotate, - errors::{Error, EtcdErrorExt, Result}, - metadata::{ - keys::{KeyValue, MetaKey}, - metrics::METADATA_KEY_OPERATION, - store::{KvEvent, Subscription}, - }, -}; -// Can we get rid of the mutex? (which means, we must use a singleton client.) -// Or make a pool of clients? -#[derive(Clone)] -pub struct EtcdStore(Arc>); - -#[derive(Default)] -pub(super) struct TopologyUpdater { - last_urls: HashSet, - client: Weak>, - - // back off configs - pub(super) loop_interval: Duration, - pub(super) loop_failure_back_off: Duration, -} - -impl std::fmt::Debug for TopologyUpdater { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TopologyUpdater") - .field("last_urls", &self.last_urls) - .finish() - } -} - -#[async_trait] -pub(super) trait ClusterInfoProvider { - async fn get_members(&mut self) -> Result>; - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()>; - async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()>; -} - -#[async_trait] -impl ClusterInfoProvider for Client { - async fn get_members(&mut self) -> Result> { - let result = self.member_list().await?; - Ok(result.members().to_vec()) - } - - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { - Client::add_endpoint(self, endpoint) - .await - .map_err(|err| annotate!(err, "during adding the endpoint {}", endpoint))?; - Ok(()) - } - - async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()> { - Client::remove_endpoint(self, endpoint) - .await - .map_err(|err| annotate!(err, "during removing the endpoint {}", endpoint))?; - Ok(()) - } -} - -#[derive(Debug, Clone, Copy)] -enum DiffType { - Add, - Remove, -} - -#[derive(Clone)] -struct Diff { - diff_type: DiffType, - url: String, -} - -impl std::fmt::Debug for Diff { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let syn = match self.diff_type { - DiffType::Add => "+", - DiffType::Remove => "-", - }; - write!(f, "{}{}", syn, self.url) - } -} - -impl TopologyUpdater { - // Note: we may require the initial endpoints from the arguments directly. 
- // So the internal map won't get inconsistent when the cluster config changed - // during initializing. - // But that is impossible for now because we cannot query the node ID before - // connecting. - pub fn new(cluster_ref: Weak>) -> Self { - Self { - last_urls: Default::default(), - client: cluster_ref, - - loop_interval: Duration::from_secs(60), - loop_failure_back_off: Duration::from_secs(10), - } - } - - pub fn init(&mut self, members: impl Iterator) { - for mem in members { - self.last_urls.insert(mem); - } - } - - fn diff(&self, incoming: &[Member]) -> Vec { - let newer = incoming - .iter() - .flat_map(|mem| mem.client_urls().iter()) - .collect::>(); - let mut result = vec![]; - for url in &newer { - if !self.last_urls.contains(*url) { - result.push(Diff { - diff_type: DiffType::Add, - url: String::clone(url), - }) - } - } - for url in &self.last_urls { - if !newer.contains(url) { - result.push(Diff { - diff_type: DiffType::Remove, - url: String::clone(url), - }) - } - } - result - } - - fn apply(&mut self, diff: &Diff) -> Option { - match diff.diff_type { - DiffType::Add => match self.last_urls.insert(diff.url.clone()) { - true => None, - false => Some(format!( - "the member to adding with url {} overrides existing urls.", - diff.url - )), - }, - DiffType::Remove => match self.last_urls.remove(&diff.url) { - true => None, - false => Some(format!( - "the member to remove with url {} hasn't been added.", - diff.url - )), - }, - } - } - - async fn update_topology_by(&mut self, cli: &mut C, diff: &Diff) -> Result<()> { - match diff.diff_type { - DiffType::Add => cli.add_endpoint(&diff.url).await?, - DiffType::Remove => cli.remove_endpoint(&diff.url).await?, - } - Ok(()) - } - - async fn do_update(&mut self, cli: &mut C) -> Result<()> { - let cluster = cli.get_members().await?; - let diffs = self.diff(cluster.as_slice()); - if !diffs.is_empty() { - info!("log backup updating store topology."; "diffs" => ?diffs, "current_state" => ?self); - } - for diff in diffs { - match self.apply(&diff) { - Some(warning) => { - warn!("log backup meet some wrong status when updating PD clients, skipping this update."; "warn" => %warning); - } - None => self.update_topology_by(cli, &diff).await?, - } - } - Result::Ok(()) - } - - pub(super) async fn update_topology_loop(&mut self) { - while let Some(cli) = self.client.upgrade() { - let mut lock = cli.lock().await; - let result = self.do_update(&mut lock).await; - drop(lock); - match result { - Ok(_) => tokio::time::sleep(self.loop_interval).await, - Err(err) => { - err.report("during updating etcd topology"); - tokio::time::sleep(self.loop_failure_back_off).await; - } - } - } - } - - pub async fn main_loop(mut self) { - info!("log backup topology updater finish initialization."; "current_state" => ?self); - self.update_topology_loop().await - } -} - -impl EtcdStore { - pub fn connect, S: AsRef<[E]>>(endpoints: S) -> Self { - // TODO remove block_on - let cli = - futures::executor::block_on(etcd_client::Client::connect(&endpoints, None)).unwrap(); - Self(Arc::new(Mutex::new(cli))) - } - - pub fn inner(&self) -> &Arc> { - &self.0 - } -} - -impl From for EtcdStore { - fn from(cli: etcd_client::Client) -> Self { - Self(Arc::new(Mutex::new(cli))) - } -} - -impl From for KvEventType { - fn from(e: EventType) -> Self { - match e { - EventType::Put => Self::Put, - EventType::Delete => Self::Delete, - } - } -} - -impl From for KeyValue { - fn from(kv: etcd_client::KeyValue) -> Self { - // TODO: we can move out the vector in the KeyValue struct here. 
(instead of - // copying.) But that isn't possible for now because: - // - The raw KV pair(defined by the protocol buffer of etcd) is private. - // - That did could be exported by `pub-fields` feature of the client. However - // that feature isn't published in theirs Cargo.toml (Is that a mistake?). - // - Indeed, we can use `mem::transmute` here because `etcd_client::KeyValue` - // has `#[repr(transparent)]`. But before here become a known bottle neck, I'm - // not sure whether it's worthwhile for involving unsafe code. - KeyValue(MetaKey(kv.key().to_owned()), kv.value().to_owned()) - } -} - -/// Prepare the etcd options required by the keys. -/// Return the start key for requesting. -macro_rules! prepare_opt { - ($opt:ident, $keys:expr) => { - match $keys { - Keys::Prefix(key) => { - $opt = $opt.with_prefix(); - key - } - Keys::Range(key, end_key) => { - $opt = $opt.with_range(end_key); - key - } - Keys::Key(key) => key, - } - }; -} - -#[async_trait] -impl MetaStore for EtcdStore { - type Snap = EtcdSnapshot; - - async fn snapshot(&self) -> Result { - let status = self.0.lock().await.status().await?; - Ok(EtcdSnapshot { - store: self.clone(), - revision: status.header().unwrap().revision(), - }) - } - - async fn watch(&self, keys: Keys, start_rev: i64) -> Result { - let mut opt = WatchOptions::new(); - let key = prepare_opt!(opt, keys); - opt = opt.with_start_revision(start_rev); - let (mut watcher, stream) = self.0.lock().await.watch(key, Some(opt)).await?; - Ok(Subscription { - stream: Box::pin(stream.flat_map( - |events| -> Pin> + Send>> { - match events { - Err(err) => Box::pin(tokio_stream::once(Err(err.into()))), - Ok(events) => { - if events.compact_revision() > 0 && events.canceled() { - return Box::pin(tokio_stream::once(Err(Error::Etcd( - EtcdErrorExt::RevisionCompacted { - current: events.compact_revision(), - }, - )))); - } - if events.canceled() { - return Box::pin(tokio_stream::once(Err(Error::Etcd( - EtcdErrorExt::WatchCanceled, - )))); - } - Box::pin(tokio_stream::iter( - // TODO: remove the copy here via access the protobuf field - // directly. 
- #[allow(clippy::unnecessary_to_owned)] - events.events().to_owned().into_iter().filter_map(|event| { - let kv = event.kv()?; - Some(Ok(KvEvent { - kind: event.event_type().into(), - pair: kv.clone().into(), - })) - }), - )) - } - } - }, - )), - cancel: Box::pin(async move { - if let Err(err) = watcher.cancel().await { - warn!("failed to cancel watch stream!"; "err" => %err); - } - }), - }) - } - - async fn txn(&self, t: super::Transaction) -> Result<()> { - let mut cli = self.0.lock().await; - let txns = Self::make_txn(&mut cli, t).await?; - for txn in txns { - cli.txn(txn).await?; - } - Ok(()) - } - - async fn set(&self, pair: KeyValue) -> Result<()> { - self.0.lock().await.put(pair.0, pair.1, None).await?; - Ok(()) - } - - async fn delete(&self, keys: Keys) -> Result<()> { - let mut opt = DeleteOptions::new(); - let key = prepare_opt!(opt, keys); - - self.0.lock().await.delete(key, Some(opt)).await?; - Ok(()) - } - - async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { - let mut cli = self.0.lock().await; - let txn = Self::make_conditional_txn(&mut cli, txn).await?; - cli.txn(txn).await?; - Ok(()) - } -} - -impl EtcdStore { - fn collect_leases_needed(txn: &super::Transaction) -> HashSet { - txn.ops - .iter() - .filter_map(|op| match op { - TransactionOp::Put(_, opt) if opt.ttl.as_secs() > 0 => Some(opt.ttl), - _ => None, - }) - .collect() - } - - async fn make_leases( - cli: &mut Client, - needed: HashSet, - ) -> Result> { - let mut map = HashMap::with_capacity(needed.len()); - for lease_time in needed { - let lease_id = cli.lease_grant(lease_time.as_secs() as _, None).await?.id(); - map.insert(lease_time, lease_id); - } - Ok(map) - } - - fn partition_txns(mut txn: super::Transaction, leases: HashMap) -> Vec { - txn.ops - .chunks_mut(128) - .map(|txn| Txn::default().and_then(Self::to_txn(txn, &leases))) - .collect() - } - - fn to_compare(cond: super::Condition) -> Compare { - let op = match cond.result { - Ordering::Less => CompareOp::Less, - Ordering::Equal => CompareOp::Equal, - Ordering::Greater => CompareOp::Greater, - }; - Compare::value(cond.over_key, op, cond.arg) - } - - /// Convert the transaction operations to etcd transaction ops. - fn to_txn(ops: &mut [super::TransactionOp], leases: &HashMap) -> Vec { - ops.iter_mut().map(|op| match op { - TransactionOp::Put(key, opt) => { - let opts = if opt.ttl.as_secs() > 0 { - let lease = leases.get(&opt.ttl); - match lease { - None => { - warn!("lease not found, the request key may not have a ttl"; "dur" => ?opt.ttl); - None - } - Some(lease_id) => { - Some(PutOptions::new().with_lease(*lease_id)) - } - } - } else { - None - }; - TxnOp::put(key.take_key(), key.take_value(), opts) - }, - TransactionOp::Delete(rng) => { - let rng = std::mem::replace(rng, Keys::Key(MetaKey(vec![]))); - let mut opt = DeleteOptions::new(); - let key = prepare_opt!(opt, rng); - TxnOp::delete(key, Some(opt)) - }, - }).collect::>() - } - - /// Make a conditional txn. - /// For now, this wouldn't split huge transaction into smaller ones, - /// so when playing with etcd in PD, conditional transaction should be - /// small. 
- async fn make_conditional_txn( - cli: &mut Client, - mut txn: super::CondTransaction, - ) -> Result { - let cond = Self::to_compare(txn.cond); - - let mut leases_needed = Self::collect_leases_needed(&txn.success); - leases_needed.extend(Self::collect_leases_needed(&txn.failure).into_iter()); - let leases = Self::make_leases(cli, leases_needed).await?; - let success = Self::to_txn(&mut txn.success.ops, &leases); - let failure = Self::to_txn(&mut txn.failure.ops, &leases); - Ok(Txn::new().when([cond]).and_then(success).or_else(failure)) - } - - async fn make_txn(cli: &mut Client, etcd_txn: super::Transaction) -> Result> { - let (put_cnt, delete_cnt) = etcd_txn.ops.iter().fold((0, 0), |(p, d), item| match item { - TransactionOp::Put(..) => (p + 1, d), - TransactionOp::Delete(_) => (p, d + 1), - }); - METADATA_KEY_OPERATION - .with_label_values(&["put"]) - .inc_by(put_cnt); - METADATA_KEY_OPERATION - .with_label_values(&["del"]) - .inc_by(delete_cnt); - let needed_leases = Self::collect_leases_needed(&etcd_txn); - let leases = Self::make_leases(cli, needed_leases).await?; - let txns = Self::partition_txns(etcd_txn, leases); - Ok(txns) - } -} - -pub struct EtcdSnapshot { - store: EtcdStore, - revision: i64, -} - -#[async_trait] -impl Snapshot for EtcdSnapshot { - async fn get_extra(&self, keys: Keys, extra: GetExtra) -> Result { - let mut opt = GetOptions::new(); - let key = prepare_opt!(opt, keys); - opt = opt.with_revision(self.revision); - if extra.desc_order { - opt = opt.with_sort(SortTarget::Key, SortOrder::Descend); - } - if extra.limit > 0 { - opt = opt.with_limit(extra.limit as _); - } - let resp = self.store.0.lock().await.get(key.0, Some(opt)).await?; - Ok(GetResponse { - kvs: resp - .kvs() - .iter() - .map(|kv| KeyValue(MetaKey(kv.key().to_owned()), kv.value().to_owned())) - .collect(), - more: resp.more(), - }) - } - - fn revision(&self) -> i64 { - self.revision - } -} - -#[cfg(test)] -mod test { - use std::{ - collections::{HashMap, HashSet}, - fmt::Display, - sync::Arc, - time::Duration, - }; - - use async_trait::async_trait; - use etcd_client::{proto::PbMember, Member}; - use tokio::{sync::Mutex, time::timeout}; - - use super::{ClusterInfoProvider, TopologyUpdater}; - use crate::errors::Result; - - #[derive(Default, Debug)] - struct FakeCluster { - id_alloc: u64, - members: HashMap, - endpoints: HashSet, - } - - #[async_trait] - impl ClusterInfoProvider for FakeCluster { - async fn get_members(&mut self) -> Result> { - let members = self.members.values().cloned().collect(); - Ok(members) - } - - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { - self.endpoints.insert(endpoint.to_owned()); - Ok(()) - } - - async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()> { - self.endpoints.remove(endpoint); - Ok(()) - } - } - - impl FakeCluster { - fn new_id(&mut self) -> u64 { - let i = self.id_alloc; - self.id_alloc += 1; - i - } - - fn init_with_member(&mut self, n: usize) -> Vec { - let mut endpoints = Vec::with_capacity(n); - for _ in 0..n { - let mem = self.add_member(); - let url = format!("fakestore://{}", mem); - self.endpoints.insert(url.clone()); - endpoints.push(url); - } - endpoints - } - - fn add_member(&mut self) -> u64 { - let id = self.new_id(); - let mut mem = PbMember::default(); - mem.id = id; - mem.client_ur_ls = vec![format!("fakestore://{}", id)]; - // Safety: `Member` is #[repr(transparent)]. 
- self.members.insert(id, unsafe { std::mem::transmute(mem) }); - id - } - - fn remove_member(&mut self, id: u64) -> bool { - self.members.remove(&id).is_some() - } - - fn check_consistency(&self, message: impl Display) { - let urls = self - .members - .values() - .flat_map(|mem| mem.client_urls().iter().cloned()) - .collect::>(); - assert_eq!( - urls, self.endpoints, - "{}: consistency check not passed.", - message - ); - } - } - - #[test] - fn test_topology_updater() { - let mut c = FakeCluster::default(); - let eps = c.init_with_member(3); - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - - let sc = Arc::new(Mutex::new(c)); - let mut tu = TopologyUpdater::new(Arc::downgrade(&sc)); - tu.loop_failure_back_off = Duration::ZERO; - tu.loop_interval = Duration::from_millis(100); - tu.init(eps.into_iter()); - - { - let mut sc = sc.blocking_lock(); - sc.check_consistency("after init"); - sc.add_member(); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("adding nodes"); - sc.add_member(); - sc.add_member(); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("adding more nodes"); - assert!(sc.remove_member(0), "{:?}", sc); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("removing nodes"); - } - - drop(sc); - rt.block_on(async { timeout(Duration::from_secs(1), tu.update_topology_loop()).await }) - .unwrap() - } -} diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs deleted file mode 100644 index 7dacf45e6976..000000000000 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - sync::Arc, - time::{Duration, SystemTime}, -}; - -use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; -use futures::Future; -use openssl::{ - pkey::PKey, - x509::{verify::X509VerifyFlags, X509}, -}; -use security::SecurityManager; -use tikv_util::{ - info, - stream::{RetryError, RetryExt}, - warn, -}; -use tokio::sync::Mutex as AsyncMutex; - -use super::{ - etcd::{EtcdSnapshot, TopologyUpdater}, - EtcdStore, MetaStore, -}; -use crate::errors::{ContextualResultExt, Result}; - -const RPC_TIMEOUT: Duration = Duration::from_secs(5); - -#[derive(Clone)] -pub struct LazyEtcdClient(Arc>); - -#[derive(Clone)] -pub struct ConnectionConfig { - pub tls: Arc, - pub keep_alive_interval: Duration, - pub keep_alive_timeout: Duration, -} - -impl Default for ConnectionConfig { - fn default() -> Self { - Self { - tls: Default::default(), - keep_alive_interval: Duration::from_secs(10), - keep_alive_timeout: Duration::from_secs(3), - } - } -} - -impl std::fmt::Debug for ConnectionConfig { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ConnectionConfig") - .field("keep_alive_interval", &self.keep_alive_interval) - .field("keep_alive_timeout", &self.keep_alive_timeout) - .finish() - } -} - -impl ConnectionConfig { - /// Convert the config to the connection option. - fn to_connection_options(&self) -> ConnectOptions { - let mut opts = ConnectOptions::new(); - if let Some(tls) = &self - .tls - .client_suite() - .map_err(|err| warn!("failed to load client suite!"; "err" => %err)) - .ok() - { - opts = opts.with_openssl_tls( - OpenSslClientConfig::default() - .ca_cert_pem(&tls.ca) - // Some of users may prefer using multi-level self-signed certs. 
- // In this scenario, we must set this flag or openssl would probably complain it cannot found the root CA. - // (Because the flags we provide allows users providing exactly one CA cert.) - // We haven't make it configurable because it is enabled in gRPC by default too. - // TODO: Perhaps implement grpc-io based etcd client, fully remove the difference between gRPC TLS and our custom TLS? - .manually(|c| c.cert_store_mut().set_flags(X509VerifyFlags::PARTIAL_CHAIN)) - .manually(|c| { - let mut client_certs= X509::stack_from_pem(&tls.client_cert)?; - let client_key = PKey::private_key_from_pem(&tls.client_key.0)?; - if !client_certs.is_empty() { - c.set_certificate(&client_certs[0])?; - } - if client_certs.len() > 1 { - for i in client_certs.drain(1..) { - c.add_extra_chain_cert(i)?; - } - } - c.set_private_key(&client_key)?; - Ok(()) - }), - ) - } - opts = opts - .with_keep_alive(self.keep_alive_interval, self.keep_alive_timeout) - .with_keep_alive_while_idle(false) - .with_timeout(RPC_TIMEOUT); - - opts - } -} - -impl LazyEtcdClient { - pub fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - let mut inner = LazyEtcdClientInner::new(endpoints, conf); - inner.normalize_urls(); - Self(Arc::new(AsyncMutex::new(inner))) - } - - // For testing -- check whether the endpoints are properly normalized. - #[cfg(test)] - pub(super) fn endpoints(&self) -> Vec { - self.0.blocking_lock().endpoints.clone() - } - - async fn get_cli(&self) -> Result { - let mut l = self.0.lock().await; - l.get_cli().await.cloned() - } -} - -#[derive(Clone)] -pub struct LazyEtcdClientInner { - conf: ConnectionConfig, - endpoints: Vec, - - last_modified: Option, - cli: Option, -} - -impl LazyEtcdClientInner { - fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - LazyEtcdClientInner { - conf, - endpoints: endpoints.iter().map(ToString::to_string).collect(), - last_modified: None, - cli: None, - } - } -} - -fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { - match etcd_err { - EtcdError::InvalidArgs(_) - | EtcdError::InvalidUri(_) - | EtcdError::Utf8Error(_) - | EtcdError::InvalidHeaderValue(_) - | EtcdError::EndpointError(_) - | EtcdError::OpenSsl(_) => false, - EtcdError::TransportError(_) - | EtcdError::IoError(_) - | EtcdError::WatchError(_) - | EtcdError::LeaseKeepAliveError(_) - | EtcdError::ElectError(_) => true, - EtcdError::GRpcStatus(grpc) => matches!( - grpc.code(), - tonic::Code::Unavailable - | tonic::Code::Aborted - | tonic::Code::Internal - | tonic::Code::ResourceExhausted - ), - } -} - -#[derive(Debug)] -struct RetryableEtcdError(EtcdError); - -impl RetryError for RetryableEtcdError { - fn is_retryable(&self) -> bool { - etcd_error_is_retryable(&self.0) - } -} - -impl From for RetryableEtcdError { - fn from(e: EtcdError) -> Self { - Self(e) - } -} - -pub async fn retry(mut action: impl FnMut() -> F) -> Result -where - F: Future>, -{ - use futures::TryFutureExt; - let r = tikv_util::stream::retry_ext( - move || action().err_into::(), - RetryExt::default().with_fail_hook(|err| info!("retry it"; "err" => ?err)), - ) - .await; - r.map_err(|err| err.0.into()) -} - -impl LazyEtcdClientInner { - fn normalize_urls(&mut self) { - let enabled_tls = self.conf.tls.client_suite().is_ok(); - for endpoint in self.endpoints.iter_mut() { - // Don't touch them when the schemes already provided. - // Given etcd is based on gRPC (which relies on HTTP/2), - // there shouldn't be other schemes available (Hopefully...) 
- if endpoint.starts_with("http://") || endpoint.starts_with("https://") { - continue; - } - let expected_scheme = if enabled_tls { "https" } else { "http" }; - *endpoint = format!("{}://{}", expected_scheme, endpoint) - } - info!("log backup normalized etcd endpoints"; "endpoints" => ?self.endpoints); - } - - async fn connect(&mut self) -> Result<&EtcdStore> { - let store = retry(|| { - // For now, the interface of the `etcd_client` doesn't us to control - // how to create channels when connecting, hence we cannot update the tls config - // at runtime, now what we did is manually check that each time we are getting - // the clients. - etcd_client::Client::connect( - self.endpoints.clone(), - Some(self.conf.to_connection_options()), - ) - }) - .await - .context("during connecting to the etcd")?; - let store = EtcdStore::from(store); - let mut updater = TopologyUpdater::new(Arc::downgrade(store.inner())); - self.cli = Some(store); - updater.init(self.endpoints.iter().cloned()); - tokio::task::spawn(updater.main_loop()); - Ok(self.cli.as_ref().unwrap()) - } - - pub async fn get_cli(&mut self) -> Result<&EtcdStore> { - let modified = self.conf.tls.get_config().is_modified(&mut self.last_modified) - // Don't reload once we cannot check whether it is modified. - // Because when TLS disabled, this would always fail. - .unwrap_or(false); - if !modified && self.cli.is_some() { - return Ok(self.cli.as_ref().unwrap()); - } - info!("log backup reconnecting to the etcd service."; "tls_modified" => %modified, "connected_before" => %self.cli.is_some()); - self.connect().await - } -} - -#[async_trait::async_trait] -impl MetaStore for LazyEtcdClient { - type Snap = EtcdSnapshot; - - async fn snapshot(&self) -> Result { - self.get_cli().await?.snapshot().await - } - - async fn watch( - &self, - keys: super::Keys, - start_rev: i64, - ) -> Result { - self.get_cli().await?.watch(keys, start_rev).await - } - - async fn txn(&self, txn: super::Transaction) -> Result<()> { - self.get_cli().await?.txn(txn).await - } - - async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { - self.get_cli().await?.txn_cond(txn).await - } -} - -#[cfg(test)] -mod tests { - use std::{fs::File, io::Write, path::PathBuf, sync::Arc}; - - use security::{SecurityConfig, SecurityManager}; - use tempfile::TempDir; - - use super::LazyEtcdClient; - use crate::{errors::Result, metadata::ConnectionConfig}; - - #[test] - fn test_normalize_url() -> Result<()> { - let endpoints = ["http://pd-1".to_owned(), "pd-2".to_owned()]; - let le = LazyEtcdClient::new(&endpoints, Default::default()); - assert_eq!(le.endpoints(), &["http://pd-1", "http://pd-2"]); - - let tempdir = TempDir::new()?; - let write_all = |path: &PathBuf, content| { - let mut f = File::create(path)?; - f.write_all(content)?; - Result::Ok(()) - }; - let ca = tempdir.path().join("ca"); - let cert = tempdir.path().join("cert"); - let key = tempdir.path().join("key"); - write_all(&ca, b"CA :3")?; - write_all(&cert, b"Cert :D")?; - write_all(&key, b"Key X)")?; - - let cfg = SecurityConfig { - ca_path: ca.to_string_lossy().into_owned(), - cert_path: cert.to_string_lossy().into_owned(), - key_path: key.to_string_lossy().into_owned(), - - ..Default::default() - }; - let sm = SecurityManager::new(&cfg).unwrap(); - let endpoints = ["https://pd-1".to_owned(), "pd-2".to_owned()]; - let le = LazyEtcdClient::new( - &endpoints, - ConnectionConfig { - tls: Arc::new(sm), - ..Default::default() - }, - ); - assert_eq!(le.endpoints(), &["https://pd-1", "https://pd-2"]); - Result::Ok(()) - 
} -} diff --git a/components/backup-stream/src/metadata/store/mod.rs b/components/backup-stream/src/metadata/store/mod.rs index 7cecda9720e8..00f18c470422 100644 --- a/components/backup-stream/src/metadata/store/mod.rs +++ b/components/backup-stream/src/metadata/store/mod.rs @@ -1,13 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -cfg_if::cfg_if! { - if #[cfg(feature = "metastore-etcd")] { - pub mod etcd; - pub mod lazy_etcd; - pub use etcd::EtcdStore; - } -} - // Note: these mods also used for integration tests, // so we cannot compile them only when `#[cfg(test)]`. // (See https://github.com/rust-lang/rust/issues/84629) diff --git a/components/error_code/src/backup_stream.rs b/components/error_code/src/backup_stream.rs index a4b28b0e9ee0..78cb544746df 100644 --- a/components/error_code/src/backup_stream.rs +++ b/components/error_code/src/backup_stream.rs @@ -3,9 +3,6 @@ define_error_codes! { "KV:LogBackup:", - ETCD => ("Etcd", - "Error during requesting the meta store(etcd)", - "Please check the connectivity between TiKV and PD."), PROTO => ("Proto", "Error during decode / encoding protocol buffer messages", "Please check the version of TiKV / BR are compatible, or whether data is corrupted." From 03499fff2e2bcb32cf33f76bc92f8b1cf3b4f0a3 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Tue, 14 Nov 2023 14:26:14 +0800 Subject: [PATCH 139/203] metric: add ttl expire metric (#15949) close tikv/tikv#15873 metric: add ttl expire metric Signed-off-by: Smityz Signed-off-by: Smilencer Co-authored-by: Ping Yu Co-authored-by: Ping Yu --- metrics/grafana/tikv_details.json | 228 +++++++++++++++++++++++- src/server/ttl/ttl_compaction_filter.rs | 47 ++++- 2 files changed, 266 insertions(+), 9 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index cc89e8aeae5a..3efa0e310662 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -8375,7 +8375,227 @@ "h": 7, "w": 12, "x": 0, - "y": 6 + "y": 62 + }, + "hiddenSeries": false, + "id": 23763573818, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_ttl_expire_kv_count_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 10, + "legendFormat": "{{instance}}", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL expire count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:35", + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:36", + "format": "short", + "label": null, + "logBase": 1, + "max": 
null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 62 + }, + "hiddenSeries": false, + "id": 23763573819, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_ttl_expire_kv_size_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 10, + "legendFormat": "{{instance}}", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL expire size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:35", + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:36", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 69 }, "id": 6985, "legend": { @@ -8471,7 +8691,7 @@ "h": 7, "w": 12, "x": 12, - "y": 6 + "y": 69 }, "id": 6987, "legend": { @@ -8564,7 +8784,7 @@ "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 76 }, "id": 6986, "legend": { @@ -8691,7 +8911,7 @@ "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 76 }, "id": 7326, "interval": null, diff --git a/src/server/ttl/ttl_compaction_filter.rs b/src/server/ttl/ttl_compaction_filter.rs index 06fc6981cf23..be4f0df6cf40 100644 --- a/src/server/ttl/ttl_compaction_filter.rs +++ b/src/server/ttl/ttl_compaction_filter.rs @@ -11,9 +11,23 @@ use engine_rocks::{ RocksTtlProperties, }; use engine_traits::raw_ttl::ttl_current_ts; +use prometheus::*; use crate::server::metrics::TTL_CHECKER_ACTIONS_COUNTER_VEC; +lazy_static! 
{ + pub static ref TTL_EXPIRE_KV_SIZE_COUNTER: IntCounter = register_int_counter!( + "tikv_ttl_expire_kv_size_total", + "Total size of rawkv ttl expire", + ) + .unwrap(); + pub static ref TTL_EXPIRE_KV_COUNT_COUNTER: IntCounter = register_int_counter!( + "tikv_ttl_expire_kv_count_total", + "Total number of rawkv ttl expire", + ) + .unwrap(); +} + #[derive(Default)] pub struct TtlCompactionFilterFactory { _phantom: PhantomData, @@ -41,10 +55,7 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { } let name = CString::new("ttl_compaction_filter").unwrap(); - let filter = TtlCompactionFilter:: { - ts: current, - _phantom: PhantomData, - }; + let filter = TtlCompactionFilter::::new(); Some((name, filter)) } @@ -56,6 +67,28 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { pub struct TtlCompactionFilter { ts: u64, _phantom: PhantomData, + expire_count: u64, + expire_size: u64, +} + +impl Drop for TtlCompactionFilter { + fn drop(&mut self) { + // Accumulate counters would slightly improve performance as prometheus counters + // are atomic variables underlying + TTL_EXPIRE_KV_SIZE_COUNTER.inc_by(self.expire_size); + TTL_EXPIRE_KV_COUNT_COUNTER.inc_by(self.expire_count); + } +} + +impl TtlCompactionFilter { + fn new() -> Self { + Self { + ts: ttl_current_ts(), + _phantom: PhantomData, + expire_count: 0, + expire_size: 0, + } + } } impl CompactionFilter for TtlCompactionFilter { @@ -83,7 +116,11 @@ impl CompactionFilter for TtlCompactionFilter { Ok(RawValue { expire_ts: Some(expire_ts), .. - }) if expire_ts <= self.ts => CompactionFilterDecision::Remove, + }) if expire_ts <= self.ts => { + self.expire_size += key.len() as u64 + value.len() as u64; + self.expire_count += 1; + CompactionFilterDecision::Remove + } Err(err) => { TTL_CHECKER_ACTIONS_COUNTER_VEC .with_label_values(&["ts_error"]) From d4eabb0fbb8854b970605ff84b58277e0410036b Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 14 Nov 2023 15:14:14 +0800 Subject: [PATCH 140/203] makefile: polish the validation of python path. (#15981) close tikv/tikv#15980 Signed-off-by: lucasliang --- Makefile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b54d44036691..632794f3208f 100644 --- a/Makefile +++ b/Makefile @@ -164,11 +164,8 @@ ifeq ($(TIKV_BUILD_RUSTC_TARGET),aarch64-unknown-linux-gnu) export RUSTFLAGS := $(RUSTFLAGS) -Ctarget-feature=-outline-atomics endif -ifeq ($(shell basename $(shell which python 2>/dev/null)),python) -PY := python -else -PY := python3 -endif +# If both python and python3 are installed, it will choose python as a preferred option. 
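+# `command -v` prints the resolved path only when the binary exists on PATH, so the `||` falls back to python3 when python is missing.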
+PYTHON := $(shell command -v python 2> /dev/null || command -v python3 2> /dev/null) # Almost all the rules in this Makefile are PHONY # Declaring a rule as PHONY could improve correctness @@ -263,7 +260,7 @@ dist_release: @mkdir -p ${BIN_PATH} @cp -f ${CARGO_TARGET_DIR}/release/tikv-ctl ${CARGO_TARGET_DIR}/release/tikv-server ${BIN_PATH}/ ifeq ($(shell uname),Linux) # Macs binary isn't elf format - $(PY) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server + $(PYTHON) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server endif # Build with release flag as if it were for distribution, but without From 36e2154f12e85ce5edc0a47d03757d826c37ac64 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Tue, 14 Nov 2023 16:29:14 +0800 Subject: [PATCH 141/203] raftstore: Inherit raftdb-path when upgrading to raft-engine (#15809) close tikv/tikv#15771 Inherit raftdb-path when upgrading to raft-engine, and support to custom set raft-engine dir when upgrading Signed-off-by: Smilencer Signed-off-by: Smityz Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- components/tikv_util/src/config.rs | 324 ++++++++++++++++++++++++++++- src/config/mod.rs | 250 +++++++++++++++++++++- 2 files changed, 560 insertions(+), 14 deletions(-) diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 39e143fc04ce..7b3e6cd2469e 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -1588,9 +1588,10 @@ macro_rules! numeric_enum_serializing_mod { /// States: /// 1. Init - Only source directory contains Raft data. /// 2. Migrating - A marker file contains the path of source directory. The -/// source directory contains a complete copy of Raft data. Target -/// directory may exist. 3. Completed - Only target directory contains Raft -/// data. Marker file may exist. +/// source directory contains a complete copy of Raft data. Target +/// directory may exist. +/// 3. Completed - Only target directory contains Raft data. Marker file may +/// exist. pub struct RaftDataStateMachine { root: PathBuf, in_progress_marker: PathBuf, @@ -1681,7 +1682,7 @@ impl RaftDataStateMachine { pub fn after_dump_data(&mut self) { assert!(Self::data_exists(&self.source)); assert!(Self::data_exists(&self.target)); - Self::must_remove(&self.source); // Enters the `Completed` state. + Self::must_remove_except(&self.source, &self.target); // Enters the `Completed` state. Self::must_remove(&self.in_progress_marker); } @@ -1733,6 +1734,31 @@ impl RaftDataStateMachine { } } + // Remove all files and directories under `remove_path` except `retain_path`. 
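+ // If `retain_path` is not located under `remove_path`, the whole directory is removed; otherwise only the sibling entries of `retain_path` are deleted, so a target engine directory nested inside the old source directory survives the cleanup.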
+ fn must_remove_except(remove_path: &Path, retain_path: &Path) { + if !remove_path.exists() { + info!("Path not exists"; "path" => %remove_path.display()); + return; + } + if !remove_path.is_dir() { + info!("Path is not a directory, so remove directly"; "path" => %remove_path.display()); + Self::must_remove(remove_path); + return; + } + if !retain_path.starts_with(remove_path) { + info!("Removing directory as retain path is not under remove path"; "retain path" => %retain_path.display(), "remove path" => %remove_path.display()); + Self::must_remove(remove_path); + return; + } + + for entry in fs::read_dir(remove_path).unwrap() { + let sub_path = entry.unwrap().path(); + if sub_path != retain_path { + Self::must_remove(&sub_path); + } + } + } + fn must_rename_dir(from: &Path, to: &Path) { fs::rename(from, to).unwrap(); let mut dir = to.to_path_buf(); @@ -1740,11 +1766,35 @@ impl RaftDataStateMachine { Self::sync_dir(&dir); } - fn data_exists(path: &Path) -> bool { - if !path.exists() || !path.is_dir() { + #[inline] + fn dir_exists(path: &Path) -> bool { + path.exists() && path.is_dir() + } + + pub fn raftengine_exists(path: &Path) -> bool { + if !Self::dir_exists(path) { return false; } - fs::read_dir(path).unwrap().next().is_some() + fs::read_dir(path).unwrap().any(|entry| { + if let Ok(e) = entry { + let p = e.path(); + p.is_file() && p.extension().map_or(false, |ext| ext == "raftlog") + } else { + false + } + }) + } + + pub fn raftdb_exists(path: &Path) -> bool { + if !Self::dir_exists(path) { + return false; + } + let current_file_path = path.join("CURRENT"); + current_file_path.exists() && current_file_path.is_file() + } + + pub fn data_exists(path: &Path) -> bool { + Self::raftengine_exists(path) || Self::raftdb_exists(path) } fn sync_dir(dir: &Path) { @@ -2348,6 +2398,98 @@ yyy = 100 ); } + #[test] + fn test_raft_engine_switch() { + // default setting, raft-db and raft-engine are not in the same place, need + // dump raft data from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = root.join("target"); + fs::create_dir_all(&target).unwrap(); + let mut state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + + // raft-db is eventually moved, can't dump from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let target = root.join("target"); + fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap_err(); + fs::remove_dir_all(&root).unwrap(); + + // when setting raft-db dir, raft-engine dir is not set, raft-engine dir + // inherit from raft-db dir, need to dump raft data from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = source.join("target"); + 
fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + + // inherit scenario raft-db is eventually moved, can't dump from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let target = source.join("target"); + fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap_err(); + fs::remove_dir_all(&root).unwrap(); + + // raft-db dump from raft-engine + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = source.join("target"); + fs::create_dir_all(&target).unwrap(); + let mut state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + } + #[test] fn test_raft_data_migration() { fn run_migration(root: &Path, source: &Path, target: &Path, check: F) { @@ -2370,12 +2512,15 @@ yyy = 100 fs::write(&marker, backup_marker).unwrap(); } - let source_file = source.join("file"); - let target_file = target.join("file"); + let mut source_file = source.join("CURRENT"); + let target_file = target.join("0000000000000001.raftlog"); if !target.exists() { fs::create_dir_all(target).unwrap(); check(); } + if !source_file.exists() { + source_file = source.join("0000000000000001.raftlog"); + } fs::copy(source_file, target_file).unwrap(); check(); state.after_dump_data_with_check(&check); @@ -2407,7 +2552,7 @@ yyy = 100 let target = root.join("target"); fs::create_dir_all(&target).unwrap(); // Write some data into source. - let source_file = source.join("file"); + let source_file = source.join("CURRENT"); File::create(source_file).unwrap(); let backup = dir.path().join("backup"); @@ -2423,4 +2568,163 @@ yyy = 100 copy_dir(&backup, &root).unwrap(); }); } + + #[test] + fn test_must_remove_except() { + fn create_raftdb(path: &Path) { + fs::create_dir(path).unwrap(); + // CURRENT file as the marker of raftdb. 
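+ // (raft-engine data is detected by `*.raftlog` files instead, see `raftengine_exists`.)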
+ let raftdb_data = path.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + } + + fn create_raftengine(path: &Path) { + fs::create_dir(path).unwrap(); + let raftengine_data = path.join("raftengine_data"); + fs::File::create(raftengine_data).unwrap(); + } + + fn create_test_root(path: &Path) { + fs::create_dir(path).unwrap(); + } + + fn raftengine_must_exist(path: &Path) { + assert!(path.exists()); + let raftengine_data = path.join("raftengine_data"); + assert!(raftengine_data.exists()); + } + + fn raftdb_must_not_exist(path: &Path) { + assert!(!path.exists()); + let raftdb_data = path.join("raftdb_data"); + assert!(!raftdb_data.exists()); + } + let test_dir = tempfile::Builder::new() + .tempdir() + .unwrap() + .into_path() + .join("test_must_remove_except"); + + // before: + // test_must_remove_except + // ├── raftdb + // │ └── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftdb_dir = test_dir.join("raftdb"); + let raftengine_dir = test_dir.join("raftengine"); + create_raftdb(&raftdb_dir); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + raftdb_must_not_exist(&raftdb_dir); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // └── raftdb + // ├── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftdb + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftdb_dir = test_dir.join("raftdb"); + let raftengine_dir = raftdb_dir.join("raftengine"); + create_raftdb(&raftdb_dir); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + assert!(!test_dir.join("raftdb/raftdb_data").exists()); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // └── raftengine + // ├── raftdb + // │ └── raftdb_data + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftengine_dir = test_dir.join("raftengine"); + let raftdb_dir = raftengine_dir.join("raftdb"); + create_raftengine(&raftengine_dir); + create_raftdb(&raftdb_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + raftdb_must_not_exist(&raftdb_dir); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // ├── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftdb_data = test_dir.join("raftdb_data"); + fs::File::create(raftdb_data).unwrap(); + let raftengine_dir = test_dir.join("raftengine"); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&test_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + assert!(!test_dir.join("raftdb_data").exists()); + fs::remove_dir_all(&test_dir).unwrap(); + } + + #[test] + fn test_raft_data_exist() { + fn clear_dir(path: &PathBuf) { + if path.exists() { + fs::remove_dir_all(path).unwrap(); + } + fs::create_dir(path).unwrap(); + } + let test_dir = tempfile::Builder::new().tempdir().unwrap().into_path(); + + 
clear_dir(&test_dir); + fs::File::create(test_dir.join("0000000000000001.raftlog")).unwrap(); + assert!(RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("0000000000000001.raftlog")).unwrap(); + fs::File::create(test_dir.join("trash")).unwrap(); + assert!(RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("raftlog")).unwrap(); + assert!(!RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + assert!(!RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("CURRENT")).unwrap(); + assert!(RaftDataStateMachine::raftdb_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("NOT_CURRENT")).unwrap(); + assert!(!RaftDataStateMachine::raftdb_exists(&test_dir)); + + clear_dir(&test_dir); + assert!(!RaftDataStateMachine::raftdb_exists(&test_dir)); + } } diff --git a/src/config/mod.rs b/src/config/mod.rs index b192a7ac5f75..4846b9465fc2 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3544,8 +3544,24 @@ impl TikvConfig { .unwrap() .to_owned(); } - self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; - self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; + + match ( + self.raft_store.raftdb_path.is_empty(), + self.raft_engine.config.dir.is_empty(), + ) { + (false, true) => { + // If raftdb_path is specified, raft_engine_path will inherit it, this will be + // useful when updating from older version. + self.raft_engine.config.dir = + self.infer_raft_engine_path(Some(self.raft_store.raftdb_path.as_str()))?; + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; + } + _ => { + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; + self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; + } + } + if self.log_backup.temp_path.is_empty() { self.log_backup.temp_path = config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; @@ -4075,7 +4091,10 @@ impl TikvConfig { last_cfg.raftdb.wal_dir, self.raftdb.wal_dir )); } - if last_raft_engine_dir != self.raft_engine.config.dir { + + if RaftDataStateMachine::raftengine_exists(Path::new(&last_raft_engine_dir)) + && last_raft_engine_dir != self.raft_engine.config.dir + { return Err(format!( "raft engine dir have been changed, former is '{}', \ current is '{}', please check if it is expected.", @@ -4733,6 +4752,21 @@ mod tests { }, }; + fn create_mock_raftdb(path: &Path) { + fs::create_dir_all(path).unwrap(); + fs::File::create(path.join("CURRENT")).unwrap(); + } + + fn create_mock_raftengine(path: &Path) { + fs::create_dir_all(path).unwrap(); + fs::File::create(path.join("0000000000000001.raftlog")).unwrap(); + } + + fn create_mock_kv_data(path: &Path) { + fs::create_dir_all(path.join("db")).unwrap(); + fs::File::create(path.join("db").join("CURRENT")).unwrap(); + } + #[test] fn test_case_macro() { let h = kebab_case!(HelloWorld); @@ -4783,7 +4817,8 @@ mod tests { tikv_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); - tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); + // no actual raft engine data + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); last_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); @@ -4834,6 +4869,213 @@ mod tests { ); } } + + let test_dir = tempfile::Builder::new() + .tempdir() + .unwrap() + .into_path() + .join("unittest_raft_engine_dir"); + 
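+ // Each block below simulates a different restart or upgrade combination of raftdb / raft-engine directories and on-disk data.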
let data_dir = test_dir.join("data"); + + // simulate tikv restart + // enable raft engine: true + // need dump data from raftdb: false + // custom raft dir: true + { + let raft_dir = test_dir.join("raft"); + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = raft_dir.to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = raft_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_raftengine(&raft_dir); + create_mock_kv_data(&data_dir); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv restart + // enable raft engine: true + // need dump data from raftdb: false + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update + // enable raft engine: true + // need dump data from raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&data_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // multi raft engine dir + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + 
create_mock_raftdb(&data_dir.join("raft")); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap_err(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update with custom raft dir + // enable raft engine: true + // need dump data from raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + test_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&test_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update with custom raft dir + // enable raft engine: true + // need dump data from raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = "".to_owned(); + tikv_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&test_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + assert_eq!( + tikv_cfg.raft_engine.config.dir, + test_dir.join("raft").join("raft-engine").to_str().unwrap() + ); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv downgrade to raftdb + // need dump data from raft-engine + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + last_cfg.raft_engine.enable = true; + + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.enable = false; + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = 
data_dir.to_str().unwrap().to_owned(); + last_cfg.raft_engine.enable = true; + + tikv_cfg.raft_engine.mut_config().dir = "".to_owned(); + tikv_cfg.raft_engine.enable = false; + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap_err(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); + fs::remove_dir_all(&test_dir).unwrap(); + } } #[test] From 8c3676cb28a38b675368ae1cdd6f9ac0d15e05fb Mon Sep 17 00:00:00 2001 From: Lanqing Yang Date: Tue, 14 Nov 2023 22:32:16 -0800 Subject: [PATCH 142/203] chore: clean up fixme on latch.rs (#15974) close tikv/tikv#15973 clean up fixme on latch.rs Signed-off-by: lyang24 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/txn/latch.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index a662d9bab79c..c76d71d7c7a8 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -62,8 +62,6 @@ impl Latch { } self.waiting.push_front(item); } - // FIXME: remove this clippy attribute once https://github.com/rust-lang/rust-clippy/issues/6784 is fixed. - #[allow(clippy::manual_flatten)] for it in self.waiting.iter_mut() { if let Some((v, _)) = it { if *v == key_hash { From c9052f9bca8ef290bbd860f6439ef806c0149a1e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 15 Nov 2023 15:53:17 +0800 Subject: [PATCH 143/203] fips: enable OpenSSL FIPS mode on TiKV start if it's eligible (#15983) close tikv/tikv#15982 In this commit, a new crate, "fips", is introduced. This crate includes utilities designed to enable OpenSSL FIPS mode, catering to various OpenSSL releases. This commit ensures that TiKV starts with OpenSSL FIPS mode enabled if it is built with an environment variable "ENABLE_FIPS=1". 
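For reference, a minimal sketch of the intended call pattern in a binary that depends on the new
"fips" crate (this mirrors the cmd/tikv-server/src/main.rs hunk below; argument parsing, config
loading and logger setup are elided):

    fn main() {
        // FIPS mode must be switched on before any other OpenSSL usage,
        // so this is the very first call in the binary.
        fips::maybe_enable();

        // ... parse arguments, load the config, initialize the logger ...

        // Once logging is available, report whether FIPS mode is active and
        // whether it was enabled via OpenSSL 1.x (`fips::enable`) or by
        // loading the OpenSSL 3.x "fips" provider.
        fips::log_status();
    }

When the binary is built without ENABLE_FIPS=1, the build script emits a `disable_fips` cfg,
`fips::can_enable()` returns false, and `maybe_enable()` becomes a no-op, so the same code
compiles in both modes.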
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 13 ++++++++ Cargo.toml | 7 +++- cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-ctl/src/main.rs | 6 ++++ cmd/tikv-server/Cargo.toml | 1 + cmd/tikv-server/src/main.rs | 14 ++++++++ components/backup-stream/Cargo.toml | 2 +- components/cloud/Cargo.toml | 2 +- components/cloud/azure/Cargo.toml | 2 +- components/encryption/Cargo.toml | 2 +- components/encryption/export/Cargo.toml | 2 +- components/external_storage/Cargo.toml | 2 +- components/file_system/Cargo.toml | 2 +- components/fips/Cargo.toml | 12 +++++++ components/fips/build.rs | 32 ++++++++++++++++++ components/fips/src/lib.rs | 44 +++++++++++++++++++++++++ components/raftstore/Cargo.toml | 2 +- components/server/src/server.rs | 9 ----- components/server/src/server2.rs | 9 ----- components/server/src/setup.rs | 1 - components/sst_importer/Cargo.toml | 2 +- components/tidb_query_expr/Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 2 +- scripts/check-bins.py | 2 +- src/lib.rs | 2 +- 25 files changed, 142 insertions(+), 33 deletions(-) create mode 100644 components/fips/Cargo.toml create mode 100644 components/fips/build.rs create mode 100644 components/fips/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index d629d2ac18a4..390b748cddd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1866,6 +1866,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "fips" +version = "0.0.1" +dependencies = [ + "openssl", + "openssl-sys", + "slog", + "slog-global", +] + [[package]] name = "fix-hidden-lifetime-bug" version = "0.2.5" @@ -6317,6 +6327,7 @@ dependencies = [ "example_coprocessor_plugin", "fail", "file_system", + "fips", "flate2", "futures 0.3.15", "futures-executor", @@ -6433,6 +6444,7 @@ dependencies = [ "engine_traits", "error_code", "file_system", + "fips", "futures 0.3.15", "gag", "grpcio", @@ -6509,6 +6521,7 @@ dependencies = [ "clap 2.33.0", "encryption_export", "engine_traits", + "fips", "keys", "kvproto", "raft-engine", diff --git a/Cargo.toml b/Cargo.toml index 82846e98acf9..2d26bf6afe43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,6 +78,7 @@ engine_traits_tests = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } +fips = { workspace = true } flate2 = { version = "1.0", default-features = false, features = ["zlib"] } futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" @@ -111,7 +112,7 @@ notify = "4" num-traits = "0.2.14" num_cpus = "1" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot = "0.12" paste = "1.0" pd_client = { workspace = true } @@ -249,6 +250,7 @@ members = [ "components/error_code", "components/external_storage", "components/file_system", + "components/fips", "components/into_other", "components/keys", "components/log_wrappers", @@ -323,6 +325,7 @@ engine_traits_tests = { path = "components/engine_traits_tests", default-feature error_code = { path = "components/error_code" } external_storage = { path = "components/external_storage" } file_system = { path = "components/file_system" } +fips = { path = "components/fips" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } keys = { path = "components/keys" } @@ -378,6 +381,8 @@ tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hot tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = 
"2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +openssl = "0.10" +openssl-sys = "0.9" [profile.dev.package.grpcio-sys] debug = false diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index e16fadf0836a..82553a4b45a1 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -54,6 +54,7 @@ engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } +fips = { workspace = true } futures = "0.3" gag = "1.0" grpcio = { workspace = true } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index df17e81f1ef4..a3961bbc928f 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -61,11 +61,17 @@ mod fork_readonly_tikv; mod util; fn main() { + // OpenSSL FIPS mode should be enabled at the very start. + fips::maybe_enable(); + let opt = Opt::from_args(); // Initialize logger. init_ctl_logger(&opt.log_level); + // Print OpenSSL FIPS mode status. + fips::log_status(); + // Initialize configuration and security manager. let cfg_path = opt.config.as_ref(); let mut cfg = cfg_path.map_or_else( diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index ef278854dd75..6f916d7476d3 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -36,6 +36,7 @@ pprof-fp = ["tikv/pprof-fp"] clap = "2.32" encryption_export = { workspace = true } engine_traits = { workspace = true } +fips = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git" } diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 3f4372c32cc1..01354906b466 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -13,6 +13,9 @@ use tikv::{ }; fn main() { + // OpenSSL FIPS mode should be enabled at the very start. + fips::maybe_enable(); + let build_timestamp = option_env!("TIKV_BUILD_TIME"); let version_info = tikv::tikv_version_info(build_timestamp); @@ -217,6 +220,17 @@ fn main() { process::exit(1) } + // Sets the global logger ASAP. + // It is okay to use the config w/o `validate()`, + // because `initial_logger()` handles various conditions. + server::setup::initial_logger(&config); + + // Print version information. + tikv::log_tikv_info(build_timestamp); + + // Print OpenSSL FIPS mode status. + fips::log_status(); + // Init memory related settings. 
config.memory.init(); diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 141954686c38..6a5a0edbba5c 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -49,7 +49,7 @@ kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } pd_client = { workspace = true } pin-project = "1.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index 3931370390e6..b5f1e8faffd6 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -11,7 +11,7 @@ error_code = { workspace = true } futures-io = "0.3" kvproto = { workspace = true } lazy_static = "1.3" -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } rusoto_core = "0.46.0" diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 04f00c4bb604..7dd98224a73d 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -21,7 +21,7 @@ futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } oauth2 = { version = "4.0.0", default-features = false } -openssl = { version = "0.10.50" } +openssl = { workspace = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" slog = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 336f2e1854f1..9698618a4ba9 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -28,7 +28,7 @@ hex = "0.4.2" kvproto = { workspace = true } lazy_static = "1.3" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 829e33ae5aaa..1a7b64eb7be5 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -21,7 +21,7 @@ encryption = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } kvproto = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } slog = { workspace = true } # better to not use slog-global, but pass in the logger diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 25a852f9d5c3..69de83e54743 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -19,7 +19,7 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } gcp = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly", "push"] } rand = "0.8" slog = { workspace = true } diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index fbd96c3c3482..ef7df46936d2 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -15,7 +15,7 @@ fs2 = "0.4" lazy_static = "1.3" libc = "0.2" 
online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot = "0.12" prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" diff --git a/components/fips/Cargo.toml b/components/fips/Cargo.toml new file mode 100644 index 000000000000..ab0d2aa1cf79 --- /dev/null +++ b/components/fips/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fips" +version = "0.0.1" +edition = "2021" +publish = false + +[dependencies] +openssl = { workspace = true } +openssl-sys = { workspace = true } +slog = { workspace = true } +# better to not use slog-global, but pass in the logger +slog-global = { workspace = true } diff --git a/components/fips/build.rs b/components/fips/build.rs new file mode 100644 index 000000000000..5bfe4920c2d0 --- /dev/null +++ b/components/fips/build.rs @@ -0,0 +1,32 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::env; + +fn main() { + if !option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") { + println!("cargo:rustc-cfg=disable_fips"); + return; + } + if let Ok(version) = env::var("DEP_OPENSSL_VERSION_NUMBER") { + let version = u64::from_str_radix(&version, 16).unwrap(); + + #[allow(clippy::unusual_byte_groupings)] + // Follow OpenSSL numeric release version identifier style: + // MNNFFPPS: major minor fix patch status + // See https://github.com/openssl/openssl/blob/OpenSSL_1_0_0-stable/crypto/opensslv.h + if version >= 0x3_00_00_00_0 { + println!("cargo:rustc-cfg=ossl3"); + } else { + println!("cargo:rustc-cfg=ossl1"); + } + } else { + panic!( + " + +The DEP_OPENSSL_VERSION_NUMBER environment variable is not found. +Please make sure \"openssl-sys\" is in fips's dependencies. + +" + ) + } +} diff --git a/components/fips/src/lib.rs b/components/fips/src/lib.rs new file mode 100644 index 000000000000..b466401af4ff --- /dev/null +++ b/components/fips/src/lib.rs @@ -0,0 +1,44 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::atomic::{AtomicUsize, Ordering}; + +static FIPS_VERSION: AtomicUsize = AtomicUsize::new(0); + +/// Enable OpenSSL FIPS mode if `can_enable` returns true. +/// It should be called at the very start of a program. +pub fn maybe_enable() { + if !can_enable() { + return; + } + #[cfg(ossl1)] + { + openssl::fips::enable(true).unwrap(); + FIPS_VERSION.store(1, Ordering::SeqCst); + return; + } + #[cfg(ossl3)] + { + std::mem::forget(openssl::provider::Provider::load(None, "fips").unwrap()); + FIPS_VERSION.store(3, Ordering::SeqCst); + return; + } + #[allow(unreachable_code)] + { + slog_global::warn!("OpenSSL FIPS mode is disabled unexpectedly"); + } +} + +/// Return true if it is built for FIPS mode. +pub fn can_enable() -> bool { + !cfg!(disable_fips) +} + +/// Prints OpenSSL FIPS mode status. 
+pub fn log_status() { + let ver = FIPS_VERSION.load(Ordering::SeqCst); + if ver == 0 { + slog_global::info!("OpenSSL FIPS mode is disabled"); + } else { + slog_global::info!("OpenSSL FIPS mode is enabled"; "openssl_major_version" => ver); + } +} diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 3a8caa421e58..81e809a02059 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -59,7 +59,7 @@ log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug log_wrappers = { workspace = true } memory_trace_macros = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } ordered-float = "2.6" parking_lot = "0.12" pd_client = { workspace = true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 72e09a9f8d8e..625d9b7cb4f3 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -198,15 +198,6 @@ pub fn run_tikv( service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, ) { - // Sets the global logger ASAP. - // It is okay to use the config w/o `validate()`, - // because `initial_logger()` handles various conditions. - initial_logger(&config); - - // Print version information. - let build_timestamp = option_env!("TIKV_BUILD_TIME"); - tikv::log_tikv_info(build_timestamp); - // Print resource quota. SysQuota::log_quota(); CPU_CORES_QUOTA_GAUGE.set(SysQuota::cpu_cores_quota()); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index eab384871e6c..1e170abb1c34 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -194,15 +194,6 @@ pub fn run_tikv( service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, ) { - // Sets the global logger ASAP. - // It is okay to use the config w/o `validate()`, - // because `initial_logger()` handles various conditions. - initial_logger(&config); - - // Print version information. - let build_timestamp = option_env!("TIKV_BUILD_TIME"); - tikv::log_tikv_info(build_timestamp); - // Print resource quota. 
SysQuota::log_quota(); CPU_CORES_QUOTA_GAUGE.set(SysQuota::cpu_cores_quota()); diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index b11ffbc45b60..0228e0c7f288 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -74,7 +74,6 @@ fn make_engine_log_path(path: &str, sub_path: &str, filename: &str) -> String { }) } -#[allow(dead_code)] pub fn initial_logger(config: &TikvConfig) { fail::fail_point!("mock_force_uninitial_logger", |_| { LOG_INITIALIZED.store(false, Ordering::SeqCst); diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index f4f2504a8b34..b501e509a8a3 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -39,7 +39,7 @@ kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false } protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index 8a1784019059..e09c0cd96de0 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -17,7 +17,7 @@ log_wrappers = { workspace = true } match-template = "0.0.1" num = { version = "0.3", default-features = false } num-traits = "0.2" -openssl = { version = "0.10" } +openssl = { workspace = true } protobuf = "2" rand = "0.8.3" regex = "1.1" diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index b502a701136c..0b6fc5978cb8 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -37,7 +37,7 @@ nix = "0.24" num-traits = "0.2" num_cpus = "1" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot_core = "0.9.1" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 421a4df5ef4b..cbc748af958d 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,7 +14,7 @@ "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", - "tracker", "test_raftstore_macro" + "tracker", "test_raftstore_macro", "fips" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/src/lib.rs b/src/lib.rs index a0ccff3c8cbb..b300399e30a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") { + if fips::can_enable() { format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") } else { env!("CARGO_PKG_VERSION").to_owned() From 26f36d4b54a70267e029ee324a3aa267688bab2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mustafa=20Ate=C5=9F=20Uzun?= Date: Wed, 15 Nov 2023 11:08:16 +0300 Subject: [PATCH 144/203] chore: fix error message typo (#15363) ref tikv/tikv#15990 fix error message typo Co-authored-by: Xinye Tao Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/util.rs | 2 +- components/tikv_kv/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/pd_client/src/util.rs 
b/components/pd_client/src/util.rs
index 66b084d4998b..329448a6ac64 100644
--- a/components/pd_client/src/util.rs
+++ b/components/pd_client/src/util.rs
@@ -436,7 +436,7 @@ impl Client {
     }
 }
 
-/// The context of sending requets.
+/// The context of sending request.
 pub struct Request {
     remain_request_count: usize,
     request_sent: usize,
diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs
index 25f58352750f..1fe61b786330 100644
--- a/components/tikv_kv/src/lib.rs
+++ b/components/tikv_kv/src/lib.rs
@@ -553,7 +553,7 @@ pub enum ErrorInner {
     Request(ErrorHeader),
     #[error("timeout after {0:?}")]
     Timeout(Duration),
-    #[error("an empty requets")]
+    #[error("an empty request")]
     EmptyRequest,
     #[error("key is locked (backoff or cleanup) {0:?}")]
     KeyIsLocked(kvproto::kvrpcpb::LockInfo),

From 6a6188076637de06698f0709118d4dca02eaa4c8 Mon Sep 17 00:00:00 2001
From: lucasliang
Date: Thu, 16 Nov 2023 10:18:47 +0800
Subject: [PATCH 145/203] raftstore: fine-tune SlowScore. (#15991)

ref tikv/tikv#15909

Signed-off-by: lucasliang
---
 components/raftstore/src/store/util.rs      | 35 ++++++++++++++++++---
 components/raftstore/src/store/worker/pd.rs |  4 ++-
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs
index d9076a67d8a9..d1ef3fde75ad 100644
--- a/components/raftstore/src/store/util.rs
+++ b/components/raftstore/src/store/util.rs
@@ -1733,13 +1733,38 @@ pub struct RaftstoreDuration {
 }
 
 impl RaftstoreDuration {
+    #[inline]
     pub fn sum(&self) -> std::time::Duration {
-        self.store_wait_duration.unwrap_or_default()
-            + self.store_process_duration.unwrap_or_default()
+        self.delays_on_disk_io(true) + self.delays_on_net_io()
+    }
+
+    #[inline]
+    /// Returns the delayed duration on Disk I/O.
+    pub fn delays_on_disk_io(&self, include_wait_duration: bool) -> std::time::Duration {
+        let duration = self.store_process_duration.unwrap_or_default()
             + self.store_write_duration.unwrap_or_default()
-            + self.store_commit_duration.unwrap_or_default()
-            + self.apply_wait_duration.unwrap_or_default()
-            + self.apply_process_duration.unwrap_or_default()
+            + self.apply_process_duration.unwrap_or_default();
+        if include_wait_duration {
+            duration
+                + self.store_wait_duration.unwrap_or_default()
+                + self.apply_wait_duration.unwrap_or_default()
+        } else {
+            duration
+        }
+    }
+
+    #[inline]
+    /// Returns the delayed duration on Network I/O.
+    ///
+    /// Normally, it can be reflected by the duration of
+    /// `store_commit_duration`.
+    pub fn delays_on_net_io(&self) -> std::time::Duration {
+        // The `store_commit_duration` serves as an indicator for latency
+        // during the process of transferring Raft logs to peers and appending
+        // logs. In most scenarios, instances of latency fluctuations in the
+        // network are reflected by this duration. Hence, it is selected as a
+        // representative of network latency.
+ self.store_commit_duration.unwrap_or_default() } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 798102896693..5e97adf8d3e6 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -2261,7 +2261,9 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => { - self.slow_score.record(id, duration.sum()); + // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. + self.slow_score + .record(id, duration.delays_on_disk_io(false)); self.slow_trend_cause.record( tikv_util::time::duration_to_us(duration.store_wait_duration.unwrap()), Instant::now(), From a0dbe2d0b893489015fc99ae73c6646f7989fe32 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 16 Nov 2023 12:53:16 +0800 Subject: [PATCH 146/203] resource_control: do not force set override priority at handle gRPC request (#16003) close tikv/tikv#15994 Signed-off-by: glorv --- .../resource_control/src/resource_group.rs | 21 ++++++- src/server/service/kv.rs | 62 +++++++------------ 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index b45a9833bb8e..7e6d4279a25f 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -77,7 +77,10 @@ impl TaskPriority { impl From for TaskPriority { fn from(value: u32) -> Self { // map the resource group priority value (1,8,16) to (Low,Medium,High) - if value < 6 { + // 0 means the priority is not set, so map it to medium by default. + if value == 0 { + Self::Medium + } else if value < 6 { Self::Low } else if value < 11 { Self::Medium @@ -1430,4 +1433,20 @@ pub(crate) mod tests { &mgr.priority_limiters[1] )); } + + #[test] + fn test_task_priority() { + use TaskPriority::*; + let cases = [ + (0, Medium), + (1, Low), + (7, Medium), + (8, Medium), + (15, High), + (16, High), + ]; + for (value, priority) in cases { + assert_eq!(TaskPriority::from(value), priority); + } + } } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 4a7395222f7f..8426143d502f 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -192,14 +192,14 @@ macro_rules! handle_request { handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); }; ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -229,20 +229,6 @@ macro_rules! 
handle_request { } } -// consume resource group penalty and set explicit group priority -// We override the override_priority here to make handling tasks easier. -fn consume_penalty_and_set_priority( - resource_manager: &Arc, - resource_control_ctx: &mut ResourceControlContext, -) { - resource_manager.consume_penalty(resource_control_ctx); - if resource_control_ctx.get_override_priority() == 0 { - let prioirty = resource_manager - .get_resource_group_priority(resource_control_ctx.get_resource_group_name()); - resource_control_ctx.override_priority = prioirty as u64; - } -} - macro_rules! set_total_time { ($resp:ident, $duration:expr,no_time_detail) => {}; ($resp:ident, $duration:expr,has_time_detail) => { @@ -490,12 +476,12 @@ impl Tikv for Service { ctx.spawn(task); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -527,13 +513,13 @@ impl Tikv for Service { fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - mut req: RawCoprocessorRequest, + req: RawCoprocessorRequest, sink: UnarySink, ) { let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -616,13 +602,13 @@ impl Tikv for Service { fn coprocessor_stream( &mut self, ctx: RpcContext<'_>, - mut req: Request, + req: Request, mut sink: ServerStreamingSink, ) { let begin_instant = Instant::now(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1162,10 +1148,10 @@ fn handle_batch_commands_request( let resp = future::ok(batch_commands_response::Response::default()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::Get(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + 
resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1183,10 +1169,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::RawGet(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1204,10 +1190,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1238,10 +1224,10 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) From 1cd4753e02a72eb840def71c53fa1bdfafff28de Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 16 Nov 2023 16:39:46 +0800 Subject: [PATCH 147/203] titan: Introduce titan zstd dict compression (#15979) close tikv/tikv#5743 Introduce titan zstd dict compression Signed-off-by: Connor1996 Signed-off-by: Connor Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- etc/config-template.toml | 7 +++++++ src/config/mod.rs | 12 ++++++++++++ tests/integrations/config/mod.rs | 4 ++++ tests/integrations/config/test-custom.toml | 2 ++ 4 files changed, 25 insertions(+) diff --git a/etc/config-template.toml b/etc/config-template.toml index 3e55004feb29..22e606939215 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -939,6 +939,13 @@ ## default: lz4 # blob-file-compression = "lz4" +## Set blob file zstd dictionary compression, default(0) will use zstd compression. +## It is recommended to set the dictionary size to values such as 4k or 16k. Additionally, +## the sample data size to train dictionary is of size 100X dictionary size innerly. +## It has no effect when `blob-file-compression` is not `zstd`. 
+## default: 0 +# zstd-dict-size = 0 + ## Specifics cache size for blob records ## default: 0 # blob-cache-size = "0GB" diff --git a/src/config/mod.rs b/src/config/mod.rs index 4846b9465fc2..4c37ad0eafe5 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -137,6 +137,8 @@ pub struct TitanCfConfig { #[online_config(skip)] pub blob_file_compression: CompressionType, #[online_config(skip)] + pub zstd_dict_size: ReadableSize, + #[online_config(skip)] pub blob_cache_size: ReadableSize, #[online_config(skip)] pub min_gc_batch_size: ReadableSize, @@ -170,6 +172,7 @@ impl Default for TitanCfConfig { Self { min_blob_size: ReadableSize::kb(1), // disable titan default blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -190,6 +193,15 @@ impl TitanCfConfig { let mut opts = RocksTitanDbOptions::new(); opts.set_min_blob_size(self.min_blob_size.0); opts.set_blob_file_compression(self.blob_file_compression.into()); + // To try zstd dict compression, set dict size to 4k, sample size to 100X dict + // size + opts.set_compression_options( + -14, // window_bits + 32767, // level + 0, // strategy + self.zstd_dict_size.0 as i32, // zstd dict size + self.zstd_dict_size.0 as i32 * 100, // zstd sample size + ); opts.set_blob_cache(self.blob_cache_size.0 as usize, -1, false, 0.0); opts.set_min_gc_batch_size(self.min_gc_batch_size.0); opts.set_max_gc_batch_size(self.max_gc_batch_size.0); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2ab4ce5cc094..180ef37a94a8 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -272,6 +272,7 @@ fn test_serde_custom_tikv_config() { let titan_cf_config = TitanCfConfig { min_blob_size: ReadableSize(2018), blob_file_compression: CompressionType::Zstd, + zstd_dict_size: ReadableSize::kb(16), blob_cache_size: ReadableSize::gb(12), min_gc_batch_size: ReadableSize::kb(12), max_gc_batch_size: ReadableSize::mb(12), @@ -432,6 +433,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -506,6 +508,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -580,6 +583,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index a9772e285af2..0fe5df168cc1 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -349,6 +349,7 @@ periodic-compaction-seconds = "10d" [rocksdb.defaultcf.titan] min-blob-size = "2018B" blob-file-compression = "zstd" +zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" 
max-gc-batch-size = "12MB" @@ -609,6 +610,7 @@ max-compactions = 3 [raftdb.defaultcf.titan] min-blob-size = "2018B" blob-file-compression = "zstd" +zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" max-gc-batch-size = "12MB" From 98e6f20be5cb2f817b56e050e97aa6df9222fb04 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 16 Nov 2023 15:23:16 -0800 Subject: [PATCH 148/203] config: Fix titan blob-run-mode setting (#15988) close tikv/tikv#15978, close tikv/tikv#15987 Fix titan config blob-run-mode's from implementation. Signed-off-by: tonyxuqqi --- components/engine_rocks/src/config.rs | 11 +++++++---- src/config/mod.rs | 23 ++++++++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/components/engine_rocks/src/config.rs b/components/engine_rocks/src/config.rs index e121a1cea18f..d55c5cb3dfc4 100644 --- a/components/engine_rocks/src/config.rs +++ b/components/engine_rocks/src/config.rs @@ -340,9 +340,9 @@ pub enum BlobRunMode { impl From for ConfigValue { fn from(mode: BlobRunMode) -> ConfigValue { let str_value = match mode { - BlobRunMode::Normal => "normal", - BlobRunMode::ReadOnly => "read-only", - BlobRunMode::Fallback => "fallback", + BlobRunMode::Normal => "kNormal", + BlobRunMode::ReadOnly => "kReadOnly", + BlobRunMode::Fallback => "kFallback", }; ConfigValue::String(str_value.into()) } @@ -366,8 +366,11 @@ impl FromStr for BlobRunMode { "normal" => Ok(BlobRunMode::Normal), "read-only" => Ok(BlobRunMode::ReadOnly), "fallback" => Ok(BlobRunMode::Fallback), + "kNormal" => Ok(BlobRunMode::Normal), + "kReadOnly" => Ok(BlobRunMode::ReadOnly), + "kFallback" => Ok(BlobRunMode::Fallback), m => Err(format!( - "expect: normal, read-only or fallback, got: {:?}", + "expect: normal, kNormal, read-only, kReadOnly, kFallback or fallback, got: {:?}", m )), } diff --git a/src/config/mod.rs b/src/config/mod.rs index 4c37ad0eafe5..6620a4852643 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5775,7 +5775,28 @@ mod tests { let diff = config_value_to_string(diff.into_iter().collect()); assert_eq!(diff.len(), 1); assert_eq!(diff[0].0.as_str(), "blob_run_mode"); - assert_eq!(diff[0].1.as_str(), "fallback"); + assert_eq!(diff[0].1.as_str(), "kFallback"); + } + + #[test] + fn test_update_titan_blob_run_mode_config() { + let mut cfg = TikvConfig::default(); + cfg.rocksdb.titan.enabled = true; + let (_, cfg_controller, ..) 
= new_engines::(cfg); + for run_mode in [ + "kFallback", + "kNormal", + "kReadOnly", + "fallback", + "normal", + "read-only", + ] { + let change = HashMap::from([( + "rocksdb.defaultcf.titan.blob-run-mode".to_string(), + run_mode.to_string(), + )]); + cfg_controller.update_without_persist(change).unwrap(); + } } #[test] From 2631094f5afb3d112d18b32ee1c9f79a68c73216 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Fri, 17 Nov 2023 13:59:18 +0800 Subject: [PATCH 149/203] Copr: do not change expression type for `Lower` (#16010) close tikv/tikv#16009 Copr: do not change expression type for `Lower` Signed-off-by: gengliqi --- components/tidb_query_expr/src/impl_string.rs | 37 ------------------- components/tidb_query_expr/src/lib.rs | 26 ++----------- 2 files changed, 3 insertions(+), 60 deletions(-) diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index 25c9294d533a..fb330f91e03b 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -2941,43 +2941,6 @@ mod tests { #[test] fn test_lower() { - // Test non-binary string case - let cases = vec![ - (Some(b"HELLO".to_vec()), Some(b"hello".to_vec())), - (Some(b"123".to_vec()), Some(b"123".to_vec())), - ( - Some("CAFÉ".as_bytes().to_vec()), - Some("café".as_bytes().to_vec()), - ), - ( - Some("数据库".as_bytes().to_vec()), - Some("数据库".as_bytes().to_vec()), - ), - ( - Some("НОЧЬ НА ОКРАИНЕ МОСКВЫ".as_bytes().to_vec()), - Some("ночь на окраине москвы".as_bytes().to_vec()), - ), - ( - Some("قاعدة البيانات".as_bytes().to_vec()), - Some("قاعدة البيانات".as_bytes().to_vec()), - ), - (None, None), - ]; - - for (arg, exp) in cases { - let output = RpnFnScalarEvaluator::new() - .push_param_with_field_type( - arg.clone(), - FieldTypeBuilder::new() - .tp(FieldTypeTp::VarString) - .charset(CHARSET_UTF8MB4) - .build(), - ) - .evaluate(ScalarFuncSig::Lower) - .unwrap(); - assert_eq!(output, exp); - } - // Test binary string case let cases = vec![ (Some(b"hello".to_vec()), Some(b"hello".to_vec())), diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 61fb3612b632..f1aae1de746f 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -357,27 +357,7 @@ pub fn map_unary_minus_int_func(value: ScalarFuncSig, children: &[Expr]) -> Resu } } -fn map_lower_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { - if children.len() != 1 { - return Err(other_err!( - "ScalarFunction {:?} (params = {}) is not supported in batch mode", - value, - children.len() - )); - } - if children[0].get_field_type().is_binary_string_like() { - Ok(lower_fn_meta()) - } else { - let ret_field_type = children[0].get_field_type(); - Ok(match_template_charset! { - TT, match Charset::from_name(ret_field_type.get_charset()).map_err(tidb_query_datatype::codec::Error::from)? 
{ - Charset::TT => lower_utf8_fn_meta::(), - } - }) - } -} - -fn map_upper_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { +fn map_upper_utf8_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { if children.len() != 1 { return Err(other_err!( "ScalarFunction {:?} (params = {}) is not supported in batch mode", @@ -787,10 +767,10 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::Insert => insert_fn_meta(), ScalarFuncSig::InsertUtf8 => insert_utf8_fn_meta(), ScalarFuncSig::RightUtf8 => right_utf8_fn_meta(), - ScalarFuncSig::UpperUtf8 => map_upper_sig(value, children)?, + ScalarFuncSig::UpperUtf8 => map_upper_utf8_sig(value, children)?, ScalarFuncSig::Upper => upper_fn_meta(), - ScalarFuncSig::Lower => map_lower_sig(value, children)?, ScalarFuncSig::LowerUtf8 => map_lower_utf8_sig(value, children)?, + ScalarFuncSig::Lower => lower_fn_meta(), ScalarFuncSig::Locate2Args => locate_2_args_fn_meta(), ScalarFuncSig::Locate3Args => locate_3_args_fn_meta(), ScalarFuncSig::FieldInt => field_fn_meta::(), From eb7b1437714cb08cc185cebbfbd2cdc24e627343 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 20 Nov 2023 20:29:40 +0800 Subject: [PATCH 150/203] titan: update titan to fix incorrect blob file size and change default value (#16020) close tikv/tikv#15971 fix titan incorrect blob file size metric and change default value Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- etc/config-template.toml | 8 ++++---- src/config/mod.rs | 9 ++++++--- tests/integrations/config/mod.rs | 20 ++++++++------------ tests/integrations/config/test-custom.toml | 4 ++-- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 390b748cddd0..8ca813e39887 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2848,7 +2848,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2867,7 +2867,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "bzip2-sys", "cc", @@ -4707,7 +4707,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/etc/config-template.toml b/etc/config-template.toml index 22e606939215..cfa8e30af484 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -673,8 +673,8 @@ # enabled = false ## Maximum number of threads of `Titan` background gc jobs. -## default: 4 -# max-background-gc = 4 +## default: 1 +# max-background-gc = 1 ## Options for "Default" Column Family, which stores actual user data. [rocksdb.defaultcf] @@ -936,8 +936,8 @@ ## lz4: kLZ4Compression ## lz4hc: kLZ4HCCompression ## zstd: kZSTD -## default: lz4 -# blob-file-compression = "lz4" +## default: zstd +# blob-file-compression = "zstd" ## Set blob file zstd dictionary compression, default(0) will use zstd compression. ## It is recommended to set the dictionary size to values such as 4k or 16k. 
Additionally, diff --git a/src/config/mod.rs b/src/config/mod.rs index 6620a4852643..1c29c0637eed 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -150,6 +150,7 @@ pub struct TitanCfConfig { #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "Titan doesn't need to sample anymore"] pub sample_ratio: Option, #[online_config(skip)] pub merge_small_file_threshold: ReadableSize, @@ -160,18 +161,19 @@ pub struct TitanCfConfig { pub range_merge: bool, #[online_config(skip)] pub max_sorted_runs: i32, - // deprecated. #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The feature is removed"] pub gc_merge_rewrite: bool, } impl Default for TitanCfConfig { + #[allow(deprecated)] fn default() -> Self { Self { min_blob_size: ReadableSize::kb(1), // disable titan default - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), @@ -214,6 +216,7 @@ impl TitanCfConfig { opts } + #[allow(deprecated)] fn validate(&self) -> Result<(), Box> { if self.gc_merge_rewrite { return Err( @@ -1211,7 +1214,7 @@ impl Default for TitanDbConfig { enabled: false, dirname: "".to_owned(), disable_gc: false, - max_background_gc: 4, + max_background_gc: 1, purge_obsolete_files_period: ReadableDuration::secs(10), } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 180ef37a94a8..3afcac261a4b 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -271,19 +271,18 @@ fn test_serde_custom_tikv_config() { value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { min_blob_size: ReadableSize(2018), - blob_file_compression: CompressionType::Zstd, + blob_file_compression: CompressionType::Lz4, zstd_dict_size: ReadableSize::kb(16), blob_cache_size: ReadableSize::gb(12), min_gc_batch_size: ReadableSize::kb(12), max_gc_batch_size: ReadableSize::mb(12), discardable_ratio: 0.00156, - sample_ratio: None, merge_small_file_threshold: ReadableSize::kb(21), blob_run_mode: BlobRunMode::Fallback, level_merge: true, range_merge: true, max_sorted_runs: 100, - gc_merge_rewrite: false, + ..Default::default() }; let titan_db_config = TitanDbConfig { enabled: true, @@ -432,19 +431,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, @@ -507,19 +505,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - 
sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, // default value level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, @@ -582,19 +579,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, // default value level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 0fe5df168cc1..94184def8fbf 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -348,7 +348,7 @@ periodic-compaction-seconds = "10d" [rocksdb.defaultcf.titan] min-blob-size = "2018B" -blob-file-compression = "zstd" +blob-file-compression = "lz4" zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" @@ -609,7 +609,7 @@ max-compactions = 3 [raftdb.defaultcf.titan] min-blob-size = "2018B" -blob-file-compression = "zstd" +blob-file-compression = "lz4" zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" From 9993a63993e721105a2d52b949cd60481967173f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 22 Nov 2023 12:31:11 +0800 Subject: [PATCH 151/203] Dockerfile: update base image (#16033) close tikv/tikv#16032 Signed-off-by: Neil Shen --- Dockerfile.FIPS | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Dockerfile.FIPS b/Dockerfile.FIPS index fe34ab00f656..03195d4cf5bb 100644 --- a/Dockerfile.FIPS +++ b/Dockerfile.FIPS @@ -1,21 +1,22 @@ # This Docker image contains a minimal build environment for a FIPS compliant TiKV. 
-FROM redhat/ubi8-minimal:8.6 as builder +FROM rockylinux:9 as builder -RUN microdnf install -y openssl-devel +RUN dnf install -y openssl-devel -RUN microdnf install -y \ +RUN dnf install -y \ gcc \ gcc-c++ \ - libstdc++-static \ make \ cmake \ perl \ git \ findutils \ curl \ - python3 && \ - microdnf clean all + python3 --allowerasing && \ + dnf --enablerepo=crb install -y \ + libstdc++-static && \ + dnf clean all # Install Rustup RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y @@ -32,12 +33,13 @@ ENV ENABLE_FIPS 1 RUN make build_dist_release # Export to a clean image -FROM redhat/ubi8-minimal:8.6 -COPY --from=builder /tikv/target/release/tikv-server /tikv-server -COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl +FROM rockylinux:9-minimal RUN microdnf install -y openssl +COPY --from=builder /tikv/target/release/tikv-server /tikv-server +COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl + EXPOSE 20160 20180 ENTRYPOINT ["/tikv-server"] From 86d4a49848cd920e081c2cf0249977149835d1c5 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 22 Nov 2023 15:08:41 +0800 Subject: [PATCH 152/203] raftstore: check stale peer on leader missing (#16038) close tikv/tikv#11847, close tikv/tikv#15520, close pingcap/tidb#39130 Stale peers can impede TiKV store resolved ts and impact RTO for essential functions. Default 2-hour interval for stale peer check is insufficient for stale reads, flashbacks, and ebs backup. To mitigate this, we speed up stale read check by allowing TiKV to check for stale peers every 10 minutes in the event that a leader is missing. Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 33 +++++++---- .../raftstore/src/store/local_metrics.rs | 5 +- components/raftstore/src/store/metrics.rs | 5 ++ components/raftstore/src/store/peer.rs | 14 +++-- metrics/grafana/tikv_details.json | 9 +++ tests/integrations/raftstore/test_merge.rs | 59 +++++++++++++++++++ 6 files changed, 107 insertions(+), 18 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 942514153c7a..ee2daf1c3c8f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6395,19 +6395,26 @@ where fail_point!("peer_check_stale_state", state != StaleState::Valid, |_| {}); match state { StaleState::Valid => (), - StaleState::LeaderMissing => { - warn!( - "leader missing longer than abnormal_leader_missing_duration"; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - "expect" => %self.ctx.cfg.abnormal_leader_missing_duration, - ); - self.ctx - .raft_metrics - .leader_missing - .lock() - .unwrap() - .insert(self.region_id()); + StaleState::LeaderMissing | StaleState::MaybeLeaderMissing => { + if state == StaleState::LeaderMissing { + warn!( + "leader missing longer than abnormal_leader_missing_duration"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "expect" => %self.ctx.cfg.abnormal_leader_missing_duration, + ); + self.ctx + .raft_metrics + .leader_missing + .lock() + .unwrap() + .insert(self.region_id()); + } + + // It's very likely that this is a stale peer. To prevent + // resolved ts from being blocked for too long, we check stale + // peer eagerly. 
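+ // (Roughly: the broadcast asks the region's other peers whether this
+ // peer is still a member; a peer that has already been removed learns
+ // it is stale and can destroy itself, unblocking resolved ts sooner.)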
+ self.fsm.peer.bcast_check_stale_peer_message(self.ctx); } StaleState::ToValidate => { // for peer B in case 1 above diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index aceacdb81ee6..7207ac7869d6 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, Mutex}; use collections::HashSet; -use prometheus::local::LocalHistogram; +use prometheus::local::{LocalHistogram, LocalIntCounter}; use raft::eraftpb::MessageType; use tikv_util::time::{Duration, Instant}; use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; @@ -135,6 +135,7 @@ pub struct RaftMetrics { // local statistics for slowness pub stat_commit_log: RaftCommitLogStatistics, + pub check_stale_peer: LocalIntCounter, pub leader_missing: Arc>>, last_flush_time: Instant, @@ -172,6 +173,7 @@ impl RaftMetrics { wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), stat_commit_log: RaftCommitLogStatistics::default(), + check_stale_peer: CHECK_STALE_PEER_COUNTER.local(), leader_missing: Arc::default(), last_flush_time: Instant::now_coarse(), } @@ -211,6 +213,7 @@ impl RaftMetrics { self.wf_commit_not_persist_log.flush(); } + self.check_stale_peer.flush(); let mut missing = self.leader_missing.lock().unwrap(); LEADER_MISSING.set(missing.len() as i64); missing.clear(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 8f7bc8af2264..c2aff17a907b 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -659,6 +659,11 @@ lazy_static! { "Total number of leader missed region." ).unwrap(); + pub static ref CHECK_STALE_PEER_COUNTER: IntCounter = register_int_counter!( + "tikv_raftstore_check_stale_peer", + "Total number of checking stale peers." + ).unwrap(); + pub static ref INGEST_SST_DURATION_SECONDS: Histogram = register_histogram!( "tikv_snapshot_ingest_sst_duration_seconds", diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 90676411bfca..17d8e51f4cf0 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -127,6 +127,7 @@ pub enum StaleState { Valid, ToValidate, LeaderMissing, + MaybeLeaderMissing, } #[derive(Debug)] @@ -2117,7 +2118,6 @@ where self.leader_missing_time = None; return StaleState::Valid; } - let naive_peer = !self.is_initialized() || !self.raft_group.raft.promotable(); // Updates the `leader_missing_time` according to the current state. // // If we are checking this it means we suspect the leader might be missing. @@ -2137,13 +2137,18 @@ where StaleState::ToValidate } Some(instant) - if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0 - && !naive_peer => + if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0 => { // A peer is considered as in the leader missing state // if it's initialized but is isolated from its leader or // something bad happens that the raft group can not elect a leader. - StaleState::LeaderMissing + if self.is_initialized() && self.raft_group.raft.promotable() { + StaleState::LeaderMissing + } else { + // Uninitialized peer and learner may not have leader info, + // even if there is a valid leader. 
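+ // (Both this variant and LeaderMissing now trigger the eager
+ // check-stale-peer broadcast in the store fsm hunk above; only
+ // LeaderMissing is logged and counted in the leader-missing metric.)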
+ StaleState::MaybeLeaderMissing + } } _ => StaleState::Valid, } @@ -5440,6 +5445,7 @@ where &mut self, ctx: &mut PollContext, ) { + ctx.raft_metrics.check_stale_peer.inc(); if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() || self.region().get_region_epoch().get_conf_ver() == 0 { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3efa0e310662..fab335a8fd27 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40597,6 +40597,15 @@ "legendFormat": "{{instance}}-{{reason}}", "refId": "A", "step": 10 + }, + { + "expr": "sum(delta(tikv_raftstore_check_stale_peer{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}-stale-peer", + "refId": "B", + "step": 10 } ], "thresholds": [], diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 7d964c033192..8482feb84817 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -464,6 +464,65 @@ fn test_node_gc_uninitialized_peer_after_merge() { cluster.must_region_not_exist(left.get_id(), 4); } +/// Test leader missing should issue check stale peer requests. +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_gc_uninitialized_peer_after_merge_on_leader_missing() { + let mut cluster = new_cluster(0, 4); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + // Set a large max_leader_missing_duration so that check stale peer will + // only be triggered by leader missing. + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(1); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + // test if an uninitialized stale peer before conf removal is destroyed + // automatically + let region = pd_client.get_region(b"k1").unwrap(); + pd_client.must_add_peer(region.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(region.get_id(), new_peer(3, 3)); + + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 4) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + // Add peer (4,4), remove peer (4,4) and then merge regions. + // Peer (4,4) will be an an uninitialized stale peer. + pd_client.must_add_peer(left.get_id(), new_peer(4, 4)); + cluster.must_region_exist(left.get_id(), 4); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + pd_client.must_remove_peer(left.get_id(), new_peer(4, 4)); + pd_client.must_merge(left.get_id(), right.get_id()); + cluster.clear_send_filters(); + + // Wait for the peer (4,4) to be destroyed. 
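+ // (Three times the 100ms abnormal-leader-missing duration leaves room for
+ // the periodic stale-state check to fire and the check-stale-peer round
+ // trip to complete before the assertion below.)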
+ sleep_ms( + 3 * cluster + .cfg + .raft_store + .abnormal_leader_missing_duration + .as_millis(), + ); + cluster.must_region_not_exist(left.get_id(), 4); +} + // Test if a merge handled properly when there is a unfinished slow split before // merge. // No v2, it requires all peers to be available to check trim status. From dce0e55ad79d91ce10a5c6a886c3264d75d7a5ac Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Wed, 22 Nov 2023 12:50:40 -0800 Subject: [PATCH 153/203] raftstore: make full compaction incremental, pause when load is high (#15995) ref tikv/tikv#15271 Makes full compaction incremental, by range. Currently regions' ranges are used as increments. Run a predicate ("load-check") function before starting full compaction and between each incremental range. If the function evaluates to false, pause with exponential backoff (up to a maximum duration) until it evaluates to true. If periodic full compaction is enabled, poll process CPU stats every 30 seconds to determine usage for the "load-check" function. If usage exceeds a certain threshold before full compaction starts, compaction will not be started, and if started, full compaction will be paused. This cpu usage is also exported as ``tikv_storage_process_stat_cpu_usage`` gauge metric. Signed-off-by: Alex Feinberg --- components/raftstore/src/store/fsm/store.rs | 121 ++++-- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/mod.rs | 6 +- components/raftstore/src/store/msg.rs | 2 + .../raftstore/src/store/worker/compact.rs | 275 ++++++++++-- .../raftstore/src/store/worker/metrics.rs | 15 + components/raftstore/src/store/worker/mod.rs | 5 +- metrics/grafana/tikv_details.json | 400 ++++++++++++++++++ 8 files changed, 772 insertions(+), 53 deletions(-) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 6227e28cd193..1f72bcd9c907 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -107,9 +107,10 @@ use crate::{ ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, SplitCheckTask, }, - Callback, CasualMessage, CompactThreshold, GlobalReplicationState, InspectedRaftMessage, - MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, SignificantMsg, SnapManager, - StoreMsg, StoreTick, + worker_metrics::PROCESS_STAT_CPU_USAGE, + Callback, CasualMessage, CompactThreshold, FullCompactController, GlobalReplicationState, + InspectedRaftMessage, MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, + SignificantMsg, SnapManager, StoreMsg, StoreTick, }, Error, Result, }; @@ -121,8 +122,11 @@ pub const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = Feature::require(6, 1, 0); // it only makes sense for large region // Every 30 minutes, check if we can run full compaction. This allows the config -// setting `periodic_full_compact_start_max_cpu` to be changed dynamically. +// setting `periodic_full_compact_start_times` to be changed dynamically. const PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION: Duration = Duration::from_secs(30 * 60); +// If periodic full compaction is enabled (`periodic_full_compact_start_times` +// is set), sample load metrics every 10 minutes. 
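The flow described in the commit message above, condensed into one place for orientation (all names are the real ones from the hunks that follow; error handling, logging and metrics are omitted):

    // Store tick (fsm/store.rs): schedule the task only when the load check passes.
    //   let pred = self.is_low_load_for_full_compact(); // consults PROCESS_STAT_CPU_USAGE and is_busy
    //   if !pred() { return; }                          // try again on the next tick
    //   let ranges = self.ranges_for_full_compact();    // one (start_key, end_key) per region
    //   CleanupTask::Compact(CompactTask::PeriodicFullCompact {
    //       ranges,
    //       compact_load_controller: FullCompactController::new(1, 15 * 60, Box::new(pred)),
    //   })
    // Worker (worker/compact.rs): compact the ranges one at a time; between increments,
    // if the predicate fails, FullCompactController::pause() backs off before resuming.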
+const LOAD_STATS_WINDOW_DURATION: Duration = Duration::from_secs(10 * 60); pub struct StoreInfo { pub kv_engine: EK, @@ -582,6 +586,8 @@ where pub pending_latency_inspect: Vec, pub safe_point: Arc, + + pub process_stat: Option, } impl PollContext @@ -780,6 +786,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::CompactLockCf => self.on_compact_lock_cf(), StoreTick::CompactCheck => self.on_compact_check_tick(), StoreTick::PeriodicFullCompact => self.on_full_compact_tick(), + StoreTick::LoadMetricsWindow => self.on_load_metrics_window_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), } @@ -871,6 +878,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.register_cleanup_import_sst_tick(); self.register_compact_check_tick(); self.register_full_compact_tick(); + self.register_load_metrics_window_tick(); self.register_pd_store_heartbeat_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); @@ -1468,6 +1476,7 @@ where sync_write_worker, pending_latency_inspect: vec![], safe_point: self.safe_point.clone(), + process_stat: None, }; ctx.update_ticks_timeout(); let tag = format!("[store {}]", ctx.store.get_id()); @@ -1625,7 +1634,7 @@ impl RaftBatchSystem { } else { None }; - + let bgworker_remote = background_worker.remote(); let workers = Workers { pd_worker, background_worker, @@ -1663,7 +1672,7 @@ impl RaftBatchSystem { ReadRunner::new(self.router.clone(), engines.raft.clone()), ); - let compact_runner = CompactRunner::new(engines.kv.clone()); + let compact_runner = CompactRunner::new(engines.kv.clone(), bgworker_remote); let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), @@ -2450,6 +2459,26 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn register_load_metrics_window_tick(&self) { + // For now, we will only gather these metrics is periodic full compaction is + // enabled. + if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { + self.ctx + .schedule_store_tick(StoreTick::LoadMetricsWindow, LOAD_STATS_WINDOW_DURATION) + } + } + + fn on_load_metrics_window_tick(&mut self) { + self.register_load_metrics_window_tick(); + + let proc_stat = self + .ctx + .process_stat + .get_or_insert_with(|| ProcessStat::cur_proc_stat().unwrap()); + let cpu_usage: f64 = proc_stat.cpu_usage().unwrap(); + PROCESS_STAT_CPU_USAGE.set(cpu_usage); + } + fn register_full_compact_tick(&self) { if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { self.ctx.schedule_store_tick( @@ -2477,30 +2506,26 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER return; } - if self.ctx.global_stat.stat.is_busy.load(Ordering::SeqCst) { - warn!("full compaction may not run at this time, `is_busy` flag is true",); + let compact_predicate_fn = self.is_low_load_for_full_compact(); + // Do not start if the load is high. 
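+ // (The same predicate is handed to the FullCompactController below and
+ // re-evaluated between increments; see CompactRunner::full_compact.)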
+ if !compact_predicate_fn() { return; } - let mut proc_stats = ProcessStat::cur_proc_stat().unwrap(); - let cpu_usage = proc_stats.cpu_usage().unwrap(); - let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; - if cpu_usage > max_start_cpu_usage { - warn!( - "full compaction may not run at this time, cpu usage is above max"; - "cpu_usage" => cpu_usage, - "threshold" => max_start_cpu_usage, - ); - return; - } + let ranges = self.ranges_for_full_compact(); + + let compact_load_controller = + FullCompactController::new(1, 15 * 60, Box::new(compact_predicate_fn)); // Attempt executing a periodic full compaction. - // Note that full compaction will not run if other compaction tasks are running. - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::Compact(CompactTask::PeriodicFullCompact)) - { + // Note that full compaction will not run if another full compact tasks has + // started. + if let Err(e) = self.ctx.cleanup_scheduler.schedule(CleanupTask::Compact( + CompactTask::PeriodicFullCompact { + ranges, + compact_load_controller, + }, + )) { error!( "failed to schedule a periodic full compaction"; "store_id" => self.fsm.store.id, @@ -2509,6 +2534,52 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + /// Use ranges assigned to each region as increments for full compaction. + fn ranges_for_full_compact(&self) -> Vec<(Vec, Vec)> { + let meta = self.ctx.store_meta.lock().unwrap(); + let mut ranges = Vec::with_capacity(meta.regions.len()); + + for region in meta.regions.values() { + let start_key = keys::enc_start_key(region); + let end_key = keys::enc_end_key(region); + ranges.push((start_key, end_key)) + } + ranges + } + + /// Returns a predicate `Fn` which is evaluated: + /// 1. Before full compaction runs: if `false`, we return and wait for the + /// next full compaction tick + /// (`PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION`) before starting. If + /// true, we begin full compaction, which means the first incremental range + /// will be compactecd. See: ``StoreFsmDelegate::on_full_compact_tick`` + /// in this file. + /// + /// 2. After each incremental range finishes and before next one (if any) + /// starts. If `false`, we pause compaction and wait. See: + /// `CompactRunner::full_compact` in `worker/compact.rs`. + fn is_low_load_for_full_compact(&self) -> impl Fn() -> bool { + let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; + let global_stat = self.ctx.global_stat.clone(); + move || { + if global_stat.stat.is_busy.load(Ordering::SeqCst) { + warn!("full compaction may not run at this time, `is_busy` flag is true",); + return false; + } + + let cpu_usage = PROCESS_STAT_CPU_USAGE.get(); + if cpu_usage > max_start_cpu_usage { + warn!( + "full compaction may not run at this time, cpu usage is above max"; + "cpu_usage" => cpu_usage, + "threshold" => max_start_cpu_usage, + ); + return false; + } + true + } + } + fn register_compact_check_tick(&self) { self.ctx.schedule_store_tick( StoreTick::CompactCheck, diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c2aff17a907b..d8282cfa486b 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -215,6 +215,7 @@ make_static_metric! 
{ pub label_enum RaftEventDurationType { compact_check, periodic_full_compact, + load_metrics_window, pd_store_heartbeat, snap_gc, compact_lock_cf, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 4cae84d1d25d..123289c2057e 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -86,9 +86,9 @@ pub use self::{ worker::{ metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, BucketRange, BucketStatsInfo, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, - CompactThreshold, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, - LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, - ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + CompactThreshold, FlowStatistics, FlowStatsReporter, FullCompactController, KeyEntry, + LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 249c550db148..bf3a09986470 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -436,6 +436,7 @@ impl PeerTick { pub enum StoreTick { CompactCheck, PeriodicFullCompact, + LoadMetricsWindow, PdStoreHeartbeat, SnapGc, CompactLockCf, @@ -454,6 +455,7 @@ impl StoreTick { StoreTick::CompactLockCf => RaftEventDurationType::compact_lock_cf, StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, + StoreTick::LoadMetricsWindow => RaftEventDurationType::load_metrics_window, } } } diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index abdbaf5e938d..45fd7e586e74 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -4,19 +4,33 @@ use std::{ collections::VecDeque, error::Error as StdError, fmt::{self, Display, Formatter}, + sync::atomic::{AtomicBool, Ordering}, + time::Duration, }; use engine_traits::{KvEngine, RangeStats, CF_WRITE}; use fail::fail_point; +use futures_util::compat::Future01CompatExt; use thiserror::Error; -use tikv_util::{box_try, error, info, time::Instant, warn, worker::Runnable}; +use tikv_util::{ + box_try, debug, error, info, time::Instant, timer::GLOBAL_TIMER_HANDLE, warn, worker::Runnable, +}; +use yatp::Remote; -use super::metrics::{COMPACT_RANGE_CF, FULL_COMPACT}; +use super::metrics::{ + COMPACT_RANGE_CF, FULL_COMPACT, FULL_COMPACT_INCREMENTAL, FULL_COMPACT_PAUSE, +}; type Key = Vec; +static FULL_COMPACTION_IN_PROCESS: AtomicBool = AtomicBool::new(false); + pub enum Task { - PeriodicFullCompact, + PeriodicFullCompact { + // Ranges, or empty if we wish to compact the entire store + ranges: Vec<(Key, Key)>, + compact_load_controller: FullCompactController, + }, Compact { cf_name: String, @@ -34,6 +48,65 @@ pub enum Task { }, } +type CompactPredicateFn = Box bool + Send + Sync>; + +pub struct FullCompactController { + /// Initial delay between retries for ``FullCompactController::pause``. 
+ pub initial_pause_duration_secs: u64, + /// Max delay between retries. + pub max_pause_duration_secs: u64, + /// Predicate function to evaluate that indicates if we can proceed with + /// full compaction. + pub incremental_compaction_pred: CompactPredicateFn, +} + +impl fmt::Debug for FullCompactController { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("FullCompactController") + .field( + "initial_pause_duration_secs", + &self.initial_pause_duration_secs, + ) + .field("max_pause_duration_secs", &self.max_pause_duration_secs) + .finish() + } +} +impl FullCompactController { + pub fn new( + initial_pause_duration_secs: u64, + max_pause_duration_secs: u64, + incremental_compaction_pred: CompactPredicateFn, + ) -> Self { + Self { + initial_pause_duration_secs, + max_pause_duration_secs, + incremental_compaction_pred, + } + } + + /// Pause until `incremental_compaction_pred` evaluates to `true`: delay + /// using exponential backoff (initial value + /// `initial_pause_duration_secs`, max value `max_pause_duration_secs`) + /// between retries. + pub async fn pause(&self) -> Result<(), Error> { + let mut duration_secs = self.initial_pause_duration_secs; + loop { + box_try!( + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + Duration::from_secs(duration_secs)) + .compat() + .await + ); + if (self.incremental_compaction_pred)() { + break; + }; + duration_secs = self.max_pause_duration_secs.max(duration_secs * 2); + } + Ok(()) + } +} + +#[derive(Debug)] pub struct CompactThreshold { pub tombstones_num_threshold: u64, pub tombstones_percent_threshold: u64, @@ -60,7 +133,24 @@ impl CompactThreshold { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Task::PeriodicFullCompact => f.debug_struct("FullCompact").finish(), + Task::PeriodicFullCompact { + ref ranges, + ref compact_load_controller, + } => f + .debug_struct("PeriodicFullCompact") + .field( + "ranges", + &( + ranges + .first() + .map(|k| log_wrappers::Value::key(k.0.as_slice())), + ranges + .last() + .map(|k| log_wrappers::Value::key(k.1.as_slice())), + ), + ) + .field("compact_load_controller", compact_load_controller) + .finish(), Task::Compact { ref cf_name, ref start_key, @@ -120,33 +210,83 @@ pub enum Error { pub struct Runner { engine: E, + remote: Remote, } impl Runner where E: KvEngine, { - pub fn new(engine: E) -> Runner { - Runner { engine } + pub fn new(engine: E, remote: Remote) -> Runner { + Runner { engine, remote } } /// Periodic full compaction. + /// Note: this does not accept a `&self` due to async lifetime issues. /// - /// NOTE this is a highly experimental feature! + /// NOTE this is an experimental feature! /// - /// TODO: Do not start if there is heavy I/O. - /// TODO: Make it possible to rate limit, pause, or abort this by compacting - /// a range at a time. - pub fn full_compact(&mut self) -> Result<(), Error> { + /// TODO: Support stopping a full compaction. + async fn full_compact( + engine: E, + ranges: Vec<(Key, Key)>, + compact_controller: FullCompactController, + ) -> Result<(), Error> { fail_point!("on_full_compact"); info!("full compaction started"); + let mut ranges: VecDeque<_> = ranges + .iter() + .map(|(start, end)| (Some(start.as_slice()), Some(end.as_slice()))) + .collect(); + if ranges.is_empty() { + ranges.push_front((None, None)) + } + let timer = Instant::now(); let full_compact_timer = FULL_COMPACT.start_coarse_timer(); - box_try!(self.engine.compact_range( - None, None, // Compact the entire key range. 
- true, // no other compaction will run when this is running - 1, // number of threads threads - )); + + while let Some(range) = ranges.pop_front() { + debug!( + "incremental range full compaction started"; + "start_key" => ?range.0.map(log_wrappers::Value::key), + "end_key" => ?range.1.map(log_wrappers::Value::key), + ); + let incremental_timer = FULL_COMPACT_INCREMENTAL.start_coarse_timer(); + box_try!(engine.compact_range( + range.0, range.1, // Compact the entire key range. + false, // non-exclusive + 1, // number of threads threads + )); + incremental_timer.observe_duration(); + debug!( + "finished incremental range full compaction"; + "remaining" => ranges.len(), + ); + // If there is at least one range remaining in `ranges` remaining, evaluate + // `compact_controller.incremental_compaction_pred`. If `true`, proceed to next + // range; otherwise, pause this task + // (see `FullCompactController::pause` for details) until predicate + // evaluates to true. + if let Some(next_range) = ranges.front() { + if !(compact_controller.incremental_compaction_pred)() { + info!("pausing full compaction before next increment"; + "finished_start_key" => ?range.0.map(log_wrappers::Value::key), + "finished_end_key" => ?range.1.map(log_wrappers::Value::key), + "next_range_start_key" => ?next_range.0.map(log_wrappers::Value::key), + "next_range_end_key" => ?next_range.1.map(log_wrappers::Value::key), + "remaining" => ranges.len(), + ); + let pause_started = Instant::now(); + let pause_timer = FULL_COMPACT_PAUSE.start_coarse_timer(); + compact_controller.pause().await?; + pause_timer.observe_duration(); + info!("resuming incremental full compaction"; + "paused" => ?pause_started.saturating_elapsed(), + ); + } + } + } + full_compact_timer.observe_duration(); info!( "full compaction finished"; @@ -191,10 +331,28 @@ where fn run(&mut self, task: Task) { match task { - Task::PeriodicFullCompact => { - if let Err(e) = self.full_compact() { - error!("periodic full compaction failed"; "err" => %e); - } + Task::PeriodicFullCompact { + ranges, + compact_load_controller, + } => { + // Since periodic full compaction is submitted as a task to the background + // worker pool, verify we will not start full compaction if + // another full compaction is running in the background. 
+ if FULL_COMPACTION_IN_PROCESS.load(Ordering::SeqCst) + || FULL_COMPACTION_IN_PROCESS.swap(true, Ordering::SeqCst) + { + info!("full compaction is already in process, not starting"); + return; + }; + let engine = self.engine.clone(); + self.remote.spawn(async move { + if let Err(e) = + Self::full_compact(engine, ranges, compact_load_controller).await + { + error!("periodic full compaction failed"; "err" => %e); + } + FULL_COMPACTION_IN_PROCESS.store(false, Ordering::SeqCst); + }); } Task::Compact { cf_name, @@ -315,10 +473,19 @@ mod tests { }; use keys::data_key; use tempfile::Builder; + use tikv_util::yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}; use txn_types::{Key, TimeStamp, Write, WriteType}; use super::*; + fn make_compact_runner(engine: E) -> (FuturePool, Runner) + where + E: KvEngine, + { + let pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); + (pool.clone(), Runner::new(engine, pool.remote().clone())) + } + #[test] fn test_compact_range() { let path = Builder::new() @@ -326,8 +493,7 @@ mod tests { .tempdir() .unwrap(); let db = new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT]).unwrap(); - - let mut runner = Runner::new(db.clone()); + let (_pool, mut runner) = make_compact_runner(db.clone()); // Generate the first SST file. let mut wb = db.write_batch(); @@ -494,7 +660,7 @@ mod tests { fn test_full_compact_deletes() { let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); let engine = open_db(tmp_dir.path().to_str().unwrap()); - let mut runner = Runner::new(engine.clone()); + let (_pool, mut runner) = make_compact_runner(engine.clone()); // mvcc_put 0..5 for i in 0..5 { @@ -522,7 +688,68 @@ mod tests { .unwrap(); assert_eq!(stats.num_entries - stats.num_versions, 5); - runner.run(Task::PeriodicFullCompact); + runner.run(Task::PeriodicFullCompact { + ranges: Vec::new(), + compact_load_controller: FullCompactController::new(0, 0, Box::new(|| true)), + }); + std::thread::sleep(Duration::from_millis(500)); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 0); + } + + #[test] + fn test_full_compact_incremental_pausable() { + let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); + let engine = open_db(tmp_dir.path().to_str().unwrap()); + let (_pool, mut runner) = make_compact_runner(engine.clone()); + + // mvcc_put 0..100 + for i in 0..100 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let (start, end) = (data_key(b"k0"), data_key(b"k5")); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries, stats.num_versions); + + for i in 0..100 { + let k = format!("k{}", i); + delete(&engine, k.as_bytes(), 3.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 100); + + let started_at = Instant::now(); + let pred_fn: CompactPredicateFn = + Box::new(move || Instant::now() - started_at > Duration::from_millis(500)); + let ranges = vec![ + (data_key(b"k0"), data_key(b"k25")), + (data_key(b"k25"), data_key(b"k50")), + (data_key(b"k50"), data_key(b"k100")), + ]; + runner.run(Task::PeriodicFullCompact { + ranges, + compact_load_controller: FullCompactController::new(1, 5, pred_fn), + }); + let stats = engine 
+ .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 100); + std::thread::sleep(Duration::from_secs(2)); let stats = engine .get_range_stats(CF_WRITE, &start, &end) .unwrap() diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index bdf244590112..2b10bc3e053e 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -165,6 +165,21 @@ lazy_static! { "Bucketed histogram of full compaction for the storage." ) .unwrap(); + pub static ref FULL_COMPACT_INCREMENTAL: Histogram = register_histogram!( + "tikv_storage_full_compact_increment_duration_seconds", + "Bucketed histogram of full compaction increments for the storage." + ) + .unwrap(); + pub static ref FULL_COMPACT_PAUSE: Histogram = register_histogram!( + "tikv_storage_full_compact_pause_duration_seconds", + "Bucketed histogram of full compaction pauses for the storage." + ) + .unwrap(); + pub static ref PROCESS_STAT_CPU_USAGE: Gauge = register_gauge!( + "tikv_storage_process_stat_cpu_usage", + "CPU usage measured over a 30 second window", + ) + .unwrap(); pub static ref REGION_HASH_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_hash_duration_seconds", "Bucketed histogram of raftstore hash computation duration" diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index c67832385202..c47461d62ff3 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -23,7 +23,10 @@ pub use self::{ cleanup::{Runner as CleanupRunner, Task as CleanupTask}, cleanup_snapshot::{Runner as GcSnapshotRunner, Task as GcSnapshotTask}, cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, - compact::{need_compact, CompactThreshold, Runner as CompactRunner, Task as CompactTask}, + compact::{ + need_compact, CompactThreshold, FullCompactController, Runner as CompactRunner, + Task as CompactTask, + }, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index fab335a8fd27..b5d45d2fea0a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -19282,6 +19282,406 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "hiddenSeries": false, + "id": 24763574239, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + 
"refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Full compaction duration seconds", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + }, + "timeFrom": null, + "timeShift": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "hiddenSeries": false, + "id": 24763574241, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_pause_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Full compaction pause duration ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "hiddenSeries": false, + "id": 24763574240, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_increment_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": 
[], + "timeShift": null, + "title": "Full compaction per-increment duration ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 24763574242, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "tikv_storage_process_stat_cpu_usage", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process Stat Cpu Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, From 1b097636c91c7fc7556f3f3f4b850057eeeb4576 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 22 Nov 2023 20:26:41 -0800 Subject: [PATCH 154/203] update default value of region_compact_redundant_rows_percent (#16051) ref tikv/tikv#15282 Change the default value of raftstore.redundant-rows-percent-threshold to 20 from 100. This would triggers a compaction when a region has 20% entries as stale MVCC versions. 
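For reference, the shape of the check this default feeds into (a hedged sketch, not the exact `need_compact` code or signature; `redundant` stands for the estimated number of stale MVCC keys reported for the region's range):

    fn exceeds_redundant_rows_thresholds(
        redundant: u64,          // estimated stale MVCC keys in the range
        total_entries: u64,      // all keys in the range
        min_redundant_rows: u64, // region-compact-min-redundant-rows, default 50000
        redundant_percent: u64,  // region-compact-redundant-rows-percent, now 20
    ) -> bool {
        redundant >= min_redundant_rows
            && redundant * 100 >= total_entries * redundant_percent
    }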
Signed-off-by: tonyxuqqi Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore/src/store/config.rs | 11 +---------- etc/config-template.toml | 2 +- src/config/mod.rs | 2 +- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index facaa1514d84..62de6b57fafe 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -440,7 +440,7 @@ impl Default for Config { region_compact_min_tombstones: 10000, region_compact_tombstones_percent: 30, region_compact_min_redundant_rows: 50000, - region_compact_redundant_rows_percent: None, + region_compact_redundant_rows_percent: Some(20), pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), // Disable periodic full compaction by default. @@ -630,15 +630,6 @@ impl Config { } } - if self.region_compact_redundant_rows_percent.is_none() { - if raft_kv_v2 { - self.region_compact_redundant_rows_percent = Some(20); - } else { - // Disable redundant rows check in default for v1. - self.region_compact_redundant_rows_percent = Some(100); - } - } - // When use raft kv v2, we can set raft log gc size limit to a smaller value to // avoid too many entry logs in cache. // The snapshot support to increment snapshot sst, so the old snapshot files diff --git a/etc/config-template.toml b/etc/config-template.toml index cfa8e30af484..9e253e8ff79d 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -456,7 +456,7 @@ ## It should be set between 1 and 100. Manual compaction is only triggered when the number of ## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys ## exceeds `region-compact-redundant-rows-percent`. -# region-compact-redundant-rows-percent = 100 +# region-compact-redundant-rows-percent = 20 ## Interval to check whether to start a manual compaction for Lock Column Family. ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will diff --git a/src/config/mod.rs b/src/config/mod.rs index 1c29c0637eed..27f38abee4a7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -7058,7 +7058,7 @@ mod tests { cfg.raft_store .region_compact_redundant_rows_percent .unwrap(), - 100 + 20 ); let content = r#" From 5ada40f64fd51796bf69aa0f676c954cc5a5599f Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 23 Nov 2023 16:11:42 +0800 Subject: [PATCH 155/203] raftstore: backport the detection of network-io from raftstore-v2. (#15908) ref tikv/tikv#15909 Make raftstore perspect the jitters of network-io by backporting the implementation from raftstore-v2. 
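A tiny illustration of how the new factor is applied in `SlowTrendStatistics::record` further down (the helper below is illustrative, not TiKV code, and the numbers are made up):

    // Mirrors the weighting used when 0 < net_io_factor < 1; with the default 0.0
    // only the disk component is recorded, and with a factor >= 1 the code falls
    // back to the full duration.sum().
    fn weighted_latency_us(disk_io_us: u64, net_io_us: u64, net_io_factor: f64) -> u64 {
        (disk_io_us as f64 + net_io_us as f64 * net_io_factor) as u64
    }
    // e.g. weighted_latency_us(800, 400, 0.5) == 1_000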
Signed-off-by: lucasliang --- components/raftstore/src/store/config.rs | 9 ++ components/raftstore/src/store/fsm/store.rs | 10 +- components/raftstore/src/store/peer.rs | 15 +- components/raftstore/src/store/worker/pd.rs | 171 ++++++++++++-------- tests/integrations/config/mod.rs | 1 + 5 files changed, 134 insertions(+), 72 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 62de6b57fafe..73c0bb760bc5 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -357,6 +357,8 @@ pub struct Config { pub slow_trend_unsensitive_cause: f64, // The unsensitive(increase it to reduce sensitiveness) of the result-trend detection pub slow_trend_unsensitive_result: f64, + // The sensitiveness of slowness on network-io. + pub slow_trend_network_io_factor: f64, // Interval to report min resolved ts, if it is zero, it means disabled. pub report_min_resolved_ts_interval: ReadableDuration, @@ -521,6 +523,7 @@ impl Default for Config { // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, + slow_trend_network_io_factor: 0.0, report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), @@ -933,6 +936,12 @@ impl Config { )); } + if self.slow_trend_network_io_factor < 0.0 { + return Err(box_err!( + "slow_trend_network_io_factor must be greater than 0" + )); + } + Ok(()) } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 1f72bcd9c907..cfc0bb6e6a40 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -846,6 +846,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> mut inspector, } => { inspector.record_store_wait(send_time.saturating_elapsed()); + inspector.record_store_commit(self.ctx.raft_metrics.stat_commit_log.avg()); + // Reset the stat_commit_log and wait it to be refreshed in the next tick. + self.ctx.raft_metrics.stat_commit_log.reset(); self.ctx.pending_latency_inspect.push(inspector); } StoreMsg::UnsafeRecoveryReport(report) => self.store_heartbeat_pd(Some(report)), @@ -2841,16 +2844,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_wake_up_regions(&self, abnormal_stores: Vec) { info!("try to wake up all hibernated regions in this store"; "to_all" => abnormal_stores.is_empty()); + let store_id = self.ctx.store_id(); let meta = self.ctx.store_meta.lock().unwrap(); - for region_id in meta.regions.keys() { - let region = &meta.regions[region_id]; + + for (region_id, region) in &meta.regions { // Check whether the current region is not found on abnormal stores. If so, // this region is not the target to be awaken. 
if !region_on_stores(region, &abnormal_stores) { continue; } let peer = { - match find_peer(region, self.ctx.store_id()) { + match find_peer(region, store_id) { None => continue, Some(p) => p.clone(), } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 17d8e51f4cf0..185ab9d2a925 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1838,7 +1838,7 @@ where let has_snap_task = self.get_store().has_gen_snap_task(); let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.step(m)?; - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let mut for_balance = false; if !has_snap_task && self.get_store().has_gen_snap_task() { @@ -1883,7 +1883,7 @@ where } } - fn report_commit_log_duration(&self, pre_commit_index: u64, metrics: &RaftMetrics) { + fn report_commit_log_duration(&self, pre_commit_index: u64, metrics: &mut RaftMetrics) { if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } @@ -1903,10 +1903,15 @@ where &metrics.wf_commit_not_persist_log }; for tracker in trackers { - tracker.observe(now, hist, |t| { + // Collect the metrics related to commit_log + // durations. + let duration = tracker.observe(now, hist, |t| { t.metrics.commit_not_persisted = !commit_persisted; &mut t.metrics.wf_commit_log_nanos }); + metrics + .stat_commit_log + .record(Duration::from_nanos(duration)); } } } @@ -3125,7 +3130,7 @@ where let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.on_persist_ready(self.persisted_number); self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; self.mut_store().update_cache_persisted(persist_index); @@ -3170,7 +3175,7 @@ where let pre_commit_index = self.raft_group.raft.raft_log.committed; let mut light_rd = self.raft_group.advance_append(ready); self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; if self.is_in_force_leader() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 5e97adf8d3e6..b73198ecc75c 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -919,6 +919,78 @@ impl SlowScore { } } +struct SlowTrendStatistics { + net_io_factor: f64, + /// Detector to detect NetIo&DiskIo jitters. + slow_cause: Trend, + /// Reactor as an assistant detector to detect the QPS jitters. 
+ slow_result: Trend, + slow_result_recorder: RequestPerSecRecorder, +} + +impl SlowTrendStatistics { + #[inline] + fn new(cfg: &Config) -> Self { + Self { + slow_cause: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(180), + Duration::from_secs(30), + Duration::from_secs(120), + Duration::from_secs(600), + 1, + tikv_util::time::duration_to_us(Duration::from_micros(500)), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_cause, + ), + slow_result: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(120), + Duration::from_secs(15), + Duration::from_secs(60), + Duration::from_secs(300), + 1, + 2000, + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_result, + ), + slow_result_recorder: RequestPerSecRecorder::new(), + net_io_factor: cfg.slow_trend_network_io_factor, /* FIXME: add extra parameter in + * Config to control it. */ + } + } + + #[inline] + fn record(&mut self, duration: RaftstoreDuration) { + // TODO: It's more appropriate to divide the factor into `Disk IO factor` and + // `Net IO factor`. + // Currently, when `network ratio == 1`, it summarizes all factors by `sum` + // simplily, approved valid to common cases when there exists IO jitters on + // Network or Disk. + let latency = || -> u64 { + if self.net_io_factor as u64 >= 1 { + return tikv_util::time::duration_to_us(duration.sum()); + } + let disk_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_disk_io(true)) as f64; + let network_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_net_io()) as f64; + (disk_io_latency + network_io_latency * self.net_io_factor) as u64 + }(); + self.slow_cause.record(latency, Instant::now()); + } +} + pub struct Runner where EK: KvEngine, @@ -949,9 +1021,7 @@ where snap_mgr: SnapManager, remote: Remote, slow_score: SlowScore, - slow_trend_cause: Trend, - slow_trend_result: Trend, - slow_trend_result_recorder: RequestPerSecRecorder, + slow_trend: SlowTrendStatistics, // The health status of the store is updated by the slow score mechanism. 
health_service: Option, @@ -1020,39 +1090,7 @@ where snap_mgr, remote, slow_score: SlowScore::new(cfg.inspect_interval.0), - slow_trend_cause: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(180), - Duration::from_secs(30), - Duration::from_secs(120), - Duration::from_secs(600), - 1, - tikv_util::time::duration_to_us(Duration::from_micros(500)), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_cause, - ), - slow_trend_result: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(120), - Duration::from_secs(15), - Duration::from_secs(60), - Duration::from_secs(300), - 1, - 2000, - STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L1"]), - STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_result, - ), - slow_trend_result_recorder: RequestPerSecRecorder::new(), + slow_trend: SlowTrendStatistics::new(cfg), health_service, curr_health_status: ServingStatus::Serving, coprocessor_host, @@ -1330,7 +1368,8 @@ where .engine_total_query_num .sub_query_stats(&self.store_stat.engine_last_query_num); let total_query_num = self - .slow_trend_result_recorder + .slow_trend + .slow_result_recorder .record_and_get_current_rps(res.get_all_query_num(), Instant::now()); stats.set_query_stats(res.0); @@ -1457,16 +1496,17 @@ where stats: &mut pdpb::StoreStats, total_query_num: Option, ) { - let slow_trend_cause_rate = self.slow_trend_cause.increasing_rate(); + let slow_trend_cause_rate = self.slow_trend.slow_cause.increasing_rate(); STORE_SLOW_TREND_GAUGE.set(slow_trend_cause_rate); let mut slow_trend = pdpb::SlowTrend::default(); slow_trend.set_cause_rate(slow_trend_cause_rate); - slow_trend.set_cause_value(self.slow_trend_cause.l0_avg()); + slow_trend.set_cause_value(self.slow_trend.slow_cause.l0_avg()); if let Some(total_query_num) = total_query_num { - self.slow_trend_result + self.slow_trend + .slow_result .record(total_query_num as u64, Instant::now()); - slow_trend.set_result_value(self.slow_trend_result.l0_avg()); - let slow_trend_result_rate = self.slow_trend_result.increasing_rate(); + slow_trend.set_result_value(self.slow_trend.slow_result.l0_avg()); + let slow_trend_result_rate = self.slow_trend.slow_result.increasing_rate(); slow_trend.set_result_rate(slow_trend_result_rate); STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_result_rate); STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(total_query_num); @@ -1479,23 +1519,25 @@ where } fn write_slow_trend_metrics(&mut self) { - STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend_cause.l0_avg()); - STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend_cause.l1_avg()); - STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend_cause.l2_avg()); - STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend_cause.l0_l1_rate()); - STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend_cause.l1_l2_rate()); - STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l1_margin_error_base()); - 
STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l2_margin_error_base()); + STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend.slow_cause.l0_avg()); + STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend.slow_cause.l1_avg()); + STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend.slow_cause.l2_avg()); + STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend.slow_cause.l0_l1_rate()); + STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend.slow_cause.l1_l2_rate()); + STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE + .set(self.slow_trend.slow_cause.l1_margin_error_base()); + STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE + .set(self.slow_trend.slow_cause.l2_margin_error_base()); // Report results of all slow Trends. - STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend_result.l0_avg()); - STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend_result.l1_avg()); - STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend_result.l2_avg()); - STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend_result.l0_l1_rate()); - STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend_result.l1_l2_rate()); + STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend.slow_result.l0_avg()); + STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend.slow_result.l1_avg()); + STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend.slow_result.l2_avg()); + STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend.slow_result.l0_l1_rate()); + STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend.slow_result.l1_l2_rate()); STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE - .set(self.slow_trend_result.l1_margin_error_base()); + .set(self.slow_trend.slow_result.l1_margin_error_base()); STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE - .set(self.slow_trend_result.l2_margin_error_base()); + .set(self.slow_trend.slow_result.l2_margin_error_base()); } fn handle_report_batch_split(&self, regions: Vec) { @@ -2264,10 +2306,7 @@ where // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. self.slow_score .record(id, duration.delays_on_disk_io(false)); - self.slow_trend_cause.record( - tikv_util::time::duration_to_us(duration.store_wait_duration.unwrap()), - Instant::now(), - ); + self.slow_trend.record(duration); } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTs { @@ -2296,7 +2335,7 @@ where { fn on_timeout(&mut self) { // Record a fairly great value when timeout - self.slow_trend_cause.record(500_000, Instant::now()); + self.slow_trend.slow_cause.record(500_000, Instant::now()); // The health status is recovered to serving as long as any tick // does not timeout. 
@@ -2335,8 +2374,6 @@ where let inspector = LatencyInspector::new( id, Box::new(move |id, duration| { - let dur = duration.sum(); - STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_process"]) .observe(tikv_util::time::duration_to_sec( @@ -2347,9 +2384,15 @@ where .observe(tikv_util::time::duration_to_sec( duration.store_wait_duration.unwrap(), )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_commit"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_commit_duration.unwrap(), + )); + STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["all"]) - .observe(tikv_util::time::duration_to_sec(dur)); + .observe(tikv_util::time::duration_to_sec(duration.sum())); if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { id, duration }) { warn!("schedule pd task failed"; "err" => ?e); } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 3afcac261a4b..4bb75e0a95f5 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -262,6 +262,7 @@ fn test_serde_custom_tikv_config() { check_request_snapshot_interval: ReadableDuration::minutes(1), slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, + slow_trend_network_io_factor: 0.0, enable_v2_compatible_learner: false, unsafe_disable_check_quorum: false, periodic_full_compact_start_times: ReadableSchedule::default(), From b23787ca7a44401157e8fcbf3a126dfaf3833af3 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 23 Nov 2023 16:37:11 +0800 Subject: [PATCH 156/203] readpool: fix pending tasks counter (#16031) ref tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/read_pool.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/read_pool.rs b/src/read_pool.rs index 32be95698da1..22a11cb2b41f 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -26,7 +26,6 @@ use tikv_util::{ worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; -use tracker::TrackedFuture; use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; use self::metrics::*; @@ -145,8 +144,9 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); + let running_tasks1 = running_tasks.clone(); if let Some(resource_ctl) = resource_ctl { - let fut = TrackedFuture::new(with_resource_limiter( + let fut = with_resource_limiter( ControlledFuture::new( async move { f.await; @@ -156,14 +156,20 @@ impl ReadPoolHandle { group_name, ), resource_limiter, - )); - remote.spawn_with_extras(fut, extras)?; + ); + remote.spawn_with_extras(fut, extras).map_err(|e| { + running_tasks1.dec(); + e + })?; } else { let fut = async move { f.await; running_tasks.dec(); }; - remote.spawn_with_extras(fut, extras)?; + remote.spawn_with_extras(fut, extras).map_err(|e| { + running_tasks1.dec(); + e + })?; } } } @@ -764,12 +770,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool( + let name = "test-yatp-full"; + let pool = build_yatp_read_pool_with_name( &config, DummyReporter, engine, None, CleanupMethod::InPlace, + name.to_owned(), None, ); @@ -805,6 +813,12 @@ mod tests { handle .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default(), None) .unwrap(); + assert_eq!( + 
UNIFIED_READ_POOL_RUNNING_TASKS + .with_label_values(&[name]) + .get(), + 2 + ); } #[test] From bc1ae3043718418b43ef69d0b04190c0de721278 Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 24 Nov 2023 15:23:43 +0800 Subject: [PATCH 157/203] pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837) ref tikv/tikv#15184 - The min-resolved-ts will report periodically, no need to do retires - support dynamic change `min-resolved-ts` report interval Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 1 - components/raftstore-v2/src/worker/pd/mod.rs | 15 ++---- .../tests/integrations/cluster.rs | 2 +- components/raftstore/src/store/config.rs | 9 ++-- components/raftstore/src/store/fsm/store.rs | 37 ++++++++++++-- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/msg.rs | 2 + components/raftstore/src/store/worker/pd.rs | 48 ++----------------- .../resolved_ts/tests/failpoints/mod.rs | 5 +- components/test_raftstore/src/util.rs | 12 +++++ etc/config-template.toml | 3 ++ tests/integrations/config/mod.rs | 2 +- tests/integrations/config/test-custom.toml | 2 +- 14 files changed, 69 insertions(+), 74 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 06ea6e9055da..80958e151d03 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -1098,9 +1098,7 @@ impl PdClient for RpcClient { }) as PdFuture<_> }; - self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + self.pd_client.request(req, executor, NO_RETRY).execute() } fn report_region_buckets(&self, bucket_stat: &BucketStat, period: Duration) -> PdFuture<()> { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 23e419140123..73c4461024e1 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -811,7 +811,6 @@ impl StoreSystem { causal_ts_provider, workers.pd.scheduler(), auto_split_controller, - store_meta.lock().unwrap().region_read_progress.clone(), collector_reg_handle, grpc_service_mgr, self.logger.clone(), diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 77915dd0378e..7e07d26e61f7 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -14,9 +14,9 @@ use pd_client::{BucketStat, PdClient}; use raftstore::store::{ metrics::STORE_INSPECT_DURATION_HISTOGRAM, util::{KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, - AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, - RegionReadProgressRegistry, SplitInfo, StoreStatsReporter, TabletSnapManager, TxnExt, - WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, SplitInfo, + StoreStatsReporter, TabletSnapManager, TxnExt, WriteStats, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; use resource_metering::{Collector, CollectorRegHandle, RawRecords}; use service::service_manager::GrpcServiceManager; @@ -245,7 +245,6 @@ where causal_ts_provider: Option>, // used for rawkv apiv2 pd_scheduler: Scheduler, auto_split_controller: AutoSplitController, - region_read_progress: RegionReadProgressRegistry, collector_reg_handle: CollectorRegHandle, grpc_service_manager: 
GrpcServiceManager, logger: Logger, @@ -255,16 +254,10 @@ where let store_heartbeat_interval = cfg.value().pd_store_heartbeat_tick_interval.0; let mut stats_monitor = PdStatsMonitor::new( store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, - cfg.value().report_min_resolved_ts_interval.0, cfg.value().inspect_interval.0, PdReporter::new(pd_scheduler, logger.clone()), ); - stats_monitor.start( - auto_split_controller, - region_read_progress, - collector_reg_handle, - store_id, - )?; + stats_monitor.start(auto_split_controller, collector_reg_handle)?; let slowness_stats = slowness::SlownessStatistics::new(&cfg.value()); Ok(Self { store_id, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 5b3cc5feb930..88ad9a0e3809 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -515,6 +515,7 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.region_compact_check_interval = ReadableDuration::ZERO; cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.pd_report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; cfg.lock_cf_compact_interval = ReadableDuration::ZERO; cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; @@ -524,7 +525,6 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.merge_check_tick_interval = ReadableDuration::ZERO; cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; cfg.inspect_interval = ReadableDuration::ZERO; - cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 73c0bb760bc5..b09afb3c6e1c 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -142,6 +142,7 @@ pub struct Config { pub region_compact_redundant_rows_percent: Option, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, + pub pd_report_min_resolved_ts_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, pub snap_gc_timeout: ReadableDuration, /// The duration of snapshot waits for region split. It prevents leader from @@ -360,9 +361,6 @@ pub struct Config { // The sensitiveness of slowness on network-io. pub slow_trend_network_io_factor: f64, - // Interval to report min resolved ts, if it is zero, it means disabled. - pub report_min_resolved_ts_interval: ReadableDuration, - /// Interval to check whether to reactivate in-memory pessimistic lock after /// being disabled before transferring leader. pub reactive_memory_lock_tick_interval: ReadableDuration, @@ -445,6 +443,7 @@ impl Default for Config { region_compact_redundant_rows_percent: Some(20), pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), + pd_report_min_resolved_ts_interval: ReadableDuration::secs(1), // Disable periodic full compaction by default. 
periodic_full_compact_start_times: ReadableSchedule::default(), // If periodic full compaction is enabled, do not start a full compaction @@ -524,7 +523,6 @@ impl Default for Config { slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, slow_trend_network_io_factor: 0.0, - report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), allow_unsafe_vote_after_start: false, @@ -1042,6 +1040,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_store_heartbeat_tick_interval"]) .set(self.pd_store_heartbeat_tick_interval.as_secs_f64()); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["pd_report_min_resolved_ts_interval"]) + .set(self.pd_report_min_resolved_ts_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["snap_mgr_gc_tick_interval"]) .set(self.snap_mgr_gc_tick_interval.as_secs_f64()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index cfc0bb6e6a40..bef74e3ed29e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -789,6 +789,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::LoadMetricsWindow => self.on_load_metrics_window_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), + StoreTick::PdReportMinResolvedTs => self.on_pd_report_min_resolved_ts_tick(), } let elapsed = timer.saturating_elapsed(); self.ctx @@ -883,6 +884,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.register_full_compact_tick(); self.register_load_metrics_window_tick(); self.register_pd_store_heartbeat_tick(); + self.register_pd_report_min_resolved_ts_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); self.register_consistency_check_tick(); @@ -1702,7 +1704,6 @@ impl RaftBatchSystem { &cfg, )?; - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); let mut builder = RaftPollerBuilder { cfg, store: meta, @@ -1739,7 +1740,6 @@ impl RaftBatchSystem { mgr, pd_client, collector_reg_handle, - region_read_progress, health_service, causal_ts_provider, snap_generator_pool, @@ -1758,7 +1758,6 @@ impl RaftBatchSystem { snap_mgr: SnapManager, pd_client: Arc, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 snap_generator_pool: FuturePool, @@ -1850,7 +1849,6 @@ impl RaftBatchSystem { snap_mgr, workers.pd_worker.remote(), collector_reg_handle, - region_read_progress, health_service, coprocessor_host, causal_ts_provider, @@ -2678,6 +2676,25 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn report_min_resolved_ts(&self) { + let read_progress = { + let meta = self.ctx.store_meta.lock().unwrap(); + meta.region_read_progress().clone() + }; + let min_resolved_ts = read_progress.get_min_resolved_ts(); + + let task = PdTask::ReportMinResolvedTs { + store_id: self.fsm.store.id, + min_resolved_ts, + }; + if let Err(e) = self.ctx.pd_scheduler.schedule(task) { + error!("failed to send min resolved ts to pd worker"; + "store_id" => self.fsm.store.id, + "err" => ?e + ); + } + } + fn store_heartbeat_pd(&mut self, report: Option) { let mut stats = StoreStats::default(); @@ -2784,6 +2801,11 @@ 
impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.register_pd_store_heartbeat_tick(); } + fn on_pd_report_min_resolved_ts_tick(&mut self) { + self.report_min_resolved_ts(); + self.register_pd_report_min_resolved_ts_tick(); + } + fn on_snap_mgr_gc(&mut self) { // refresh multi_snapshot_files enable flag self.ctx.snap_mgr.set_enable_multi_snapshot_files( @@ -2888,6 +2910,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER ); } + fn register_pd_report_min_resolved_ts_tick(&self) { + self.ctx.schedule_store_tick( + StoreTick::PdReportMinResolvedTs, + self.ctx.cfg.pd_report_min_resolved_ts_interval.0, + ); + } + fn register_snap_mgr_gc_tick(&self) { self.ctx .schedule_store_tick(StoreTick::SnapGc, self.ctx.cfg.snap_mgr_gc_tick_interval.0) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index d8282cfa486b..908b650469cc 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -217,6 +217,7 @@ make_static_metric! { periodic_full_compact, load_metrics_window, pd_store_heartbeat, + pd_report_min_resolved_ts, snap_gc, compact_lock_cf, consistency_check, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index bf3a09986470..52aed7d424f8 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -442,6 +442,7 @@ pub enum StoreTick { CompactLockCf, ConsistencyCheck, CleanupImportSst, + PdReportMinResolvedTs, } impl StoreTick { @@ -456,6 +457,7 @@ impl StoreTick { StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, StoreTick::LoadMetricsWindow => RaftEventDurationType::load_metrics_window, + StoreTick::PdReportMinResolvedTs => RaftEventDurationType::pd_report_min_resolved_ts, } } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index b73198ecc75c..b5bb189d84b6 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -68,7 +68,7 @@ use crate::{ AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, - RegionReadProgressRegistry, SnapManager, StoreInfo, StoreMsg, TxnExt, + SnapManager, StoreInfo, StoreMsg, TxnExt, }, }; @@ -450,16 +450,6 @@ fn default_collect_tick_interval() -> Duration { DEFAULT_COLLECT_TICK_INTERVAL } -fn config(interval: Duration) -> Duration { - fail_point!("mock_min_resolved_ts_interval", |_| { - Duration::from_millis(50) - }); - fail_point!("mock_min_resolved_ts_interval_disable", |_| { - Duration::from_millis(0) - }); - interval -} - #[inline] fn convert_record_pairs(m: HashMap) -> RecordPairVec { m.into_iter() @@ -562,7 +552,6 @@ where collect_store_infos_interval: Duration, load_base_split_check_interval: Duration, collect_tick_interval: Duration, - report_min_resolved_ts_interval: Duration, inspect_latency_interval: Duration, } @@ -570,12 +559,7 @@ impl StatsMonitor where T: StoreStatsReporter, { - pub fn new( - interval: Duration, - report_min_resolved_ts_interval: Duration, - inspect_latency_interval: Duration, - reporter: T, - ) -> Self { + pub fn new(interval: Duration, inspect_latency_interval: Duration, reporter: T) -> Self { StatsMonitor { reporter, handle: None, @@ -587,7 +571,6 @@ where DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, 
interval, ), - report_min_resolved_ts_interval: config(report_min_resolved_ts_interval), // Use `inspect_latency_interval` as the minimal limitation for collecting tick. collect_tick_interval: cmp::min( inspect_latency_interval, @@ -602,9 +585,7 @@ where pub fn start( &mut self, mut auto_split_controller: AutoSplitController, - region_read_progress: RegionReadProgressRegistry, collector_reg_handle: CollectorRegHandle, - store_id: u64, ) -> Result<(), io::Error> { if self.collect_tick_interval < cmp::min( @@ -625,9 +606,6 @@ where let load_base_split_check_interval = self .load_base_split_check_interval .div_duration_f64(tick_interval) as u64; - let report_min_resolved_ts_interval = self - .report_min_resolved_ts_interval - .div_duration_f64(tick_interval) as u64; let update_latency_stats_interval = self .inspect_latency_interval .div_duration_f64(tick_interval) as u64; @@ -686,12 +664,6 @@ where &mut region_cpu_records_collector, ); } - if is_enable_tick(timer_cnt, report_min_resolved_ts_interval) { - reporter.report_min_resolved_ts( - store_id, - region_read_progress.get_min_resolved_ts(), - ); - } if is_enable_tick(timer_cnt, update_latency_stats_interval) { reporter.update_latency_stats(timer_cnt); } @@ -1050,7 +1022,6 @@ where snap_mgr: SnapManager, remote: Remote, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -1060,16 +1031,10 @@ where let interval = store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; let mut stats_monitor = StatsMonitor::new( interval, - cfg.report_min_resolved_ts_interval.0, cfg.inspect_interval.0, WrappedScheduler(scheduler.clone()), ); - if let Err(e) = stats_monitor.start( - auto_split_controller, - region_read_progress, - collector_reg_handle, - store_id, - ) { + if let Err(e) = stats_monitor.start(auto_split_controller, collector_reg_handle) { error!("failed to start stats collector, error = {:?}", e); } @@ -2686,8 +2651,6 @@ mod tests { use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; use tikv_util::worker::LazyWorker; - use crate::store::fsm::StoreMeta; - struct RunnerTest { store_stat: Arc>, stats_monitor: StatsMonitor>, @@ -2701,17 +2664,12 @@ mod tests { ) -> RunnerTest { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), - Duration::from_secs(0), Duration::from_secs(interval), WrappedScheduler(scheduler), ); - let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); if let Err(e) = stats_monitor.start( AutoSplitController::default(), - region_read_progress, CollectorRegHandle::new_for_test(), - 1, ) { error!("failed to start stats collector, error = {:?}", e); } diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index 0c594ab1d1dd..64b58e0ed22a 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -63,15 +63,14 @@ fn test_report_min_resolved_ts() { fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); fail::cfg("mock_min_resolved_ts_interval", "return(0)").unwrap(); let mut suite = TestSuite::new(1); - // default config is 1s assert_eq!( suite .cluster .cfg .tikv .raft_store - .report_min_resolved_ts_interval, - ReadableDuration::secs(1) + .pd_report_min_resolved_ts_interval, + ReadableDuration::millis(50) ); let region = 
suite.cluster.get_region(&[]); let ts1 = suite.cluster.pd_client.get_min_resolved_ts(); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 5eb7d97796eb..0bb948f13c9b 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -19,6 +19,7 @@ use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT, CF_WRITE, }; +use fail::fail_point; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; use grpcio::{ChannelBuilder, Environment}; @@ -173,9 +174,20 @@ pub fn new_tikv_config_with_api_ver(cluster_id: u64, api_ver: ApiVersion) -> Tik let mut cfg = TEST_CONFIG.clone(); cfg.server.cluster_id = cluster_id; cfg.storage.set_api_version(api_ver); + cfg.raft_store.pd_report_min_resolved_ts_interval = config(ReadableDuration::secs(1)); cfg } +fn config(interval: ReadableDuration) -> ReadableDuration { + fail_point!("mock_min_resolved_ts_interval", |_| { + ReadableDuration::millis(50) + }); + fail_point!("mock_min_resolved_ts_interval_disable", |_| { + ReadableDuration::millis(0) + }); + interval +} + // Create a base request. pub fn new_base_request(region_id: u64, epoch: RegionEpoch, read_quorum: bool) -> RaftCmdRequest { let mut req = RaftCmdRequest::default(); diff --git a/etc/config-template.toml b/etc/config-template.toml index 9e253e8ff79d..75c7eab0c109 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -394,6 +394,9 @@ ## Store heartbeat tick interval for reporting to PD. # pd-store-heartbeat-tick-interval = "10s" +## Store min resolved ts tick interval for reporting to PD. +# pd-report-min-resolved-ts-interval = "1s" + ## The threshold of triggering Region split check. ## When Region size change exceeds this config, TiKV will check whether the Region should be split ## or not. 
To reduce the cost of scanning data in the checking process, you can set the value to diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 4bb75e0a95f5..d49f5e50c0d0 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -200,6 +200,7 @@ fn test_serde_custom_tikv_config() { region_compact_redundant_rows_percent: Some(33), pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), + pd_report_min_resolved_ts_interval: ReadableDuration::millis(233), notify_capacity: 12_345, snap_mgr_gc_tick_interval: ReadableDuration::minutes(12), snap_gc_timeout: ReadableDuration::hours(12), @@ -247,7 +248,6 @@ fn test_serde_custom_tikv_config() { io_reschedule_concurrent_max_count: 1234, io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), - report_min_resolved_ts_interval: ReadableDuration::millis(233), check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 94184def8fbf..30a501b1cee5 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -173,6 +173,7 @@ region-compact-min-redundant-rows = 999 region-compact-redundant-rows-percent = 33 pd-heartbeat-tick-interval = "12m" pd-store-heartbeat-tick-interval = "12s" +pd-report-min-resolved-ts-interval = "233ms" snap-mgr-gc-tick-interval = "12m" snap-gc-timeout = "12h" snap-wait-split-duration = "12h" @@ -228,7 +229,6 @@ reactive-memory-lock-tick-interval = "566ms" reactive-memory-lock-timeout-tick = 8 check-long-uncommitted-interval = "1s" long-uncommitted-base-threshold = "1s" -report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" unreachable-backoff = "111s" From 04b857fa7fa6dc153fd2b82aac7009b9c4eb6aa5 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 24 Nov 2023 18:04:16 +0800 Subject: [PATCH 158/203] test: enable fail-point cases in test_kv_service for raftstore-v2 (#15479) ref tikv/tikv#15409 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- src/server/raftkv2/mod.rs | 16 ++++++++++++++-- tests/failpoints/cases/test_kv_service.rs | 23 +++++++++++++++-------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ca813e39887..2ebbbef2f8a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" [[package]] name = "afl" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330d7251127b228cb4187ac2373dc37f615d65199f93b5443edeeed839fff5df" +checksum = "8c80b57a86234ee3e9238f5f2d33d37f8fd5c7ff168c07f2d5147d410e86db33" dependencies = [ "home", "libc 0.2.146", @@ -3370,9 +3370,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -3380,9 
+3380,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", @@ -3402,9 +3402,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index a9f7eb7586e5..321a6614350e 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -202,9 +202,21 @@ impl tikv_kv::Engine for RaftKv2 { let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); cmd.set_requests(vec![req].into()); - let f = self.router.snapshot(cmd); + let res: tikv_kv::Result<()> = (|| { + fail_point!("raftkv_async_snapshot_err", |_| { + Err(box_err!("injected error for async_snapshot")) + }); + Ok(()) + })(); + let f = if res.is_err() { + None + } else { + Some(self.router.snapshot(cmd)) + }; + async move { - let res = f.await; + res?; + let res = f.unwrap().await; match res { Ok(snap) => { let elapse = begin_instant.saturating_elapsed_secs(); diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index 2ec1109edd4d..c8777282787f 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -9,14 +9,16 @@ use kvproto::{ }; use test_raftstore::{ configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_prewrite, - must_kv_prewrite_with, must_new_cluster_and_kv_client, must_new_cluster_mul, - new_server_cluster, try_kv_prewrite_with, try_kv_prewrite_with_impl, + must_kv_prewrite_with, must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with, + try_kv_prewrite_with_impl, }; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, HandyRwLock}; -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_batch_get_memory_lock() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut req = BatchGetRequest::default(); req.set_context(ctx); @@ -32,9 +34,10 @@ fn test_batch_get_memory_lock() { fail::remove("raftkv_async_snapshot_err"); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_kv_scan_memory_lock() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut req = ScanRequest::default(); req.set_context(ctx); @@ -50,9 +53,10 @@ fn test_kv_scan_memory_lock() { fail::remove("raftkv_async_snapshot_err"); } -#[test] +#[test_case(test_raftstore::must_new_cluster_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_mul)] fn test_snapshot_not_block_grpc() { - let (cluster, leader, ctx) = must_new_cluster_mul(1); + let (cluster, leader, ctx) = new_cluster(1); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env) .keepalive_time(Duration::from_millis(500)) @@ -77,6 +81,8 @@ fn test_snapshot_not_block_grpc() { 
fail::remove("after-snapshot"); } +// the result notify mechanism is different in raft-v2, so no need to add a +// equivalent case for v2. #[test] fn test_undetermined_write_err() { let (cluster, leader, ctx) = must_new_cluster_mul(1); @@ -109,6 +115,7 @@ fn test_undetermined_write_err() { // The previous panic hasn't been captured. assert!(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| drop(cluster))).is_err()); } + #[test] fn test_stale_read_on_local_leader() { let mut cluster = new_server_cluster(0, 1); From 3f7c63646ef5ea842f7ce6552826976feda2f609 Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 24 Nov 2023 18:40:14 +0800 Subject: [PATCH 159/203] ctl: backoff load key range in finish flashback when meet `notLeader` or `regionNotFound` (#16058) close tikv/tikv#15712 Root: After `PrepareFlashback` the region and the region leader transfer, when executing `FinishFlashback` will meet `notLeader`. Since the tikv ctl retry `FinishFlashback` for the same peer, it just keeps doing useless retries. Solution: neet to support backoff load key range to identify peer in finish flashback when meet `notLeader` or `regionNotFound` Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 10 +++++----- cmd/tikv-ctl/src/main.rs | 18 ++++++++++++++++-- src/server/debug.rs | 16 ++++++++++------ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index a20d6ce26026..3e4e505a32aa 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -715,7 +715,7 @@ pub trait DebugExecutor { _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange>; + ) -> Result<(), (KeyRange, grpcio::Error)>; fn get_region_read_progress(&self, region_id: u64, log: bool, min_start_ts: u64); } @@ -948,7 +948,7 @@ impl DebugExecutor for DebugClient { key_range: KeyRange, start_ts: u64, commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { let mut req = FlashbackToVersionRequest::default(); req.set_version(version); req.set_region_id(region_id); @@ -963,7 +963,7 @@ impl DebugExecutor for DebugClient { "flashback key_range {:?} with start_ts {:?}, commit_ts {:?} need to retry, err is {:?}", key_range, start_ts, commit_ts, err ); - Err(key_range) + Err((key_range, err)) } } } @@ -1293,7 +1293,7 @@ where _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { unimplemented!("only available for remote mode"); } @@ -1515,7 +1515,7 @@ impl DebugExecutor for DebuggerImplV2 { _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { unimplemented!("only available for remote mode"); } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index a3961bbc928f..b57a99f8345f 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -912,7 +912,7 @@ fn flashback_whole_cluster( .await { Ok(res) => { - if let Err(key_range) = res { + if let Err((key_range, _)) = res { // Retry specific key range to prepare flashback. let stale_key_range = (key_range.start_key.clone(), key_range.end_key.clone()); let mut key_range_to_prepare = key_range_to_prepare.write().unwrap(); @@ -992,7 +992,21 @@ fn flashback_whole_cluster( { Ok(res) => match res { Ok(_) => break, - Err(_) => { + Err((key_range, err)) => { + // Retry `NotLeader` or `RegionNotFound`. 
+ if err.to_string().contains("not leader") || err.to_string().contains("not found") { + // When finished `PrepareFlashback`, the region may change leader in the `flashback in progress` + // Neet to retry specific key range to finish flashback. + let stale_key_range = (key_range.start_key.clone(), key_range.end_key.clone()); + let mut key_range_to_finish = key_range_to_finish.write().unwrap(); + // Remove stale key range. + key_range_to_finish.remove(&stale_key_range); + load_key_range(&pd_client, stale_key_range.0.clone(), stale_key_range.1.clone()) + .into_iter().for_each(|(key_range, region_info)| { + // Need to update `key_range_to_finish` to replace stale key range. + key_range_to_finish.insert(key_range, region_info); + }); + } thread::sleep(Duration::from_micros(WAIT_APPLY_FLASHBACK_STATE)); continue; } diff --git a/src/server/debug.rs b/src/server/debug.rs index 9e01852455cc..70e1df855d51 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -1111,9 +1111,11 @@ async fn async_key_range_flashback_to_version ?resp.get_error(), "region_err" => ?resp.get_region_error()); - return Err(Error::FlashbackFailed( - "exec prepare flashback failed.".into(), - )); + return Err(Error::FlashbackFailed(format!( + "exec prepare flashback failed: resp err is: {:?}, region err is: {:?}", + resp.get_error(), + resp.get_region_error() + ))); } } else { let mut req = kvrpcpb::FlashbackToVersionRequest::new(); @@ -1127,9 +1129,11 @@ async fn async_key_range_flashback_to_version ?resp.get_error(), "region_err" => ?resp.get_region_error()); - return Err(Error::FlashbackFailed( - "exec finish flashback failed.".into(), - )); + return Err(Error::FlashbackFailed(format!( + "exec finish flashback failed: resp err is: {:?}, region err is: {:?}", + resp.get_error(), + resp.get_region_error() + ))); } } Ok(()) From 53e05485b0030f2b26905d4c83a3463be561ff49 Mon Sep 17 00:00:00 2001 From: tongjian Date: Mon, 27 Nov 2023 10:16:13 +0800 Subject: [PATCH 160/203] server: grpc metrics consider request group priority (#15911) ref tikv/tikv#15803 Signed-off-by: bufferflies <1045931706@qq.com> Signed-off-by: GitHub Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.json | 30 +++++++++++++ src/server/metrics.rs | 26 ++++++++++- src/server/service/batch.rs | 44 +++++++++++++++--- src/server/service/kv.rs | 65 +++++++++++++++++++++++---- src/storage/mod.rs | 75 ++++++++++++++++++++++--------- 5 files changed, 201 insertions(+), 39 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b5d45d2fea0a..f73a59cf3779 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4990,6 +4990,16 @@ "metric": "tikv_grpc_msg_duration_seconds_bucket", "refId": "A", "step": 10 + }, + { + "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type,priority)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], @@ -5200,6 +5210,16 @@ "legendFormat": "{{type}}", "refId": "A", "step": 10 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type,priority))", + "format": 
"time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], @@ -5305,6 +5325,16 @@ "legendFormat": "{{type}}", "refId": "A", "step": 10 + }, + { + "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 122748cdfa9a..cef725c3f28b 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -99,6 +99,13 @@ make_auto_flush_static_metric! { fail, } + pub label_enum ResourcePriority { + high, + medium, + low, + unknown, + } + pub struct GcCommandCounterVec: LocalIntCounter { "type" => GcCommandKind, } @@ -134,6 +141,7 @@ make_auto_flush_static_metric! { pub struct GrpcMsgHistogramVec: LocalHistogram { "type" => GrpcTypeKind, + "priority" => ResourcePriority, } pub struct ReplicaReadLockCheckHistogramVec: LocalHistogram { @@ -234,7 +242,7 @@ lazy_static! { pub static ref GRPC_MSG_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( "tikv_grpc_msg_duration_seconds", "Bucketed histogram of grpc server messages", - &["type"], + &["type","priority"], exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s ) .unwrap(); @@ -600,3 +608,19 @@ pub fn record_request_source_metrics(source: String, duration: Duration) { } }); } + +impl From for ResourcePriority { + fn from(priority: u64) -> Self { + // the mapping definition of priority in TIDB repo, + // see: https://github.com/tikv/tikv/blob/a0dbe2d0b893489015fc99ae73c6646f7989fe32/components/resource_control/src/resource_group.rs#L79-L89 + if priority == 0 { + Self::unknown + } else if priority < 6 { + Self::low + } else if priority < 11 { + Self::medium + } else { + Self::high + } + } +} diff --git a/src/server/service/batch.rs b/src/server/service/batch.rs index ba377bed4d2d..3cc9a45e9dca 100644 --- a/src/server/service/batch.rs +++ b/src/server/service/batch.rs @@ -12,7 +12,7 @@ use tracker::{with_tls_tracker, RequestInfo, RequestType, Tracker, TrackerToken, use crate::{ server::{ - metrics::{GrpcTypeKind, REQUEST_BATCH_SIZE_HISTOGRAM_VEC}, + metrics::{GrpcTypeKind, ResourcePriority, REQUEST_BATCH_SIZE_HISTOGRAM_VEC}, service::kv::{batch_commands_response, GrpcRequestDuration, MeasuredSingleResponse}, }, storage::{ @@ -162,6 +162,7 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse res: Result<(Option>, Statistics)>, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ) { let mut resp = GetResponse::default(); if let Some(err) = extract_region_error(&res) { @@ -185,9 +186,13 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse cmd: Some(batch_commands_response::response::Cmd::Get(resp)), ..Default::default() }; - let mesure = - GrpcRequestDuration::new(begin, GrpcTypeKind::kv_batch_get_command, request_source); - let task = MeasuredSingleResponse::new(id, res, mesure); + let measure = GrpcRequestDuration::new( + begin, + GrpcTypeKind::kv_batch_get_command, + request_source, + resource_priority, + ); + let task = MeasuredSingleResponse::new(id, res, 
measure); if self.tx.send_with(task, WakePolicy::Immediately).is_err() { error!("KvService response batch commands fail"); } @@ -201,6 +206,7 @@ impl ResponseBatchConsumer>> for GetCommandResponseConsumer { res: Result>>, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ) { let mut resp = RawGetResponse::default(); if let Some(err) = extract_region_error(&res) { @@ -216,9 +222,13 @@ impl ResponseBatchConsumer>> for GetCommandResponseConsumer { cmd: Some(batch_commands_response::response::Cmd::RawGet(resp)), ..Default::default() }; - let mesure = - GrpcRequestDuration::new(begin, GrpcTypeKind::raw_batch_get_command, request_source); - let task = MeasuredSingleResponse::new(id, res, mesure); + let measure = GrpcRequestDuration::new( + begin, + GrpcTypeKind::raw_batch_get_command, + request_source, + resource_priority, + ); + let task = MeasuredSingleResponse::new(id, res, measure); if self.tx.send_with(task, WakePolicy::Immediately).is_err() { error!("KvService response batch commands fail"); } @@ -241,6 +251,15 @@ fn future_batch_get_command( .zip(gets.iter()) .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) .collect(); + + let group_priority = gets + .first() + .unwrap() + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); + let res = storage.batch_get_command( gets, requests, @@ -266,6 +285,7 @@ fn future_batch_get_command( begin_instant, GrpcTypeKind::kv_batch_get_command, source, + resource_priority, ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_with(task, WakePolicy::Immediately).is_err() { @@ -292,6 +312,15 @@ fn future_batch_raw_get_command( .zip(gets.iter()) .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) .collect(); + + let group_priority = gets + .first() + .unwrap() + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); + let res = storage.raw_batch_get_command( gets, requests, @@ -312,6 +341,7 @@ fn future_batch_raw_get_command( begin_instant, GrpcTypeKind::raw_batch_get_command, source, + resource_priority, ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_with(task, WakePolicy::Immediately).is_err() { diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8426143d502f..01aae59fe183 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -198,8 +198,10 @@ macro_rules! handle_request { let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority= ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -212,6 +214,7 @@ macro_rules! 
handle_request { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .$fn_name + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -430,6 +433,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .kv_prepare_flashback_to_version + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -461,6 +465,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .kv_flashback_to_version + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -480,9 +485,13 @@ impl Tikv for Service { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + ResourcePriority::from(resource_control_ctx.override_priority); } + GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); @@ -495,6 +504,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .coprocessor + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -518,8 +528,11 @@ impl Tikv for Service { ) { let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -533,6 +546,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .raw_coprocessor + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -584,6 +598,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .unsafe_destroy_range + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -607,8 +622,11 @@ impl Tikv for Service { ) { let begin_instant = Instant::now(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -628,6 +646,7 @@ impl Tikv for Service { Ok(_) => { GRPC_MSG_HISTOGRAM_STATIC .coprocessor_stream + .get(resource_group_priority) .observe(begin_instant.saturating_elapsed().as_secs_f64()); let _ = sink.close().await; } @@ -865,6 +884,7 @@ impl Tikv for Service { } GRPC_MSG_HISTOGRAM_STATIC .split_region + .unknown .observe(begin_instant.saturating_elapsed().as_secs_f64()); sink.success(resp).await?; 
ServerResult::Ok(()) @@ -1017,6 +1037,7 @@ impl Tikv for Service { let regions = resp.await?; GRPC_MSG_HISTOGRAM_STATIC .check_leader + .unknown .observe(begin_instant.saturating_elapsed().as_secs_f64()); let mut resp = CheckLeaderResponse::default(); resp.set_ts(ts); @@ -1029,6 +1050,11 @@ impl Tikv for Service { } return Err(Error::from(e)); } + let elapsed = begin_instant.saturating_elapsed(); + GRPC_MSG_HISTOGRAM_STATIC + .check_leader + .unknown + .observe(elapsed.as_secs_f64()); ServerResult::Ok(()) } .map_err(move |e| { @@ -1098,6 +1124,7 @@ fn response_batch_commands_request( begin: Instant, label: GrpcTypeKind, source: String, + resource_priority: ResourcePriority, ) where MemoryTraceGuard: From, F: Future> + Send + 'static, @@ -1108,6 +1135,7 @@ fn response_batch_commands_request( begin, label, source, + resource_priority, }; let task = MeasuredSingleResponse::new(id, resp, measure); if let Err(e) = tx.send_with(task, WakePolicy::Immediately) { @@ -1146,15 +1174,18 @@ fn handle_batch_commands_request( // For some invalid requests. let begin_instant = Instant::now(); let resp = future::ok(batch_commands_response::Response::default()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default(), ResourcePriority::unknown); }, Some(batch_commands_request::request::Cmd::Get(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } + GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) @@ -1166,13 +1197,15 @@ fn handle_batch_commands_request( let resp = future_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::Get)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.kv_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source,resource_group_priority); } }, Some(batch_commands_request::request::Cmd::RawGet(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1187,17 +1220,19 @@ fn handle_batch_commands_request( let resp = future_raw_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::RawGet)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.raw_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source,resource_group_priority); } }, Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { let 
resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority ); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); let resp = future_copr(copr, Some(peer.to_string()), req) @@ -1205,7 +1240,7 @@ fn handle_batch_commands_request( resp.map(oneof!(batch_commands_response::response::Cmd::Coprocessor)) }) .map_err(|_| GRPC_MSG_FAIL_COUNTER.coprocessor.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor, source,resource_group_priority); }, Some(batch_commands_request::request::Cmd::Empty(req)) => { let begin_instant = Instant::now(); @@ -1222,12 +1257,15 @@ fn handle_batch_commands_request( begin_instant, GrpcTypeKind::invalid, String::default(), + ResourcePriority::unknown, ); } $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1237,7 +1275,7 @@ fn handle_batch_commands_request( let resp = $future_fn($($arg,)* req) .map_ok(oneof!(batch_commands_response::response::Cmd::$cmd)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.$metric_name.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source,resource_group_priority); })* Some(batch_commands_request::request::Cmd::Import(_)) => unimplemented!(), } @@ -1287,10 +1325,12 @@ fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { label, begin, source, + resource_priority, } = measure; let elapsed = now.saturating_duration_since(begin); GRPC_MSG_HISTOGRAM_STATIC .get(label) + .get(resource_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); let exec_details = resp.cmd.as_mut().and_then(|cmd| match cmd { @@ -2233,13 +2273,20 @@ pub struct GrpcRequestDuration { pub begin: Instant, pub label: GrpcTypeKind, pub source: String, + pub resource_priority: ResourcePriority, } impl GrpcRequestDuration { - pub fn new(begin: Instant, label: GrpcTypeKind, source: String) -> Self { + pub fn new( + begin: Instant, + label: GrpcTypeKind, + source: String, + resource_priority: ResourcePriority, + ) -> Self { GrpcRequestDuration { begin, label, source, + resource_priority, } } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c89a767a80ba..2bdc07625ee0 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -121,7 +121,7 @@ pub use self::{ use self::{kv::SnapContext, test_util::latest_feature_gate}; use crate::{ 
read_pool::{ReadPool, ReadPoolHandle}, - server::lock_manager::waiter_manager, + server::{lock_manager::waiter_manager, metrics::ResourcePriority}, storage::{ config::Config, kv::{with_tls_engine, Modify, WriteData}, @@ -776,17 +776,20 @@ impl Storage { let priority = requests[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(requests[0].get_context().get_resource_control_context()); + let resource_group_name = requests[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(); + let group_priority = requests[0] + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { r.get_resource_limiter( - requests[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name(), + resource_group_name, requests[0].get_context().get_request_source(), - requests[0] - .get_context() - .get_resource_control_context() - .get_override_priority(), + group_priority, ) }); let concurrency_manager = self.concurrency_manager.clone(); @@ -862,7 +865,7 @@ impl Storage { snap_ctx } Err(e) => { - consumer.consume(id, Err(e), begin_instant, source); + consumer.consume(id, Err(e), begin_instant, source, resource_priority); continue; } }; @@ -901,7 +904,13 @@ impl Storage { ) = req_snap; let snap_res = snap.await; if let Err(e) = deadline.check() { - consumer.consume(id, Err(Error::from(e)), begin_instant, source); + consumer.consume( + id, + Err(Error::from(e)), + begin_instant, + source, + resource_priority, + ); continue; } @@ -933,6 +942,7 @@ impl Storage { .map(|v| (v, stat)), begin_instant, source, + resource_priority, ); } Err(e) => { @@ -941,12 +951,13 @@ impl Storage { Err(Error::from(txn::Error::from(e))), begin_instant, source, + resource_priority, ); } } }), Err(e) => { - consumer.consume(id, Err(e), begin_instant, source); + consumer.consume(id, Err(e), begin_instant, source, resource_priority); } } } @@ -1756,17 +1767,20 @@ impl Storage { // all requests in a batch have the same region, epoch, term, replica_read let priority = gets[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(gets[0].get_context().get_resource_control_context()); + let resource_group_name = gets[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(); + let group_priority = gets[0] + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { r.get_resource_limiter( - gets[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name(), + resource_group_name, gets[0].get_context().get_request_source(), - gets[0] - .get_context() - .get_resource_control_context() - .get_override_priority(), + group_priority, ) }); let priority_tag = get_priority_tag(priority); @@ -1848,6 +1862,7 @@ impl Storage { .map_err(Error::from), begin_instant, ctx.take_request_source(), + resource_priority, ); tls_collect_read_flow( ctx.get_region_id(), @@ -1863,12 +1878,19 @@ impl Storage { Err(e), begin_instant, ctx.take_request_source(), + resource_priority, ); } } } Err(e) => { - consumer.consume(id, Err(e), begin_instant, ctx.take_request_source()); + consumer.consume( + id, + Err(e), + begin_instant, + ctx.take_request_source(), + resource_priority, + ); } } } @@ -3445,6 +3467,7 @@ pub trait ResponseBatchConsumer: Send { 
res: Result, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ); } @@ -3745,6 +3768,7 @@ pub mod test_util { res: Result<(Option>, Statistics)>, _: Instant, _source: String, + _resource_priority: ResourcePriority, ) { self.data.lock().unwrap().push(GetResult { id, @@ -3754,7 +3778,14 @@ pub mod test_util { } impl ResponseBatchConsumer>> for GetConsumer { - fn consume(&self, id: u64, res: Result>>, _: Instant, _source: String) { + fn consume( + &self, + id: u64, + res: Result>>, + _: Instant, + _source: String, + _resource_priority: ResourcePriority, + ) { self.data.lock().unwrap().push(GetResult { id, res }); } } From 3b30d692c5ed0c5bdd8922a4129a715300182ed1 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 27 Nov 2023 12:08:43 +0800 Subject: [PATCH 161/203] cdc: limit pending scan tasks (#16048) close tikv/tikv#16035 When TiCDC starts changefeed, it may send numerous requests leading to the creation of numerous scan tasks. However, the initial surge of scan tasks may cause OOM. This commit aims to resolve the issue by implementing a mechanism that allows TiKV to reject requests when the number of pending tasks reaches a certain limit. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/endpoint.rs | 155 +++++++++++++++++++-- src/config/mod.rs | 24 ++++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 5 +- 4 files changed, 170 insertions(+), 15 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e62650c77c6d..e1a985d4e981 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -5,7 +5,10 @@ use std::{ cmp::{Ord, Ordering as CmpOrdering, PartialOrd, Reverse}, collections::BinaryHeap, fmt, - sync::{Arc, Mutex as StdMutex}, + sync::{ + atomic::{AtomicIsize, Ordering}, + Arc, Mutex as StdMutex, + }, time::Duration, }; @@ -382,6 +385,8 @@ pub struct Endpoint { // Incremental scan workers: Runtime, + // The total number of scan tasks including running and pending. + scan_task_counter: Arc, scan_concurrency_semaphore: Arc, scan_speed_limiter: Limiter, fetch_speed_limiter: Limiter, @@ -475,6 +480,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint self.config.incremental_scan_concurrency_limit as isize { + debug!("cdc rejects registration, too many scan tasks"; + "region_id" => region_id, + "conn_id" => ?conn_id, + "req_id" => request_id, + "scan_task_count" => scan_task_count, + "incremental_scan_concurrency_limit" => self.config.incremental_scan_concurrency_limit, + ); + // To avoid OOM (e.g., https://github.com/tikv/tikv/issues/16035), + // TiKV needs to reject and return error immediately. + // + // TODO: TiKV is supposed to return a "busy" error, but for the sake + // of compatibility, it returns a "region not found" error. 
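The rejection path above reduces to a small pattern: an atomic counter of running plus pending scan tasks, bumped on registration and checked against `incremental_scan_concurrency_limit`. The following is a minimal, illustrative sketch of that pattern only (the names are invented, and it assumes the reserved slot is released when the scan task finishes, which this hunk does not show); it is not the endpoint's actual code:

    use std::sync::{
        atomic::{AtomicIsize, Ordering},
        Arc,
    };

    /// Releases the reserved slot when the scan task finishes or is dropped.
    struct ScanTaskGuard(Arc<AtomicIsize>);

    impl Drop for ScanTaskGuard {
        fn drop(&mut self) {
            self.0.fetch_sub(1, Ordering::Relaxed);
        }
    }

    /// Reserves a slot for a new scan task, or returns `None` once the number
    /// of running plus pending tasks has reached `limit`.
    fn try_reserve_scan_task(counter: &Arc<AtomicIsize>, limit: usize) -> Option<ScanTaskGuard> {
        // `fetch_add` returns the previous value, i.e. the task count before
        // this registration.
        if counter.fetch_add(1, Ordering::Relaxed) >= limit as isize {
            // Over the limit: undo the increment and reject the registration.
            counter.fetch_sub(1, Ordering::Relaxed);
            return None;
        }
        Some(ScanTaskGuard(counter.clone()))
    }

On the rejection branch the endpoint additionally reports the error to the downstream, which is what the `sink_region_not_found` call below does.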
+ let _ = downstream.sink_region_not_found(region_id); + return; + } + let txn_extra_op = match self.store_meta.lock().unwrap().reader(region_id) { Some(reader) => reader.txn_extra_op.clone(), None => { @@ -842,6 +870,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint Task { + fn set_conn_version_task(conn_id: ConnId, version: semver::Version) -> Task { Task::SetConnVersion { conn_id, version, @@ -1541,7 +1570,7 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id, FeatureGate::batch_resolved_ts(), )); @@ -1828,7 +1857,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(0, 0, 0), + )); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -1880,7 +1912,7 @@ mod tests { // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2016,6 +2048,97 @@ mod tests { } } + #[test] + fn test_too_many_scan_tasks() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + incremental_scan_concurrency: 1, + incremental_scan_concurrency_limit: 1, + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); + + // Pause scan task runtime. + suite.endpoint.workers = Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + let (pause_tx, pause_rx) = std::sync::mpsc::channel::<()>(); + suite.endpoint.workers.spawn(async move { + let _ = pause_rx.recv(); + }); + + suite.add_region(1, 100); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); + let (tx, mut rx) = channel::channel(1, quota); + let mut rx = rx.drain(); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_version_task(conn_id, version)); + + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + req.set_region_id(1); + req.set_request_id(1); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.endpoint.capture_regions.len(), 1); + + // Test too many scan tasks error. 
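+        // With incremental_scan_concurrency_limit = 1 and the scan runtime paused
+        // above, this second registration exceeds the limit and must be rejected
+        // with a region-not-found error.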
+ req.set_request_id(2); + let downstream = Downstream::new( + "".to_string(), + region_epoch, + 2, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + if let CdcEvent::Event(mut e) = cdc_event.0 { + assert_eq!(e.region_id, 1); + assert_eq!(e.request_id, 2); + let event = e.event.take().unwrap(); + match event { + Event_oneof_event::Error(err) => { + assert!(err.has_region_not_found()); + } + other => panic!("unknown event {:?}", other), + } + } else { + panic!("unknown cdc event {:?}", cdc_event); + } + + drop(pause_tx); + } + #[test] fn test_raw_causal_min_ts() { let sleep_interval = Duration::from_secs(1); @@ -2062,7 +2185,7 @@ mod tests { // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2151,7 +2274,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(4, 0, 5))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(4, 0, 5), + )); req.set_region_id(3); req.set_request_id(3); @@ -2222,7 +2348,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(0, 0, 0), + )); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2375,7 +2504,7 @@ mod tests { let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); for region_id in region_ids { suite.add_region(region_id, 100); @@ -2488,7 +2617,7 @@ mod tests { let conn_a = Conn::new(tx1, String::new()); let conn_id_a = conn_a.get_id(); suite.run(Task::OpenConn { conn: conn_a }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id_a, semver::Version::new(0, 0, 0), )); @@ -2499,7 +2628,7 @@ mod tests { let conn_b = Conn::new(tx2, String::new()); let conn_id_b = conn_b.get_id(); suite.run(Task::OpenConn { conn: conn_b }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id_b, semver::Version::new(0, 0, 0), )); @@ -2656,7 +2785,7 @@ mod tests { suite.run(Task::OpenConn { conn }); // Enable batch resolved ts in the test. 
let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2749,7 +2878,7 @@ mod tests { suite.run(Task::OpenConn { conn }); let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); diff --git a/src/config/mod.rs b/src/config/mod.rs index 27f38abee4a7..a862d01ace4f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2956,7 +2956,12 @@ pub struct CdcConfig { // TODO(hi-rustin): Consider resizing the thread pool based on `incremental_scan_threads`. #[online_config(skip)] pub incremental_scan_threads: usize, + // The number of scan tasks that is allowed to run concurrently. pub incremental_scan_concurrency: usize, + // The number of scan tasks that is allowed to be created. In other words, + // there will be at most `incremental_scan_concurrency_limit - incremental_scan_concurrency` + // number of scan tasks that is waitting to run. + pub incremental_scan_concurrency_limit: usize, /// Limit scan speed based on disk I/O traffic. pub incremental_scan_speed_limit: ReadableSize, /// Limit scan speed based on memory accesing traffic. @@ -2999,6 +3004,8 @@ impl Default for CdcConfig { incremental_scan_threads: 4, // At most 6 concurrent running tasks. incremental_scan_concurrency: 6, + // At most 10000 tasks can exist simultaneously. + incremental_scan_concurrency_limit: 10000, // TiCDC requires a SSD, the typical write speed of SSD // is more than 500MB/s, so 128MB/s is enough. incremental_scan_speed_limit: ReadableSize::mb(128), @@ -3040,6 +3047,14 @@ impl CdcConfig { ); self.incremental_scan_concurrency = self.incremental_scan_threads } + if self.incremental_scan_concurrency_limit < self.incremental_scan_concurrency { + warn!( + "cdc.incremental-scan-concurrency-limit must be larger than cdc.incremental-scan-concurrency, + change it to {}", + self.incremental_scan_concurrency + ); + self.incremental_scan_concurrency_limit = self.incremental_scan_concurrency + } if self.incremental_scan_ts_filter_ratio < 0.0 || self.incremental_scan_ts_filter_ratio > 1.0 { @@ -6806,6 +6821,15 @@ mod tests { let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); + let content = r#" + [cdc] + incremental-scan-concurrency = 6 + incremental-scan-concurrency-limit = 0 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert!(cfg.cdc.incremental_scan_concurrency_limit >= cfg.cdc.incremental_scan_concurrency); + let content = r#" [storage] engine = "partitioned-raft-kv" diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d49f5e50c0d0..5e7e4529c405 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -845,6 +845,7 @@ fn test_serde_custom_tikv_config() { hibernate_regions_compatible: false, incremental_scan_threads: 3, incremental_scan_concurrency: 4, + incremental_scan_concurrency_limit: 5, incremental_scan_speed_limit: ReadableSize(7), incremental_fetch_speed_limit: ReadableSize(8), incremental_scan_ts_filter_ratio: 0.7, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 30a501b1cee5..d1e83663c24f 100644 --- a/tests/integrations/config/test-custom.toml +++ 
b/tests/integrations/config/test-custom.toml @@ -16,8 +16,8 @@ max-backups = 2 max-days = 3 [memory] -enable-heap-profiling = false -profiling-sample-per-bytes = "1MB" +enable-heap-profiling = false +profiling-sample-per-bytes = "1MB" [readpool.unified] min-thread-count = 5 @@ -700,6 +700,7 @@ old-value-cache-size = 0 hibernate-regions-compatible = false incremental-scan-threads = 3 incremental-scan-concurrency = 4 +incremental-scan-concurrency-limit = 5 incremental-scan-speed-limit = 7 incremental-fetch-speed-limit = 8 incremental-scan-ts-filter-ratio = 0.7 From 9711e316e3c0581b3703ae2b485ef9abdbbb718f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 27 Nov 2023 14:37:15 +0800 Subject: [PATCH 162/203] raftstore: update apply state even if peer is removed (#16060) close tikv/tikv#16069, close pingcap/tidb#48802 When a peer is removed, it is necessary to update its apply state because this peer may be simultaneously taking a snapshot. An outdated apply state will invalidate the coprocessor cache assumption and potentially lead to a violation of linearizability (returning stale cache). Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/apply.rs | 146 +++++++++++++++++++- 1 file changed, 143 insertions(+), 3 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 339dff68e76e..1639f441e384 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -662,9 +662,7 @@ where results: VecDeque>, ) { if self.host.pre_persist(&delegate.region, true, None) { - if !delegate.pending_remove { - delegate.maybe_write_apply_state(self); - } + delegate.maybe_write_apply_state(self); self.commit_opt(delegate, false); } else { debug!("do not persist when finish_for"; @@ -5532,6 +5530,21 @@ mod tests { ) } + fn cb_conf_change( + idx: u64, + term: u64, + tx: Sender, + ) -> Proposal> { + proposal( + true, + idx, + term, + Callback::write(Box::new(move |resp: WriteResponse| { + tx.send(resp.response).unwrap(); + })), + ) + } + struct EntryBuilder { entry: Entry, req: RaftCmdRequest, @@ -5659,6 +5672,14 @@ mod tests { self } + fn conf_change(mut self, changes: Vec) -> EntryBuilder { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::ChangePeerV2); + req.mut_change_peer_v2().set_changes(changes.into()); + self.req.set_admin_request(req); + self + } + fn build(mut self) -> Entry { self.entry .set_data(self.req.write_to_bytes().unwrap().into()); @@ -7656,6 +7677,125 @@ mod tests { system.shutdown(); } + // When a peer is removed, it is necessary to update its apply state because + // this peer may be simultaneously taking a snapshot. An outdated apply state + // invalidates the coprocessor cache assumption (apply state must match data + // in the snapshot) and potentially lead to a violation of linearizability + // (returning stale cache). 
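+    // The test below applies a normal write, then a ChangePeerV2 command that
+    // removes the peer itself, and asserts that the persisted apply state still
+    // advances past the state recorded before the removal.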
+ #[test] + fn test_conf_change_remove_node_update_apply_state() { + let (_path, engine) = create_tmp_engine("test-delegate"); + let (_import_dir, importer) = create_tmp_importer("test-delegate"); + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + term: 1, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.set_end_key(b"k5".to_vec()); + reg.region.mut_region_epoch().set_version(3); + let peers = vec![new_peer(2, 3), new_peer(4, 5), new_learner_peer(6, 7)]; + reg.region.set_peers(peers.into()); + let (tx, apply_res_rx) = mpsc::channel(); + let sender = Box::new(TestNotifier { tx }); + let coprocessor_host = CoprocessorHost::::default(); + let (region_scheduler, _) = dummy_scheduler(); + let cfg = Arc::new(VersionTrack::new(Config::default())); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-store".to_owned(), + cfg, + sender, + importer, + region_scheduler, + coprocessor_host, + engine: engine.clone(), + router: router.clone(), + store_id: 2, + pending_create_peers, + }; + system.spawn("test-conf-change".to_owned(), builder); + + router.schedule_task(1, Msg::Registration(reg.dup())); + + let mut index_id = 1; + let epoch = reg.region.get_region_epoch().to_owned(); + + // Write some data. + let (capture_tx, capture_rx) = mpsc::channel(); + let put_entry = EntryBuilder::new(index_id, 1) + .put(b"k1", b"v1") + .epoch(epoch.get_conf_ver(), epoch.get_version()) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![put_entry], + vec![cb(index_id, 1, capture_tx)], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let initial_state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap(); + assert_ne!(initial_state.get_applied_index(), 0); + match apply_res_rx.recv_timeout(Duration::from_secs(3)) { + Ok(PeerMsg::ApplyRes { + res: TaskRes::Apply(apply_res), + }) => assert_eq!(apply_res.apply_state, initial_state), + e => panic!("unexpected result: {:?}", e), + } + index_id += 1; + + // Remove itself. 
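+        // Removing the peer itself marks the apply delegate as pending_remove;
+        // with the pending_remove guard dropped above, the apply state is still
+        // persisted in that case.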
+ let (capture_tx, capture_rx) = mpsc::channel(); + let mut remove_node = ChangePeerRequest::default(); + remove_node.set_change_type(ConfChangeType::RemoveNode); + remove_node.set_peer(new_peer(2, 3)); + let conf_change = EntryBuilder::new(index_id, 1) + .conf_change(vec![remove_node]) + .epoch(epoch.get_conf_ver(), epoch.get_version()) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![conf_change], + vec![cb_conf_change(index_id, 1, capture_tx)], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + let apply_state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap(); + match apply_res_rx.recv_timeout(Duration::from_secs(3)) { + Ok(PeerMsg::ApplyRes { + res: TaskRes::Apply(apply_res), + }) => assert_eq!(apply_res.apply_state, apply_state), + e => panic!("unexpected result: {:?}", e), + } + assert!( + apply_state.get_applied_index() > initial_state.get_applied_index(), + "\n{:?}\n{:?}", + apply_state, + initial_state + ); + + system.shutdown(); + } + #[test] fn pending_cmd_leak() { let res = panic_hook::recover_safe(|| { From 88542955b6953815e1f5ca53071f60084f61632d Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 27 Nov 2023 15:52:15 +0800 Subject: [PATCH 163/203] sst_importer: Use generic sst reader for importer (#16059) ref tikv/tikv#15986 use generic sst reader for importer Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 + components/engine_panic/src/sst.rs | 10 +- components/engine_rocks/src/encryption.rs | 13 +- components/engine_rocks/src/lib.rs | 6 +- components/engine_rocks/src/sst.rs | 54 +++--- components/engine_traits/src/sst.rs | 6 +- components/engine_traits_tests/src/sst.rs | 13 +- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/apply.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 4 +- components/raftstore-v2/src/raft/apply.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 4 +- .../raftstore/src/store/compaction_guard.rs | 3 +- components/raftstore/src/store/fsm/apply.rs | 8 +- components/raftstore/src/store/fsm/store.rs | 6 +- .../raftstore/src/store/worker/cleanup.rs | 4 +- .../raftstore/src/store/worker/cleanup_sst.rs | 11 +- components/server/src/server.rs | 2 +- components/server/src/server2.rs | 2 +- components/sst_importer/src/import_file.rs | 34 ++-- components/sst_importer/src/sst_importer.rs | 181 +++++++++++------- components/sst_importer/src/sst_writer.rs | 5 +- components/test_raftstore/src/server.rs | 2 +- components/tikv_kv/Cargo.toml | 1 + components/tikv_kv/src/rocksdb_engine.rs | 5 +- src/import/sst_service.rs | 12 +- src/server/node.rs | 4 +- src/server/raftkv2/node.rs | 4 +- 28 files changed, 229 insertions(+), 182 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ebbbef2f8a2..d191ca34188a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6556,6 +6556,7 @@ version = "0.1.0" dependencies = [ "backtrace", "collections", + "encryption", "engine_panic", "engine_rocks", "engine_test", diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index 119cd5884a33..2e51c0b87b57 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -18,18 +18,18 @@ impl SstExt for PanicEngine { pub struct PanicSstReader; impl SstReader for PanicSstReader { - fn open(path: &str) -> Result { - panic!() - } - fn open_encrypted( + fn open( 
path: &str, - mgr: Arc, + mgr: Option>, ) -> Result { panic!() } fn verify_checksum(&self) -> Result<()> { panic!() } + fn kv_count_and_size(&self) -> (u64, u64) { + panic!() + } } impl RefIterable for PanicSstReader { diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 4dbe3ab10d28..58d359b39df4 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -2,7 +2,6 @@ use std::{io::Result, sync::Arc}; -use encryption::{self, DataKeyManager}; use engine_traits::{EncryptionKeyManager, EncryptionMethod, FileEncryptionInfo}; use rocksdb::{ DBEncryptionMethod, EncryptionKeyManager as DBEncryptionKeyManager, @@ -12,16 +11,16 @@ use rocksdb::{ use crate::{r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. -pub(crate) fn get_env( +pub(crate) fn get_env( base_env: Option>, - key_manager: Option>, -) -> engine_traits::Result> { - let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); + key_manager: Option>, +) -> engine_traits::Result>> { if let Some(manager) = key_manager { - Ok(Arc::new( + let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); + Ok(Some(Arc::new( Env::new_key_managed_encrypted_env(base_env, WrappedEncryptionKeyManager { manager }) .map_err(r2e)?, - )) + ))) } else { Ok(base_env) } diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index b5561b3de426..3226a4592f03 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -117,10 +117,10 @@ pub use flow_control_factors::*; pub mod raw; -pub fn get_env( - key_manager: Option>, +pub fn get_env( + key_manager: Option>, limiter: Option>, ) -> engine_traits::Result> { let env = encryption::get_env(None /* base_env */, key_manager)?; - file_system::get_env(Some(env), limiter) + file_system::get_env(env, limiter) } diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 145fa9a7bcef..0a234983aa33 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -3,21 +3,18 @@ use std::{path::PathBuf, sync::Arc}; use engine_traits::{ - EncryptionKeyManager, Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, - SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, + SstExt, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, }; use fail::fail_point; -use kvproto::import_sstpb::SstMeta; +use file_system::get_io_rate_limiter; use rocksdb::{ rocksdb::supported_compression, ColumnFamilyOptions, DBCompressionType, DBIterator, Env, EnvOptions, ExternalSstFileInfo as RawExternalSstFileInfo, SequentialFile, SstFileReader, SstFileWriter, DB, }; -use tikv_util::box_err; -use crate::{ - encryption::WrappedEncryptionKeyManager, engine::RocksEngine, options::RocksReadOptions, r2e, -}; +use crate::{engine::RocksEngine, get_env, options::RocksReadOptions, r2e}; impl SstExt for RocksEngine { type SstReader = RocksSstReader; @@ -30,19 +27,6 @@ pub struct RocksSstReader { } impl RocksSstReader { - pub fn sst_meta_info(&self, sst: SstMeta) -> SstMetaInfo { - let mut meta = SstMetaInfo { - total_kvs: 0, - total_bytes: 0, - meta: sst, - }; - self.inner.read_table_properties(|p| { - meta.total_kvs = p.num_entries(); - meta.total_bytes = p.raw_key_size() + p.raw_value_size(); - }); - meta - } - pub fn open_with_env(path: &str, env: Option>) 
-> Result { let mut cf_options = ColumnFamilyOptions::new(); if let Some(env) = env { @@ -63,20 +47,26 @@ impl RocksSstReader { } impl SstReader for RocksSstReader { - fn open(path: &str) -> Result { - Self::open_with_env(path, None) - } - fn open_encrypted(path: &str, mgr: Arc) -> Result { - let env = Env::new_key_managed_encrypted_env( - Arc::default(), - WrappedEncryptionKeyManager::new(mgr), - ) - .map_err(|err| Error::Other(box_err!("failed to open encrypted env: {}", err)))?; - Self::open_with_env(path, Some(Arc::new(env))) + fn open( + path: &str, + mgr: Option>, + ) -> Result { + let env = get_env(mgr, get_io_rate_limiter())?; + Self::open_with_env(path, Some(env)) } + fn verify_checksum(&self) -> Result<()> { - self.inner.verify_checksum().map_err(r2e)?; - Ok(()) + self.inner.verify_checksum().map_err(r2e) + } + + fn kv_count_and_size(&self) -> (u64, u64) { + let mut count = 0; + let mut bytes = 0; + self.inner.read_table_properties(|p| { + count = p.num_entries(); + bytes = p.raw_key_size() + p.raw_value_size(); + }); + (count, bytes) } } diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index 4a728df1e971..dccd3a2523d9 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -20,10 +20,10 @@ pub trait SstExt: Sized { } /// SstReader is used to read an SST file. -pub trait SstReader: RefIterable + Sized { - fn open(path: &str) -> Result; - fn open_encrypted(path: &str, mgr: Arc) -> Result; +pub trait SstReader: RefIterable + Sized + Send { + fn open(path: &str, mgr: Option>) -> Result; fn verify_checksum(&self) -> Result<()>; + fn kv_count_and_size(&self) -> (u64, u64); } /// SstWriter is used to create sst files that can be added to database later. diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 26ed686aad4b..629c81df5289 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -4,6 +4,7 @@ use std::fs; +use encryption::DataKeyManager; use engine_test::kv::KvTestEngine; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstExt, SstReader, @@ -48,7 +49,7 @@ fn basic() -> Result<()> { sst_writer.put(b"k1", b"v1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -77,7 +78,7 @@ fn forward() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -114,7 +115,7 @@ fn reverse() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_last()?; @@ -152,7 +153,7 @@ fn delete() -> Result<()> { sst_writer.delete(b"k1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -206,7 +207,7 @@ fn same_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = 
::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -248,7 +249,7 @@ fn reverse_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 73c4461024e1..a637eca704bd 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -112,7 +112,7 @@ pub struct StoreContext { pub snap_mgr: TabletSnapManager, pub global_stat: GlobalStoreStat, pub store_stat: LocalStoreStat, - pub sst_importer: Arc, + pub sst_importer: Arc>, pub key_manager: Option>, /// Inspector for latency inspecting @@ -366,7 +366,7 @@ struct StorePollerBuilder { shutdown: Arc, snap_mgr: TabletSnapManager, global_stat: GlobalStoreStat, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, node_start_time: Timespec, // monotonic_raw_now } @@ -386,7 +386,7 @@ impl StorePollerBuilder { shutdown: Arc, snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, node_start_time: Timespec, // monotonic_raw_now ) -> Self { @@ -694,7 +694,7 @@ impl StoreSystem { collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, resource_ctl: Option>, diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index e55c143a33a6..49530fcd6dfd 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -87,7 +87,7 @@ impl ApplyFsm { log_recovery: Option>, applied_term: u64, buckets: Option, - sst_importer: Arc, + sst_importer: Arc>, coprocessor_host: CoprocessorHost, logger: Logger, ) -> (ApplyScheduler, Self) { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 9ccf08d6d549..24d025c0a4d8 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -36,7 +36,7 @@ pub mod test_util { Arc, }; - use engine_traits::{CfName, CF_DEFAULT}; + use engine_traits::{CfName, KvEngine, CF_DEFAULT}; use kvproto::{kvrpcpb::ApiVersion, metapb::RegionEpoch, raft_cmdpb::RaftRequestHeader}; use raft::prelude::{Entry, EntryType}; use raftstore::store::simple_write::SimpleWriteEncoder; @@ -46,7 +46,7 @@ pub mod test_util { use super::{CatchUpLogs, SimpleWriteReqEncoder}; use crate::{fsm::ApplyResReporter, router::ApplyRes}; - pub fn create_tmp_importer() -> (TempDir, Arc) { + pub fn create_tmp_importer() -> (TempDir, Arc>) { let dir = TempDir::new().unwrap(); let importer = Arc::new( SstImporter::new(&Default::default(), dir.path(), None, ApiVersion::V1, true).unwrap(), diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index f3aa5a541c1a..35959dd8aea4 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -76,7 +76,7 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, - sst_importer: Arc, + sst_importer: Arc>, observe: Observe, coprocessor_host: CoprocessorHost, @@ -102,7 +102,7 @@ impl Apply { log_recovery: Option>, applied_term: u64, buckets: Option, - 
sst_importer: Arc, + sst_importer: Arc>, coprocessor_host: CoprocessorHost, tablet_scheduler: Scheduler>, high_priority_pool: FuturePool, @@ -335,7 +335,7 @@ impl Apply { } #[inline] - pub fn sst_importer(&self) -> &SstImporter { + pub fn sst_importer(&self) -> &SstImporter { &self.sst_importer } diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 206e87b3a8e7..b2a6d46e39c5 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -235,7 +235,7 @@ impl Task { pub struct Runner { tablet_registry: TabletRegistry, - sst_importer: Arc, + sst_importer: Arc>, snap_mgr: TabletSnapManager, logger: Logger, @@ -252,7 +252,7 @@ pub struct Runner { impl Runner { pub fn new( tablet_registry: TabletRegistry, - sst_importer: Arc, + sst_importer: Arc>, snap_mgr: TabletSnapManager, logger: Logger, ) -> Self { diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 138d730fa29f..f63a257c9f5e 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -269,6 +269,7 @@ mod tests { use std::{path::Path, str}; use collections::HashMap; + use encryption::DataKeyManager; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -541,7 +542,7 @@ mod tests { } fn collect_keys(path: &str) -> Vec> { - let reader = RocksSstReader::open(path).unwrap(); + let reader = RocksSstReader::open::(path, None).unwrap(); let mut sst_reader = reader.iter(IterOptions::default()).unwrap(); let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 1639f441e384..252249b74b29 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -392,7 +392,7 @@ where tag: String, timer: Option, host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler>, router: ApplyRouter, notifier: Box>, @@ -475,7 +475,7 @@ where pub fn new( tag: String, host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler>, engine: EK, router: ApplyRouter, @@ -4659,7 +4659,7 @@ pub struct Builder { tag: String, cfg: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler::Snapshot>>, engine: EK, sender: Box>, @@ -5060,7 +5060,7 @@ mod tests { (path, engine) } - pub fn create_tmp_importer(path: &str) -> (TempDir, Arc) { + pub fn create_tmp_importer(path: &str) -> (TempDir, Arc>) { let dir = Builder::new().prefix(path).tempdir().unwrap(); let importer = Arc::new( SstImporter::new( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index bef74e3ed29e..8c8919df67e5 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -536,7 +536,7 @@ where pub region_scheduler: Scheduler>, pub apply_router: ApplyRouter, pub router: RaftRouter, - pub importer: Arc, + pub importer: Arc>, pub store_meta: Arc>, pub feature_gate: FeatureGate, /// region_id -> (peer_id, is_splitting) @@ -1209,7 +1209,7 @@ pub struct RaftPollerBuilder { pub region_scheduler: Scheduler>, apply_router: ApplyRouter, pub router: RaftRouter, - pub importer: Arc, + pub importer: Arc>, pub store_meta: Arc>, pub pending_create_peers: Arc>>, snap_mgr: SnapManager, @@ 
-1599,7 +1599,7 @@ impl RaftBatchSystem { pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, background_worker: Worker, auto_split_controller: AutoSplitController, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 726b7abe5ceb..da2f004f47c1 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -33,7 +33,7 @@ where R: RaftEngine, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } @@ -44,7 +44,7 @@ where { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, ) -> Runner { Runner { diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 44f188e6f8fb..ca139a562a26 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -2,6 +2,7 @@ use std::{fmt, sync::Arc}; +use engine_traits::KvEngine; use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; use tikv_util::worker::Runnable; @@ -18,12 +19,12 @@ impl fmt::Display for Task { } } -pub struct Runner { - importer: Arc, +pub struct Runner { + importer: Arc>, } -impl Runner { - pub fn new(importer: Arc) -> Runner { +impl Runner { + pub fn new(importer: Arc>) -> Self { Runner { importer } } @@ -35,7 +36,7 @@ impl Runner { } } -impl Runnable for Runner { +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 625d9b7cb4f3..ed72d9ca12e1 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -259,7 +259,7 @@ struct Servers { lock_mgr: LockManager, server: LocalServer, node: Node, - importer: Arc, + importer: Arc>, cdc_scheduler: tikv_util::worker::Scheduler, cdc_memory_quota: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 1e170abb1c34..081d4b8f9156 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -255,7 +255,7 @@ struct TikvEngines { struct Servers { lock_mgr: LockManager, server: LocalServer, - importer: Arc, + importer: Arc>, rsmeter_pubsub_service: resource_metering::PubSubService, } diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 850df867da86..7ae91d64b35c 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -4,6 +4,7 @@ use std::{ collections::HashMap, fmt, io::{self, Write}, + marker::PhantomData, path::{Path, PathBuf}, sync::Arc, time::SystemTime, @@ -11,11 +12,10 @@ use std::{ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; -use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, }; -use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; +use file_system::{sync_dir, File, OpenOptions}; use keys::data_key; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; use tikv_util::time::Instant; @@ -215,17 +215,19 @@ impl Drop for ImportFile { /// The file being written is 
stored in `$root/.temp/$file_name`. After writing /// is completed, the file is moved to `$root/$file_name`. The file generated /// from the ingestion process will be placed in `$root/.clone/$file_name`. -pub struct ImportDir { +pub struct ImportDir { root_dir: PathBuf, temp_dir: PathBuf, clone_dir: PathBuf, + + _phantom: PhantomData, } -impl ImportDir { +impl ImportDir { const TEMP_DIR: &'static str = ".temp"; const CLONE_DIR: &'static str = ".clone"; - pub fn new>(root: P) -> Result { + pub fn new>(root: P) -> Result { let root_dir = root.as_ref().to_owned(); let temp_dir = root_dir.join(Self::TEMP_DIR); let clone_dir = root_dir.join(Self::CLONE_DIR); @@ -241,6 +243,7 @@ impl ImportDir { root_dir, temp_dir, clone_dir, + _phantom: PhantomData, }) } @@ -327,10 +330,14 @@ impl ImportDir { ) -> Result { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager, get_io_rate_limiter())?; - let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager)?; // TODO: check the length and crc32 of ingested file. - let meta_info = sst_reader.sst_meta_info(meta.to_owned()); + let (count, size) = sst_reader.kv_count_and_size(); + let meta_info = SstMetaInfo { + total_kvs: count, + total_bytes: size, + meta: meta.to_owned(), + }; Ok(meta_info) } @@ -354,8 +361,7 @@ impl ImportDir { _ => { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager.clone(), get_io_rate_limiter())?; - let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager.clone())?; for &(start, end) in TIDB_RANGES_COMPLEMENT { let opt = iter_option(&data_key(start), &data_key(end), false); @@ -377,7 +383,7 @@ impl ImportDir { Ok(true) } - pub fn ingest( + pub fn ingest( &self, metas: &[SstMetaInfo], engine: &E, @@ -427,8 +433,7 @@ impl ImportDir { for meta in metas { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager.clone(), get_io_rate_limiter())?; - let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager.clone())?; sst_reader.verify_checksum()?; } Ok(()) @@ -528,6 +533,7 @@ pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { mod test { use std::fs; + use engine_rocks::RocksEngine; use engine_traits::CF_DEFAULT; use super::*; @@ -578,7 +584,7 @@ mod test { use uuid::Uuid; let tmp = TempDir::new().unwrap(); - let dir = ImportDir::new(tmp.path()).unwrap(); + let dir = ImportDir::::new(tmp.path()).unwrap(); let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); let filename_v1 = sst_meta_to_path_v1(&meta).unwrap(); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 7e1de9cf44e7..ab4512de692c 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -17,7 +17,6 @@ use std::{ use collections::HashSet; use dashmap::{mapref::entry::Entry, DashMap}; use encryption::{to_engine_encryption_method, DataKeyManager}; -use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, @@ -26,7 +25,7 @@ use engine_traits::{ use 
external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, }; -use file_system::{get_io_rate_limiter, IoType, OpenOptions}; +use file_system::{IoType, OpenOptions}; use kvproto::{ brpb::{CipherInfo, StorageBackend}, import_sstpb::{Range, *}, @@ -153,8 +152,8 @@ impl CacheKvFile { } /// SstImporter manages SST files that are waiting for ingesting. -pub struct SstImporter { - dir: ImportDir, +pub struct SstImporter { + dir: ImportDir, key_manager: Option>, switcher: Either, // TODO: lift api_version as a type parameter. @@ -169,14 +168,14 @@ pub struct SstImporter { mem_limit: Arc, } -impl SstImporter { +impl SstImporter { pub fn new>( cfg: &Config, root: P, key_manager: Option>, api_version: ApiVersion, raft_kv_v2: bool, - ) -> Result { + ) -> Result { let switcher = if raft_kv_v2 { Either::Right(ImportModeSwitcherV2::new(cfg)) } else { @@ -281,7 +280,7 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { + pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { match &self.switcher { Either::Left(switcher) => switcher.start(executor, db.unwrap()), Either::Right(switcher) => switcher.start(executor), @@ -355,7 +354,7 @@ impl SstImporter { .check_api_version(metas, self.key_manager.clone(), self.api_version) } - pub fn ingest(&self, metas: &[SstMetaInfo], engine: &E) -> Result<()> { + pub fn ingest(&self, metas: &[SstMetaInfo], engine: &E) -> Result<()> { match self .dir .ingest(metas, engine, self.key_manager.clone(), self.api_version) @@ -395,7 +394,7 @@ impl SstImporter { // // This method returns the *inclusive* key range (`[start, end]`) of SST // file created, or returns None if the SST is empty. - pub async fn download_ext( + pub async fn download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -413,7 +412,7 @@ impl SstImporter { "rewrite_rule" => ?rewrite_rule, "speed_limit" => speed_limiter.speed_limit(), ); - let r = self.do_download_ext::( + let r = self.do_download_ext( meta, backend, name, @@ -435,7 +434,7 @@ impl SstImporter { } } - pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { + pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { if let Either::Left(ref switcher) = self.switcher { switcher.enter_normal_mode(&db, mf) } else { @@ -443,7 +442,7 @@ impl SstImporter { } } - pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { + pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { if let Either::Left(ref switcher) = self.switcher { switcher.enter_import_mode(&db, mf) } else { @@ -1081,7 +1080,7 @@ impl SstImporter { // raw download, without ext, compatibility to old tests. #[cfg(test)] - fn download( + fn download( &self, meta: &SstMeta, backend: &StorageBackend, @@ -1103,7 +1102,7 @@ impl SstImporter { )) } - async fn do_download_ext( + async fn do_download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -1140,10 +1139,8 @@ impl SstImporter { .await?; // now validate the SST file. - let env = get_env(self.key_manager.clone(), get_io_rate_limiter())?; - // Use abstracted SstReader after Env is abstracted. 
let dst_file_name = path.temp.to_str().unwrap(); - let sst_reader = RocksSstReader::open_with_env(dst_file_name, Some(env))?; + let sst_reader = E::SstReader::open(dst_file_name, self.key_manager.clone())?; sst_reader.verify_checksum()?; // undo key rewrite so we could compare with the keys inside SST @@ -1389,7 +1386,7 @@ impl SstImporter { self.dir.list_ssts() } - pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { + pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&default_meta)?; @@ -1422,11 +1419,7 @@ impl SstImporter { )) } - pub fn new_raw_writer( - &self, - db: &E, - mut meta: SstMeta, - ) -> Result> { + pub fn new_raw_writer(&self, db: &E, mut meta: SstMeta) -> Result> { meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&meta)?; let default = E::SstWriterBuilder::new() @@ -1484,6 +1477,7 @@ mod tests { usize, }; + use engine_rocks::get_env; use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, @@ -2005,7 +1999,8 @@ mod tests { ..Default::default() }; let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap(); let mem_limit_old = importer.mem_limit.load(Ordering::SeqCst); // create new config and get the diff config. @@ -2052,7 +2047,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2110,7 +2105,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2178,8 +2173,14 @@ mod tests { memory_use_ratio: 0.0, ..Default::default() }; - let importer = - SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &cfg, + import_dir, + Some(key_manager), + ApiVersion::V1, + false, + ) + .unwrap(); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2226,7 +2227,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager.clone()), @@ -2263,7 +2264,7 @@ mod tests { let (_, key_manager) = new_key_manager_for_test(); let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2303,11 +2304,13 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2353,7 +2356,7 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let (temp_dir, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &cfg, &importer_dir, Some(key_manager.clone()), @@ -2367,7 +2370,7 @@ mod tests { let db = new_test_engine_with_env(db_path.to_str().unwrap(), DATA_CFS, env.clone()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2412,11 +2415,13 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2457,14 +2462,16 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_default().unwrap(); let db = create_sst_test_engine().unwrap(); let _ = importer - .download::( + .download( &meta, &backend, "sample_default.sst", @@ -2501,14 +2508,16 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_write().unwrap(); let db = create_sst_test_engine().unwrap(); let _ = importer - .download::( + .download( &meta, &backend, "sample_write.sst", @@ -2568,11 +2577,12 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2640,14 +2650,16 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); // note: the range doesn't contain the DATA_PREFIX 'z'. 
meta.mut_range().set_start(b"t123_r02".to_vec()); meta.mut_range().set_end(b"t123_r12".to_vec()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2686,13 +2698,15 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(b"t5_r02".to_vec()); meta.mut_range().set_end(b"t5_r12".to_vec()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2733,11 +2747,13 @@ mod tests { meta.set_uuid(vec![0u8; 16]); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let backend = external_storage::make_local_backend(ext_sst_dir.path()); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2758,12 +2774,14 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(vec![b'x']); meta.mut_range().set_end(vec![b'y']); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2784,10 +2802,12 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2821,11 +2841,12 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2880,11 +2901,12 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2935,11 +2957,12 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2983,12 +3006,13 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let mut importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Snappy)); let db = create_sst_test_engine().unwrap(); importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -3016,12 +3040,13 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let mut importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Zstd)); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); - let mut w = importer.new_txn_writer::(&db, meta).unwrap(); + let mut w = importer.new_txn_writer(&db, meta).unwrap(); let mut batch = WriteBatch::default(); let mut pairs = vec![]; @@ -3064,12 +3089,18 @@ mod tests { #[test] fn test_import_support_download() { let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); assert_eq!(importer.import_support_download(), false); let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config { memory_use_ratio: 0.0, ..Default::default() @@ -3087,8 +3118,14 @@ mod tests { fn test_inc_mem_and_check() { // create importer object. let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); // test inc_mem_and_check() and dec_mem() successfully. @@ -3115,8 +3152,14 @@ mod tests { #[test] fn test_dashmap_lock() { let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); let key = "file1"; let r = Arc::new(OnceCell::new()); diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index f6f896a09239..1c6b06902a45 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -301,7 +301,7 @@ mod tests { use crate::{Config, SstImporter}; // Return the temp dir path to avoid it drop out of the scope. 
- fn new_writer Result>( + fn new_writer, &RocksEngine, SstMeta) -> Result>( f: F, api_version: ApiVersion, ) -> (W, TempDir) { @@ -310,7 +310,8 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); (f(&importer, &db, meta).unwrap(), importer_dir) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f5c64fa86e91..20e651ea1dc9 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -147,7 +147,7 @@ pub struct ServerCluster { addrs: AddressMap, pub storages: HashMap, pub region_info_accessors: HashMap, - pub importers: HashMap>, + pub importers: HashMap>>, pub pending_services: HashMap, pub coprocessor_hooks: HashMap, pub health_services: HashMap, diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 7d517de2cba9..6df829ad9255 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -27,6 +27,7 @@ test-engines-panic = [ [dependencies] backtrace = "0.3" collections = { workspace = true } +encryption = { workspace = true } engine_panic = { workspace = true } engine_rocks = { workspace = true } engine_test = { workspace = true } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 21099974d2d4..332168a4e93e 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -12,6 +12,7 @@ use std::{ }; use collections::HashMap; +use encryption::DataKeyManager; pub use engine_rocks::RocksSnapshot; use engine_rocks::{ get_env, RocksCfOptions, RocksDbOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, @@ -127,7 +128,9 @@ impl RocksEngine { let worker = Worker::new("engine-rocksdb"); let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { - db_opts.set_env(get_env(None /* key_manager */, io_rate_limiter).unwrap()); + db_opts.set_env( + get_env::(None /* key_manager */, io_rate_limiter).unwrap(), + ); } let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 2dc4f76b1944..d5b5c7c41038 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -126,7 +126,7 @@ pub struct ImportSstService { tablets: LocalTablets, engine: E, threads: Arc, - importer: Arc, + importer: Arc>, limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, @@ -322,7 +322,7 @@ impl ImportSstService { raft_entry_max_size: ReadableSize, engine: E, tablets: LocalTablets, - importer: Arc, + importer: Arc>, store_meta: Option>>>, resource_manager: Option>, region_info_accessor: Arc, @@ -350,7 +350,7 @@ impl ImportSstService { if let LocalTablets::Singleton(tablet) = &tablets { importer.start_switch_mode_check(threads.handle(), Some(tablet.clone())); } else { - importer.start_switch_mode_check::(threads.handle(), None); + importer.start_switch_mode_check(threads.handle(), None); } let writer = raft_writer::ThrottledTlsEngineWriter::default(); @@ -385,7 +385,7 @@ impl ImportSstService { self.cfg.clone() } - async fn tick(importer: Arc, cfg: ConfigManager) { + async fn tick(importer: Arc>, cfg: ConfigManager) { loop { 
sleep(Duration::from_secs(10)).await; @@ -563,7 +563,7 @@ impl ImportSstService { async fn apply_imp( mut req: ApplyRequest, - importer: Arc, + importer: Arc>, writer: raft_writer::ThrottledTlsEngineWriter, limiter: Limiter, max_raft_size: usize, @@ -1098,7 +1098,7 @@ impl ImportSst for ImportSstService { }; let res = with_resource_limiter( - importer.download_ext::( + importer.download_ext( req.get_sst(), req.get_storage_backend(), req.get_name(), diff --git a/src/server/node.rs b/src/server/node.rs index 228f679ed14d..fb2f28d9c1aa 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -167,7 +167,7 @@ where pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, @@ -455,7 +455,7 @@ where pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index d9b17c5d35c0..5fce5c0024ba 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -113,7 +113,7 @@ where pd_worker: LazyWorker, store_cfg: Arc>, state: &Mutex, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result<()> @@ -218,7 +218,7 @@ where background: Worker, pd_worker: LazyWorker, store_cfg: Arc>, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result<()> From d96284cb29969ef8bd046b4d6f576b91fc3e3287 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 28 Nov 2023 15:32:46 +0800 Subject: [PATCH 164/203] encryption: remove useless `EncryptionKeyManager` trait (#16086) ref tikv/tikv#15986 remove useless EncryptionKeyManager trait Signed-off-by: Connor1996 --- Cargo.lock | 3 +- cmd/tikv-ctl/src/main.rs | 7 +-- components/encryption/Cargo.toml | 1 - components/encryption/export/src/lib.rs | 6 +- components/encryption/src/crypter.rs | 59 +++++++++++------- components/encryption/src/lib.rs | 5 +- components/encryption/src/manager/mod.rs | 49 +++++++-------- components/engine_panic/Cargo.toml | 1 + components/engine_panic/src/sst.rs | 6 +- components/engine_rocks/src/encryption.rs | 20 +++--- components/engine_rocks/src/lib.rs | 5 +- components/engine_rocks/src/sst.rs | 6 +- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/encryption.rs | 62 ------------------- components/engine_traits/src/lib.rs | 2 - components/engine_traits/src/sst.rs | 5 +- components/engine_traits_tests/src/ctor.rs | 2 +- components/engine_traits_tests/src/sst.rs | 13 ++-- components/external_storage/src/lib.rs | 5 +- components/raft_log_engine/src/engine.rs | 7 ++- .../src/operation/ready/snapshot.rs | 5 +- .../raftstore/src/store/compaction_guard.rs | 3 +- components/raftstore/src/store/snap.rs | 6 +- components/raftstore/src/store/snap/io.rs | 12 ++-- components/sst_importer/src/import_file.rs | 4 +- components/sst_importer/src/sst_importer.rs | 15 ++--- components/sst_importer/src/util.rs | 5 +- components/tikv_kv/src/rocksdb_engine.rs | 5 +- src/server/tablet_snap.rs | 2 +- 29 files changed, 126 insertions(+), 196 deletions(-) delete mode 100644 components/engine_traits/src/encryption.rs diff --git a/Cargo.lock b/Cargo.lock index d191ca34188a..9c93540d3ee9 100644 --- a/Cargo.lock +++ b/Cargo.lock 
@@ -1465,7 +1465,6 @@ dependencies = [ "crc32fast", "crossbeam", "derive_more", - "engine_traits", "error_code", "fail", "file_system", @@ -1520,6 +1519,7 @@ dependencies = [ name = "engine_panic" version = "0.0.1" dependencies = [ + "encryption", "engine_traits", "kvproto", "raft", @@ -1612,6 +1612,7 @@ version = "0.0.1" dependencies = [ "case_macros", "collections", + "encryption", "error_code", "fail", "file_system", diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index b57a99f8345f..5ed1bcbd9cc9 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -21,11 +21,10 @@ use std::{ use collections::HashMap; use encryption_export::{ - create_backend, data_key_manager_from_config, from_engine_encryption_method, DataKeyManager, - DecrypterReader, Iv, + create_backend, data_key_manager_from_config, DataKeyManager, DecrypterReader, Iv, }; use engine_rocks::get_env; -use engine_traits::{EncryptionKeyManager, Peekable}; +use engine_traits::Peekable; use file_system::calc_crc32; use futures::{executor::block_on, future::try_join_all}; use gag::BufferRedirect; @@ -166,7 +165,7 @@ fn main() { let infile1 = Path::new(infile).canonicalize().unwrap(); let file_info = key_manager.get_file(infile1.to_str().unwrap()).unwrap(); - let mthd = from_engine_encryption_method(file_info.method); + let mthd = file_info.method; if mthd == EncryptionMethod::Plaintext { println!( "{} is not encrypted, skip to decrypt it into {}", diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 9698618a4ba9..0f2eac6ad5ab 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -18,7 +18,6 @@ cloud = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" derive_more = "0.99.3" -engine_traits = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 8820402be6b2..a36406d44ea5 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -9,9 +9,9 @@ use cloud::kms::Config as CloudConfig; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ - clean_up_dir, clean_up_trash, from_engine_encryption_method, trash_dir_all, AzureConfig, - Backend, DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, - EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, + clean_up_dir, clean_up_trash, trash_dir_all, AzureConfig, Backend, DataKeyImporter, + DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, + KmsConfig, MasterKeyConfig, Result, }; use encryption::{cloud_convert_error, FileBackend, PlaintextBackend}; use tikv_util::{box_err, error, info}; diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index 3940d392be6b..aafbe7cf88f4 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -1,8 +1,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::fmt::{self, Debug, Formatter}; + use byteorder::{BigEndian, ByteOrder}; use cloud::kms::PlainKey; -use engine_traits::EncryptionMethod as EtEncryptionMethod; use kvproto::encryptionpb::EncryptionMethod; use openssl::symm::{self, Cipher as OCipher}; use rand::{rngs::OsRng, RngCore}; @@ -10,28 +11,6 @@ use tikv_util::box_err; use crate::{Error, Result}; -pub fn to_engine_encryption_method(method: EncryptionMethod) -> EtEncryptionMethod { - match method { - EncryptionMethod::Plaintext => EtEncryptionMethod::Plaintext, - EncryptionMethod::Aes128Ctr => EtEncryptionMethod::Aes128Ctr, - EncryptionMethod::Aes192Ctr => EtEncryptionMethod::Aes192Ctr, - EncryptionMethod::Aes256Ctr => EtEncryptionMethod::Aes256Ctr, - EncryptionMethod::Sm4Ctr => EtEncryptionMethod::Sm4Ctr, - EncryptionMethod::Unknown => EtEncryptionMethod::Unknown, - } -} - -pub fn from_engine_encryption_method(method: EtEncryptionMethod) -> EncryptionMethod { - match method { - EtEncryptionMethod::Plaintext => EncryptionMethod::Plaintext, - EtEncryptionMethod::Aes128Ctr => EncryptionMethod::Aes128Ctr, - EtEncryptionMethod::Aes192Ctr => EncryptionMethod::Aes192Ctr, - EtEncryptionMethod::Aes256Ctr => EncryptionMethod::Aes256Ctr, - EtEncryptionMethod::Sm4Ctr => EncryptionMethod::Sm4Ctr, - EtEncryptionMethod::Unknown => EncryptionMethod::Unknown, - } -} - pub fn get_method_key_length(method: EncryptionMethod) -> usize { match method { EncryptionMethod::Plaintext => 0, @@ -43,6 +22,40 @@ pub fn get_method_key_length(method: EncryptionMethod) -> usize { } } +#[derive(Clone, PartialEq)] +pub struct FileEncryptionInfo { + pub method: EncryptionMethod, + pub key: Vec, + pub iv: Vec, +} +impl Default for FileEncryptionInfo { + fn default() -> Self { + FileEncryptionInfo { + method: EncryptionMethod::Unknown, + key: vec![], + iv: vec![], + } + } +} + +impl Debug for FileEncryptionInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "FileEncryptionInfo [method={:?}, key=...<{} bytes>, iv=...<{} bytes>]", + self.method, + self.key.len(), + self.iv.len() + ) + } +} + +impl FileEncryptionInfo { + pub fn is_empty(&self) -> bool { + self.key.is_empty() && self.iv.is_empty() + } +} + // IV's the length should be 12 btyes for GCM mode. const GCM_IV_12: usize = 12; // IV's the length should be 16 btyes for CTR mode. 
diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index 38c38108dc54..2a9ad4c6f44f 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -16,10 +16,7 @@ use std::{io::ErrorKind, path::Path}; pub use self::{ config::*, - crypter::{ - from_engine_encryption_method, to_engine_encryption_method, verify_encryption_config, - AesGcmCrypter, Iv, - }, + crypter::{verify_encryption_config, AesGcmCrypter, FileEncryptionInfo, Iv}, encrypted_file::EncryptedFile, errors::{cloud_convert_error, Error, Result, RetryCodedError}, file_dict_file::FileDictionaryFile, diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index f3594e8a96bb..f5a203e96262 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -13,9 +13,6 @@ use std::{ }; use crossbeam::channel::{self, select, tick}; -use engine_traits::{ - EncryptionKeyManager, EncryptionMethod as EtEncryptionMethod, FileEncryptionInfo, -}; use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; @@ -24,7 +21,7 @@ use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, use crate::{ config::EncryptionConfig, - crypter::{self, Iv}, + crypter::{self, FileEncryptionInfo, Iv}, encrypted_file::EncryptedFile, file_dict_file::FileDictionaryFile, io::{DecrypterReader, EncrypterWriter}, @@ -661,9 +658,9 @@ impl DataKeyManager { }; EncrypterWriter::new( writer, - crypter::from_engine_encryption_method(file.method), + file.method, &file.key, - if file.method == EtEncryptionMethod::Plaintext { + if file.method == EncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -691,9 +688,9 @@ impl DataKeyManager { let file = self.get_file(fname)?; DecrypterReader::new( reader, - crypter::from_engine_encryption_method(file.method), + file.method, &file.key, - if file.method == EtEncryptionMethod::Plaintext { + if file.method == EncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -767,11 +764,7 @@ impl DataKeyManager { } } }; - let encrypted_file = FileEncryptionInfo { - key, - method: crypter::to_engine_encryption_method(method), - iv, - }; + let encrypted_file = FileEncryptionInfo { key, method, iv }; Ok(Some(encrypted_file)) } @@ -844,8 +837,8 @@ impl DataKeyManager { } /// Return which method this manager is using. - pub fn encryption_method(&self) -> engine_traits::EncryptionMethod { - crypter::to_engine_encryption_method(self.method) + pub fn encryption_method(&self) -> EncryptionMethod { + self.method } /// For tests. @@ -869,9 +862,9 @@ impl Drop for DataKeyManager { } } -impl EncryptionKeyManager for DataKeyManager { +impl DataKeyManager { // Get key to open existing file. 
- fn get_file(&self, fname: &str) -> IoResult { + pub fn get_file(&self, fname: &str) -> IoResult { match self.get_file_exists(fname) { Ok(Some(result)) => Ok(result), Ok(None) => { @@ -881,7 +874,7 @@ impl EncryptionKeyManager for DataKeyManager { let method = EncryptionMethod::Plaintext; Ok(FileEncryptionInfo { key: vec![], - method: crypter::to_engine_encryption_method(method), + method, iv: file.iv, }) } @@ -889,21 +882,25 @@ impl EncryptionKeyManager for DataKeyManager { } } - fn new_file(&self, fname: &str) -> IoResult { + pub fn new_file(&self, fname: &str) -> IoResult { let (_, data_key) = self.dicts.current_data_key(); let key = data_key.get_key().to_owned(); let file = self.dicts.new_file(fname, self.method, true)?; let encrypted_file = FileEncryptionInfo { key, - method: crypter::to_engine_encryption_method(file.method), + method: file.method, iv: file.get_iv().to_owned(), }; Ok(encrypted_file) } - // See comments of `remove_dir` for more details when using this with a - // directory. - fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> IoResult<()> { + // Can be used with both file and directory. See comments of `remove_dir` for + // more details when using this with a directory. + // + // `physical_fname` is a hint when `fname` was renamed physically. + // Depending on the implementation, providing false negative or false + // positive value may result in leaking encryption keys. + pub fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> IoResult<()> { fail_point!("key_manager_fails_before_delete_file", |_| IoResult::Err( io::ErrorKind::Other.into() )); @@ -924,7 +921,7 @@ impl EncryptionKeyManager for DataKeyManager { Ok(()) } - fn link_file(&self, src_fname: &str, dst_fname: &str) -> IoResult<()> { + pub fn link_file(&self, src_fname: &str, dst_fname: &str) -> IoResult<()> { let src_path = Path::new(src_fname); let dst_path = Path::new(dst_fname); if src_path.is_dir() { @@ -1120,8 +1117,8 @@ impl<'a> Drop for DataKeyImporter<'a> { #[cfg(test)] mod tests { - use engine_traits::EncryptionMethod as EtEncryptionMethod; use file_system::{remove_file, File}; + use kvproto::encryptionpb::EncryptionMethod; use matches::assert_matches; use tempfile::TempDir; use test_util::create_test_key_file; @@ -1243,7 +1240,7 @@ mod tests { let foo3 = manager.get_file("foo").unwrap(); assert_eq!(foo1, foo3); let bar = manager.new_file("bar").unwrap(); - assert_eq!(bar.method, EtEncryptionMethod::Plaintext); + assert_eq!(bar.method, EncryptionMethod::Plaintext); } // When enabling encryption, using insecure master key is not allowed. 
diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index f5da1dad5507..7c41290993cf 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -11,6 +11,7 @@ testexport = [] [dependencies] engine_traits = { workspace = true } kvproto = { workspace = true } +encryption = { workspace = true } raft = { workspace = true } tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index 2e51c0b87b57..59c23e67636b 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -2,6 +2,7 @@ use std::{marker::PhantomData, path::PathBuf, sync::Arc}; +use ::encryption::DataKeyManager; use engine_traits::{ CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, RefIterable, Result, SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, @@ -18,10 +19,7 @@ impl SstExt for PanicEngine { pub struct PanicSstReader; impl SstReader for PanicSstReader { - fn open( - path: &str, - mgr: Option>, - ) -> Result { + fn open(path: &str, mgr: Option>) -> Result { panic!() } fn verify_checksum(&self) -> Result<()> { diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 58d359b39df4..75dc407e3c3e 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -2,18 +2,18 @@ use std::{io::Result, sync::Arc}; -use engine_traits::{EncryptionKeyManager, EncryptionMethod, FileEncryptionInfo}; +use encryption::{DataKeyManager, FileEncryptionInfo}; +use kvproto::encryptionpb::EncryptionMethod; use rocksdb::{ - DBEncryptionMethod, EncryptionKeyManager as DBEncryptionKeyManager, - FileEncryptionInfo as DBFileEncryptionInfo, + DBEncryptionMethod, EncryptionKeyManager, FileEncryptionInfo as DBFileEncryptionInfo, }; use crate::{r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. 
-pub(crate) fn get_env( +pub(crate) fn get_env( base_env: Option>, - key_manager: Option>, + key_manager: Option>, ) -> engine_traits::Result>> { if let Some(manager) = key_manager { let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); @@ -26,17 +26,17 @@ pub(crate) fn get_env( } } -pub struct WrappedEncryptionKeyManager { - manager: Arc, +pub struct WrappedEncryptionKeyManager { + manager: Arc, } -impl WrappedEncryptionKeyManager { - pub fn new(manager: Arc) -> Self { +impl WrappedEncryptionKeyManager { + pub fn new(manager: Arc) -> Self { Self { manager } } } -impl DBEncryptionKeyManager for WrappedEncryptionKeyManager { +impl EncryptionKeyManager for WrappedEncryptionKeyManager { fn get_file(&self, fname: &str) -> Result { self.manager .get_file(fname) diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 3226a4592f03..5afa5452344e 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -113,12 +113,13 @@ pub use rocksdb::{ }; pub mod flow_control_factors; +use ::encryption::DataKeyManager; pub use flow_control_factors::*; pub mod raw; -pub fn get_env( - key_manager: Option>, +pub fn get_env( + key_manager: Option>, limiter: Option>, ) -> engine_traits::Result> { let env = encryption::get_env(None /* base_env */, key_manager)?; diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 0a234983aa33..1030b7aa17ff 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -2,6 +2,7 @@ use std::{path::PathBuf, sync::Arc}; +use ::encryption::DataKeyManager; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, @@ -47,10 +48,7 @@ impl RocksSstReader { } impl SstReader for RocksSstReader { - fn open( - path: &str, - mgr: Option>, - ) -> Result { + fn open(path: &str, mgr: Option>) -> Result { let env = get_env(mgr, get_io_rate_limiter())?; Self::open_with_env(path, Some(env)) } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 2d11b59f623c..8e8812ec6e20 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -20,6 +20,7 @@ lazy_static = "1.0" log_wrappers = { workspace = true } protobuf = "2" raft = { workspace = true } +encryption = { workspace = true } serde = "1.0" slog = { workspace = true } slog-global = { workspace = true } diff --git a/components/engine_traits/src/encryption.rs b/components/engine_traits/src/encryption.rs deleted file mode 100644 index 7376e2d55927..000000000000 --- a/components/engine_traits/src/encryption.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - fmt::{self, Debug, Formatter}, - io::Result, -}; - -pub trait EncryptionKeyManager: Sync + Send { - fn get_file(&self, fname: &str) -> Result; - fn new_file(&self, fname: &str) -> Result; - /// Can be used with both file and directory. - /// - /// `physical_fname` is a hint when `fname` was renamed physically. - /// Depending on the implementation, providing false negative or false - /// positive value may result in leaking encryption keys. 
- fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> Result<()>; - fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result<()>; -} - -#[derive(Clone, PartialEq)] -pub struct FileEncryptionInfo { - pub method: EncryptionMethod, - pub key: Vec, - pub iv: Vec, -} -impl Default for FileEncryptionInfo { - fn default() -> Self { - FileEncryptionInfo { - method: EncryptionMethod::Unknown, - key: vec![], - iv: vec![], - } - } -} - -impl Debug for FileEncryptionInfo { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "FileEncryptionInfo [method={:?}, key=...<{} bytes>, iv=...<{} bytes>]", - self.method, - self.key.len(), - self.iv.len() - ) - } -} - -impl FileEncryptionInfo { - pub fn is_empty(&self) -> bool { - self.key.is_empty() && self.iv.is_empty() - } -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum EncryptionMethod { - Unknown = 0, - Plaintext = 1, - Aes128Ctr = 2, - Aes192Ctr = 3, - Aes256Ctr = 4, - Sm4Ctr = 5, -} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index e09b1b52733d..9cf4c22dd829 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -295,8 +295,6 @@ mod sst; pub use crate::sst::*; mod write_batch; pub use crate::write_batch::*; -mod encryption; -pub use crate::encryption::*; mod mvcc_properties; mod sst_partitioner; pub use crate::sst_partitioner::*; diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index dccd3a2523d9..036c8999e3f4 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -2,9 +2,10 @@ use std::{path::PathBuf, sync::Arc}; +use encryption::DataKeyManager; use kvproto::import_sstpb::SstMeta; -use crate::{errors::Result, EncryptionKeyManager, RefIterable}; +use crate::{errors::Result, RefIterable}; #[derive(Clone, Debug)] pub struct SstMetaInfo { @@ -21,7 +22,7 @@ pub trait SstExt: Sized { /// SstReader is used to read an SST file. 
pub trait SstReader: RefIterable + Sized + Send { - fn open(path: &str, mgr: Option>) -> Result; + fn open(path: &str, mgr: Option>) -> Result; fn verify_checksum(&self) -> Result<()>; fn kv_count_and_size(&self) -> (u64, u64); } diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index 5d987d648580..ba3154d9267c 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -9,7 +9,7 @@ use engine_test::{ ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, kv::KvTestEngine, }; -use engine_traits::{EncryptionKeyManager, KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; +use engine_traits::{KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use super::tempdir; diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 629c81df5289..77258e649ffa 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -4,7 +4,6 @@ use std::fs; -use encryption::DataKeyManager; use engine_test::kv::KvTestEngine; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstExt, SstReader, @@ -49,7 +48,7 @@ fn basic() -> Result<()> { sst_writer.put(b"k1", b"v1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -78,7 +77,7 @@ fn forward() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -115,7 +114,7 @@ fn reverse() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_last()?; @@ -153,7 +152,7 @@ fn delete() -> Result<()> { sst_writer.delete(b"k1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -207,7 +206,7 @@ fn same_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -249,7 +248,7 @@ fn reverse_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 082073abe4f6..05dbf6f965d3 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -17,8 +17,7 @@ use std::{ use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; -use encryption::{from_engine_encryption_method, DecrypterReader, Iv}; -use engine_traits::FileEncryptionInfo; +use encryption::{DecrypterReader, FileEncryptionInfo, Iv}; use file_system::File; use futures::io::BufReader; use 
futures_io::AsyncRead; @@ -249,7 +248,7 @@ pub fn encrypt_wrap_reader( let input = match file_crypter { Some(x) => Box::new(DecrypterReader::new( reader, - from_engine_encryption_method(x.method), + x.method, &x.key, Iv::from_slice(&x.iv)?, )?), diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 1f19a161b096..c71b9fd65d92 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -10,12 +10,13 @@ use std::{ use codec::number::NumberCodec; use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ - CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, PerfContextKind, PerfLevel, - RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, - CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + CacheStats, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, RaftEngineDebug, + RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, }; use file_system::{IoOp, IoRateLimiter, IoType, WithIoType}; use kvproto::{ + encryptionpb::EncryptionMethod, metapb::Region, raft_serverpb::{ RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 9e0ed449cef5..c29399ac6a03 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -30,10 +30,7 @@ use std::{ }; use encryption_export::DataKeyManager; -use engine_traits::{ - EncryptionKeyManager, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, - ALL_CFS, -}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, ALL_CFS}; use fail::fail_point; use kvproto::{ metapb::PeerRole, diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index f63a257c9f5e..161a8f9c4db5 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -269,7 +269,6 @@ mod tests { use std::{path::Path, str}; use collections::HashMap; - use encryption::DataKeyManager; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -542,7 +541,7 @@ mod tests { } fn collect_keys(path: &str) -> Vec> { - let reader = RocksSstReader::open::(path, None).unwrap(); + let reader = RocksSstReader::open(path, None).unwrap(); let mut sst_reader = reader.iter(IterOptions::default()).unwrap(); let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6fe21fe97502..a857cbffdfda 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -15,8 +15,8 @@ use std::{ }; use collections::{HashMap, HashMapEntry as Entry}; -use encryption::{create_aes_ctr_crypter, from_engine_encryption_method, DataKeyManager, Iv}; -use engine_traits::{CfName, EncryptionKeyManager, KvEngine, CF_DEFAULT, CF_LOCK, CF_WRITE}; +use encryption::{create_aes_ctr_crypter, DataKeyManager, Iv}; +use engine_traits::{CfName, KvEngine, CF_DEFAULT, CF_LOCK, CF_WRITE}; use error_code::{self, ErrorCode, ErrorCodeExt}; use fail::fail_point; use file_system::{ @@ -614,7 +614,7 @@ impl Snapshot { if let Some(mgr) = &s.mgr.encryption_key_manager { let enc_info 
= mgr.new_file(&file_paths[idx])?; - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; if mthd != EncryptionMethod::Plaintext { let file_for_recving = cf_file.file_for_recving.last_mut().unwrap(); file_for_recving.encrypter = Some( diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 3cdee1e40f1c..952f49baf446 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -8,12 +8,10 @@ use std::{ usize, }; -use encryption::{ - from_engine_encryption_method, DataKeyManager, DecrypterReader, EncrypterWriter, Iv, -}; +use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter, Iv}; use engine_traits::{ - CfName, EncryptionKeyManager, Error as EngineError, Iterable, KvEngine, Mutable, - SstCompressionType, SstWriter, SstWriterBuilder, WriteBatch, + CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstWriter, + SstWriterBuilder, WriteBatch, }; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ @@ -60,7 +58,7 @@ where if let Some(key_mgr) = key_mgr { let enc_info = box_try!(key_mgr.new_file(path)); - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; if mthd != EncryptionMethod::Plaintext { let writer = box_try!(EncrypterWriter::new( file.take().unwrap(), @@ -287,7 +285,7 @@ pub fn get_decrypter_reader( encryption_key_manager: &DataKeyManager, ) -> Result, Error> { let enc_info = box_try!(encryption_key_manager.get_file(file)); - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; debug!( "get_decrypter_reader gets enc_info for {:?}, method: {:?}", file, mthd diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 7ae91d64b35c..a8fdea6a564b 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -12,9 +12,7 @@ use std::{ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; -use engine_traits::{ - iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, -}; +use engine_traits::{iter_option, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader}; use file_system::{sync_dir, File, OpenOptions}; use keys::data_key; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index ab4512de692c..6eef07b1ebc0 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -16,11 +16,11 @@ use std::{ use collections::HashSet; use dashmap::{mapref::entry::Entry, DashMap}; -use encryption::{to_engine_encryption_method, DataKeyManager}; +use encryption::{DataKeyManager, FileEncryptionInfo}; use engine_traits::{ - name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, - IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, - SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, + name_to_cf, util::check_key_in_range, CfName, IterOptions, Iterator, KvEngine, RefIterable, + SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + CF_WRITE, }; use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, @@ -1116,7 +1116,7 @@ impl SstImporter { let path = 
self.dir.join_for_write(meta)?; let file_crypter = crypter.map(|c| FileEncryptionInfo { - method: to_engine_encryption_method(c.cipher_type), + method: c.cipher_type, key: c.cipher_key, iv: meta.cipher_iv.to_owned(), }); @@ -1479,11 +1479,12 @@ mod tests { use engine_rocks::get_env; use engine_traits::{ - collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, - RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, + collect, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, + SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; use external_storage::read_external_storage_info_buff; use file_system::File; + use kvproto::encryptionpb::EncryptionMethod; use online_config::{ConfigManager, OnlineConfig}; use openssl::hash::{Hasher, MessageDigest}; use tempfile::Builder; diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 4adfe3db51ea..121daf49ea81 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -3,7 +3,6 @@ use std::path::Path; use encryption::DataKeyManager; -use engine_traits::EncryptionKeyManager; use external_storage::ExternalStorage; use file_system::File; @@ -127,8 +126,8 @@ mod tests { RocksTitanDbOptions, }; use engine_traits::{ - CfName, CfOptions, DbOptions, EncryptionKeyManager, ImportExt, Peekable, SstWriter, - SstWriterBuilder, TitanCfOptions, CF_DEFAULT, + CfName, CfOptions, DbOptions, ImportExt, Peekable, SstWriter, SstWriterBuilder, + TitanCfOptions, CF_DEFAULT, }; use tempfile::Builder; use test_util::encryption::new_test_key_manager; diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 332168a4e93e..21099974d2d4 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -12,7 +12,6 @@ use std::{ }; use collections::HashMap; -use encryption::DataKeyManager; pub use engine_rocks::RocksSnapshot; use engine_rocks::{ get_env, RocksCfOptions, RocksDbOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, @@ -128,9 +127,7 @@ impl RocksEngine { let worker = Worker::new("engine-rocksdb"); let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { - db_opts.set_env( - get_env::(None /* key_manager */, io_rate_limiter).unwrap(), - ); + db_opts.set_env(get_env(None /* key_manager */, io_rate_limiter).unwrap()); } let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index ca869f5c7610..997a932be9dc 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -35,7 +35,7 @@ use std::{ use collections::HashMap; use crc64fast::Digest; use encryption_export::{DataKeyImporter, DataKeyManager}; -use engine_traits::{Checkpointer, EncryptionKeyManager, KvEngine, TabletRegistry}; +use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; use file_system::{IoType, OpenOptions, WithIoType}; use futures::{ future::FutureExt, From 6bb3d2eca36932e6545b6f00f0d5728354c45acf Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 28 Nov 2023 20:19:17 +0800 Subject: [PATCH 165/203] titan: update titan to fix compaction filter (#16092) close tikv/tikv#16091 update titan to fix compaction filter Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c93540d3ee9..4211e2bfdf45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2849,7 +2849,7 @@ dependencies = [ [[package]] name 
= "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2868,7 +2868,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "bzip2-sys", "cc", @@ -4708,7 +4708,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "libc 0.2.146", "librocksdb_sys", From dd567e60799e38f2e250ad4c4b3054c4ef794014 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 29 Nov 2023 15:20:51 +0800 Subject: [PATCH 166/203] readpool: gate future-pool running tasks per priority (#16049) close tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: tongjian --- Cargo.lock | 1 + components/resource_control/src/lib.rs | 4 +- .../resource_control/src/resource_group.rs | 221 ++---------------- components/resource_control/src/worker.rs | 38 +-- components/server/src/server.rs | 4 +- components/server/src/server2.rs | 4 +- components/tikv_util/Cargo.toml | 1 + components/tikv_util/src/lib.rs | 1 + components/tikv_util/src/resource_control.rs | 191 +++++++++++++++ .../tikv_util/src/yatp_pool/future_pool.rs | 50 +++- components/tikv_util/src/yatp_pool/metrics.rs | 2 +- components/tikv_util/src/yatp_pool/mod.rs | 28 +-- src/config/mod.rs | 1 - src/read_pool.rs | 60 +++-- src/storage/txn/sched_pool.rs | 6 +- 15 files changed, 345 insertions(+), 267 deletions(-) create mode 100644 components/tikv_util/src/resource_control.rs diff --git a/Cargo.lock b/Cargo.lock index 4211e2bfdf45..146e9aa04ab6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6639,6 +6639,7 @@ dependencies = [ "slog-global", "slog-json", "slog-term", + "strum 0.20.0", "sysinfo", "tempfile", "thiserror", diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index a7b4cf031923..917718e84094 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -11,9 +11,9 @@ use serde::{Deserialize, Serialize}; mod resource_group; pub use resource_group::{ - priority_from_task_meta, ResourceConsumeType, ResourceController, ResourceGroupManager, - TaskMetadata, MIN_PRIORITY_UPDATE_INTERVAL, + ResourceConsumeType, ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL, }; +pub use tikv_util::resource_control::*; mod future; pub use future::{with_resource_limiter, ControlledFuture}; diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 7e6d4279a25f..d6933d0a383d 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -1,7 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::Cow, cell::Cell, cmp::{max, min}, collections::HashSet, @@ -22,8 +21,11 @@ use kvproto::{ resource_manager::{GroupMode, ResourceGroup as PbResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use strum::{EnumCount, EnumIter, IntoEnumIterator}; -use tikv_util::{info, time::Instant}; +use tikv_util::{ + info, + resource_control::{TaskMetadata, TaskPriority, DEFAULT_RESOURCE_GROUP_NAME}, + time::Instant, +}; use yatp::queue::priority::TaskPriorityProvider; use crate::{metrics::deregister_metrics, resource_limiter::ResourceLimiter}; @@ -34,13 +36,12 @@ const DEFAULT_PRIORITY_PER_READ_TASK: u64 = 50; const TASK_EXTRA_FACTOR_BY_LEVEL: [u64; 3] = [0, 20, 100]; /// duration to update the minimal priority value of each resource group. pub const MIN_PRIORITY_UPDATE_INTERVAL: Duration = Duration::from_secs(1); -/// default resource group name -const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; /// default value of max RU quota. const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; /// The maximum RU quota that can be configured. const MAX_RU_QUOTA: u64 = i32::MAX as u64; +#[cfg(test)] const LOW_PRIORITY: u32 = 1; const MEDIUM_PRIORITY: u32 = 8; #[cfg(test)] @@ -56,40 +57,6 @@ pub enum ResourceConsumeType { IoBytes(u64), } -#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] -#[repr(usize)] -pub enum TaskPriority { - High = 0, - Medium = 1, - Low = 2, -} - -impl TaskPriority { - pub fn as_str(&self) -> &'static str { - match *self { - TaskPriority::High => "high", - TaskPriority::Medium => "medium", - TaskPriority::Low => "low", - } - } -} - -impl From for TaskPriority { - fn from(value: u32) -> Self { - // map the resource group priority value (1,8,16) to (Low,Medium,High) - // 0 means the priority is not set, so map it to medium by default. - if value == 0 { - Self::Medium - } else if value < 6 { - Self::Low - } else if value < 11 { - Self::Medium - } else { - Self::High - } - } -} - /// ResourceGroupManager manages the metadata of each resource group. pub struct ResourceGroupManager { pub(crate) resource_groups: DashMap, @@ -100,24 +67,20 @@ pub struct ResourceGroupManager { // resource limiter has changed. version_generator: AtomicU64, // the shared resource limiter of each priority - priority_limiters: [Arc; TaskPriority::COUNT], + priority_limiters: [Arc; TaskPriority::PRIORITY_COUNT], } impl Default for ResourceGroupManager { fn default() -> Self { - let priority_limiters = TaskPriority::iter() - .map(|p| { - Arc::new(ResourceLimiter::new( - p.as_str().to_owned(), - f64::INFINITY, - f64::INFINITY, - 0, - false, - )) - }) - .collect::>() - .try_into() - .unwrap(); + let priority_limiters = TaskPriority::priorities().map(|p| { + Arc::new(ResourceLimiter::new( + p.as_str().to_owned(), + f64::INFINITY, + f64::INFINITY, + 0, + false, + )) + }); let manager = Self { resource_groups: Default::default(), group_count: AtomicU64::new(0), @@ -308,14 +271,6 @@ impl ResourceGroupManager { self.get_group_count() > 1 } - /// return the priority of target resource group. - #[inline] - pub fn get_resource_group_priority(&self, group: &str) -> u32 { - self.resource_groups - .get(group) - .map_or(LOW_PRIORITY, |g| g.group.priority) - } - // Always return the background resource limiter if any; // Only return the foregroup limiter when priority is enabled. 
pub fn get_resource_limiter( @@ -385,7 +340,9 @@ impl ResourceGroupManager { } #[inline] - pub fn get_priority_resource_limiters(&self) -> [Arc; 3] { + pub fn get_priority_resource_limiters( + &self, + ) -> [Arc; TaskPriority::PRIORITY_COUNT] { self.priority_limiters.clone() } } @@ -525,7 +482,9 @@ impl ResourceController { let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); // skip to adjust max ru if it is the "default" group and the ru config eq // MAX_RU_QUOTA - if ru_quota > *max_ru_quota && (name != b"default" || ru_quota < MAX_RU_QUOTA) { + if ru_quota > *max_ru_quota + && (name != DEFAULT_RESOURCE_GROUP_NAME.as_bytes() || ru_quota < MAX_RU_QUOTA) + { *max_ru_quota = ru_quota; // adjust all group weight because the current value is too small. self.adjust_all_resource_group_factors(ru_quota); @@ -668,101 +627,9 @@ impl ResourceController { } } -const OVERRIDE_PRIORITY_MASK: u8 = 0b1000_0000; -const RESOURCE_GROUP_NAME_MASK: u8 = 0b0100_0000; - -#[derive(Clone, Default)] -pub struct TaskMetadata<'a> { - // The first byte is a bit map to indicate which field exists, - // then append override priority if nonzero, - // then append resource group name if not default - metadata: Cow<'a, [u8]>, -} - -impl<'a> TaskMetadata<'a> { - pub fn deep_clone(&self) -> TaskMetadata<'static> { - TaskMetadata { - metadata: Cow::Owned(self.metadata.to_vec()), - } - } - - pub fn from_ctx(ctx: &ResourceControlContext) -> Self { - let mut mask = 0; - let mut buf = vec![]; - if ctx.override_priority != 0 { - mask |= OVERRIDE_PRIORITY_MASK; - } - if !ctx.resource_group_name.is_empty() - && ctx.resource_group_name != DEFAULT_RESOURCE_GROUP_NAME - { - mask |= RESOURCE_GROUP_NAME_MASK; - } - if mask == 0 { - // if all are default value, no need to write anything to save copy cost - return Self { - metadata: Cow::Owned(buf), - }; - } - buf.push(mask); - if mask & OVERRIDE_PRIORITY_MASK != 0 { - buf.extend_from_slice(&(ctx.override_priority as u32).to_ne_bytes()); - } - if mask & RESOURCE_GROUP_NAME_MASK != 0 { - buf.extend_from_slice(ctx.resource_group_name.as_bytes()); - } - Self { - metadata: Cow::Owned(buf), - } - } - - fn from_bytes(bytes: &'a [u8]) -> Self { - Self { - metadata: Cow::Borrowed(bytes), - } - } - - pub fn to_vec(self) -> Vec { - self.metadata.into_owned() - } - - pub fn override_priority(&self) -> u32 { - if self.metadata.is_empty() { - return 0; - } - if self.metadata[0] & OVERRIDE_PRIORITY_MASK == 0 { - return 0; - } - u32::from_ne_bytes(self.metadata[1..5].try_into().unwrap()) - } - - pub fn group_name(&self) -> &[u8] { - if self.metadata.is_empty() { - return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); - } - if self.metadata[0] & RESOURCE_GROUP_NAME_MASK == 0 { - return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); - } - let start = if self.metadata[0] & OVERRIDE_PRIORITY_MASK != 0 { - 5 - } else { - 1 - }; - &self.metadata[start..] - } -} - -// return the TaskPriority value from task metadata. -// This function is used for handling thread pool task waiting metrics. 
-pub fn priority_from_task_meta(meta: &[u8]) -> usize { - let priority = TaskMetadata::from_bytes(meta).override_priority(); - // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) - debug_assert!(priority <= 16); - TaskPriority::from(priority) as usize -} - impl TaskPriorityProvider for ResourceController { fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { - let metadata = TaskMetadata::from_bytes(extras.metadata()); + let metadata = TaskMetadata::from(extras.metadata()); self.resource_group(metadata.group_name()).get_priority( extras.current_level() as usize, if metadata.override_priority() == 0 { @@ -1316,32 +1183,6 @@ pub(crate) mod tests { assert!(v5 < v1); } - #[test] - fn test_task_metadata() { - let cases = [ - ("default", 0u32), - ("default", 6u32), - ("test", 0u32), - ("test", 15u32), - ]; - - let metadata = TaskMetadata::from_ctx(&ResourceControlContext::default()); - assert_eq!(metadata.group_name(), b"default"); - for (group_name, priority) in cases { - let metadata = TaskMetadata::from_ctx(&ResourceControlContext { - resource_group_name: group_name.to_string(), - override_priority: priority as u64, - ..Default::default() - }); - assert_eq!(metadata.override_priority(), priority); - assert_eq!(metadata.group_name(), group_name.as_bytes()); - let vec = metadata.to_vec(); - let metadata1 = TaskMetadata::from_bytes(&vec); - assert_eq!(metadata1.override_priority(), priority); - assert_eq!(metadata1.group_name(), group_name.as_bytes()); - } - } - #[test] fn test_get_resource_limiter() { let mgr = ResourceGroupManager::default(); @@ -1433,20 +1274,4 @@ pub(crate) mod tests { &mgr.priority_limiters[1] )); } - - #[test] - fn test_task_priority() { - use TaskPriority::*; - let cases = [ - (0, Medium), - (1, Low), - (7, Medium), - (8, Medium), - (15, High), - (16, High), - ]; - for (value, priority) in cases { - assert_eq!(TaskPriority::from(value), priority); - } - } } diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 79dea73d0ae2..2ea72f132eed 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -10,9 +10,10 @@ use std::{ use file_system::{fetch_io_bytes, IoBytes, IoType}; use prometheus::Histogram; -use strum::{EnumCount, IntoEnumIterator}; +use strum::EnumCount; use tikv_util::{ debug, + resource_control::TaskPriority, sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, warn, @@ -21,13 +22,17 @@ use tikv_util::{ use crate::{ metrics::*, - resource_group::{ResourceGroupManager, TaskPriority}, + resource_group::ResourceGroupManager, resource_limiter::{GroupStatistics, ResourceLimiter, ResourceType}, }; pub const BACKGROUND_LIMIT_ADJUST_DURATION: Duration = Duration::from_secs(10); const MICROS_PER_SEC: f64 = 1_000_000.0; +// the minimal schedule wait duration due to the overhead of queue. +// We should exclude this cause when calculate the estimated total wait +// duration. +const MINIMAL_SCHEDULE_WAIT_SECS: f64 = 0.000_005; //5us pub struct ResourceUsageStats { total_quota: f64, @@ -303,7 +308,7 @@ struct GroupStats { /// In general, caller should call this function in a fixed interval. 
pub struct PriorityLimiterAdjustWorker { resource_ctl: Arc, - trackers: [PriorityLimiterStatsTracker; 3], + trackers: [PriorityLimiterStatsTracker; TaskPriority::PRIORITY_COUNT], resource_quota_getter: R, last_adjust_time: Instant, is_last_low_cpu: bool, @@ -327,10 +332,9 @@ impl PriorityLimiterAdjustWorker { resource_ctl: Arc, resource_quota_getter: R, ) -> Self { - let priorities: [_; 3] = TaskPriority::iter().collect::>().try_into().unwrap(); let trackers = resource_ctl .get_priority_resource_limiters() - .zip(priorities) + .zip(TaskPriority::priorities()) .map(|(l, p)| PriorityLimiterStatsTracker::new(l, p.as_str())); Self { resource_ctl, @@ -367,8 +371,8 @@ impl PriorityLimiterAdjustWorker { } self.is_last_single_group = false; - let stats: [_; 3] = - std::array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64())); + let stats: [_; TaskPriority::PRIORITY_COUNT] = + array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64())); let process_cpu_stats = match self .resource_quota_getter @@ -415,12 +419,13 @@ impl PriorityLimiterAdjustWorker { return; } - let real_cpu_total: f64 = stats.iter().map(|s| s.cpu_secs).sum(); + let cpu_duration: [_; TaskPriority::PRIORITY_COUNT] = array::from_fn(|i| stats[i].cpu_secs); + let real_cpu_total: f64 = cpu_duration.iter().sum(); let expect_pool_cpu_total = real_cpu_total * (process_cpu_stats.total_quota * 0.95) / process_cpu_stats.current_used; let mut limits = [0.0; 2]; - let level_expected: [_; 3] = - std::array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs); + let level_expected: [_; TaskPriority::PRIORITY_COUNT] = + array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs); // substract the cpu time usage for priority high. let mut expect_cpu_time_total = expect_pool_cpu_total - level_expected[0]; @@ -442,8 +447,10 @@ impl PriorityLimiterAdjustWorker { limits[i - 1] = limit; expect_cpu_time_total -= level_expected[i]; } - debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, - "limits" => ?limits, "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); + debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, + "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, + "cpu_costs" => ?cpu_duration, "limits" => ?limits, + "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); } } @@ -516,12 +523,15 @@ impl PriorityLimiterStatsTracker { let stats_delta = (cur_stats - self.last_stats) / dur_secs; self.last_stats = cur_stats; let wait_stats: [_; 2] = - std::array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics()); + array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics()); let schedule_wait_dur_secs = wait_stats.iter().map(|s| s.0).sum::() / dur_secs; + let expected_wait_dur_secs = stats_delta.request_count as f64 * MINIMAL_SCHEDULE_WAIT_SECS; + let normed_schedule_wait_dur_secs = + (schedule_wait_dur_secs - expected_wait_dur_secs).max(0.0); LimiterStats { cpu_secs: stats_delta.total_consumed as f64 / MICROS_PER_SEC, wait_secs: stats_delta.total_wait_dur_us as f64 / MICROS_PER_SEC - + schedule_wait_dur_secs, + + normed_schedule_wait_dur_secs, req_count: stats_delta.request_count, } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ed72d9ca12e1..059cda0bb91e 100644 --- a/components/server/src/server.rs 
+++ b/components/server/src/server.rs @@ -70,7 +70,7 @@ use raftstore::{ RaftRouterCompactedEventSender, }; use resolved_ts::{LeadershipResolver, Task}; -use resource_control::{priority_from_task_meta, ResourceGroupManager}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; @@ -549,7 +549,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), - Some(Arc::new(priority_from_task_meta)), + true, )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 081d4b8f9156..636a4bc9282b 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -67,7 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resolved_ts::Task; -use resource_control::{priority_from_task_meta, ResourceGroupManager}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ @@ -460,7 +460,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), - Some(Arc::new(priority_from_task_meta)), + true, )) } else { None diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 0b6fc5978cb8..9250dd03cb04 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -52,6 +52,7 @@ slog-async = "2.3" slog-global = { workspace = true } slog-json = "2.3" slog-term = "2.4" +strum = { version = "0.20", features = ["derive"] } sysinfo = "0.26" thiserror = "1.0" tikv_alloc = { workspace = true } diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index b8aa578a8789..cdcfc4673c9f 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -54,6 +54,7 @@ pub mod memory; pub mod metrics; pub mod mpsc; pub mod quota_limiter; +pub mod resource_control; pub mod store; pub mod stream; pub mod sys; diff --git a/components/tikv_util/src/resource_control.rs b/components/tikv_util/src/resource_control.rs new file mode 100644 index 000000000000..c7b46c2ddabb --- /dev/null +++ b/components/tikv_util/src/resource_control.rs @@ -0,0 +1,191 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +/// This mod provide some utility types and functions for resource control. 
+use std::borrow::Cow; + +use kvproto::kvrpcpb::ResourceControlContext; +use strum::{EnumCount, EnumIter}; + +/// default resource group name +pub const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; + +const OVERRIDE_PRIORITY_MASK: u8 = 0b1000_0000; +const RESOURCE_GROUP_NAME_MASK: u8 = 0b0100_0000; + +#[derive(Clone, Default)] +pub struct TaskMetadata<'a> { + // The first byte is a bit map to indicate which field exists, + // then append override priority if nonzero, + // then append resource group name if not default + metadata: Cow<'a, [u8]>, +} + +impl<'a> TaskMetadata<'a> { + pub fn deep_clone(&self) -> TaskMetadata<'static> { + TaskMetadata { + metadata: Cow::Owned(self.metadata.to_vec()), + } + } + + pub fn from_ctx(ctx: &ResourceControlContext) -> Self { + let mut mask = 0; + let mut buf = vec![]; + if ctx.override_priority != 0 { + mask |= OVERRIDE_PRIORITY_MASK; + } + if !ctx.resource_group_name.is_empty() + && ctx.resource_group_name != DEFAULT_RESOURCE_GROUP_NAME + { + mask |= RESOURCE_GROUP_NAME_MASK; + } + if mask == 0 { + // if all are default value, no need to write anything to save copy cost + return Self { + metadata: Cow::Owned(buf), + }; + } + buf.push(mask); + if mask & OVERRIDE_PRIORITY_MASK != 0 { + buf.extend_from_slice(&(ctx.override_priority as u32).to_ne_bytes()); + } + if mask & RESOURCE_GROUP_NAME_MASK != 0 { + buf.extend_from_slice(ctx.resource_group_name.as_bytes()); + } + Self { + metadata: Cow::Owned(buf), + } + } + + pub fn to_vec(self) -> Vec { + self.metadata.into_owned() + } + + pub fn override_priority(&self) -> u32 { + if self.metadata.is_empty() { + return 0; + } + if self.metadata[0] & OVERRIDE_PRIORITY_MASK == 0 { + return 0; + } + u32::from_ne_bytes(self.metadata[1..5].try_into().unwrap()) + } + + pub fn group_name(&self) -> &[u8] { + if self.metadata.is_empty() { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + if self.metadata[0] & RESOURCE_GROUP_NAME_MASK == 0 { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + let start = if self.metadata[0] & OVERRIDE_PRIORITY_MASK != 0 { + 5 + } else { + 1 + }; + &self.metadata[start..] + } +} + +impl<'a> From<&'a [u8]> for TaskMetadata<'a> { + fn from(bytes: &'a [u8]) -> Self { + Self { + metadata: Cow::Borrowed(bytes), + } + } +} + +// return the TaskPriority value from task metadata. +pub fn priority_from_task_meta(meta: &[u8]) -> TaskPriority { + let priority = TaskMetadata::from(meta).override_priority(); + // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) + debug_assert!(priority <= 16); + TaskPriority::from(priority) +} + +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] +#[repr(usize)] +pub enum TaskPriority { + High = 0, + Medium = 1, + Low = 2, +} + +impl TaskPriority { + // reexport enum count, caller can use it without importing `EnumCount`. + pub const PRIORITY_COUNT: usize = Self::COUNT; + pub fn as_str(&self) -> &'static str { + match *self { + TaskPriority::High => "high", + TaskPriority::Medium => "medium", + TaskPriority::Low => "low", + } + } + + pub fn priorities() -> [Self; Self::COUNT] { + use TaskPriority::*; + [High, Medium, Low] + } +} + +impl From for TaskPriority { + fn from(value: u32) -> Self { + // map the resource group priority value (1,8,16) to (Low,Medium,High) + // 0 means the priority is not set, so map it to medium by default. 
+ if value == 0 { + Self::Medium + } else if value < 6 { + Self::Low + } else if value < 11 { + Self::Medium + } else { + Self::High + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_task_metadata() { + let cases = [ + ("default", 0u32), + ("default", 6u32), + ("test", 0u32), + ("test", 15u32), + ]; + + let metadata = TaskMetadata::from_ctx(&ResourceControlContext::default()); + assert_eq!(metadata.group_name(), b"default"); + for (group_name, priority) in cases { + let metadata = TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: group_name.to_string(), + override_priority: priority as u64, + ..Default::default() + }); + assert_eq!(metadata.override_priority(), priority); + assert_eq!(metadata.group_name(), group_name.as_bytes()); + let vec = metadata.to_vec(); + let metadata1 = TaskMetadata::from(vec.as_slice()); + assert_eq!(metadata1.override_priority(), priority); + assert_eq!(metadata1.group_name(), group_name.as_bytes()); + } + } + + #[test] + fn test_task_priority() { + use TaskPriority::*; + let cases = [ + (0, Medium), + (1, Low), + (7, Medium), + (8, Medium), + (15, High), + (16, High), + ]; + for (value, priority) in cases { + assert_eq!(TaskPriority::from(value), priority); + } + } +} diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index c6a34b2673be..75d65fe46415 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -17,13 +17,15 @@ use prometheus::{IntCounter, IntGauge}; use tracker::TrackedFuture; use yatp::{queue::Extras, task::future}; +use crate::resource_control::{priority_from_task_meta, TaskPriority}; + pub type ThreadPool = yatp::ThreadPool; use super::metrics; #[derive(Clone)] struct Env { - metrics_running_task_count: IntGauge, + metrics_running_task_count_by_priority: [IntGauge; TaskPriority::PRIORITY_COUNT], metrics_handled_task_count: IntCounter, } @@ -46,8 +48,9 @@ impl crate::AssertSync for FuturePool {} impl FuturePool { pub fn from_pool(pool: ThreadPool, name: &str, pool_size: usize, max_tasks: usize) -> Self { let env = Env { - metrics_running_task_count: metrics::FUTUREPOOL_RUNNING_TASK_VEC - .with_label_values(&[name]), + metrics_running_task_count_by_priority: TaskPriority::priorities().map(|p| { + metrics::FUTUREPOOL_RUNNING_TASK_VEC.with_label_values(&[name, p.as_str()]) + }), metrics_handled_task_count: metrics::FUTUREPOOL_HANDLED_TASK_VEC .with_label_values(&[name]), }; @@ -71,6 +74,16 @@ impl FuturePool { self.inner.scale_pool_size(thread_count) } + #[inline] + pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + self.inner.set_max_tasks_per_worker(tasks_per_thread); + } + + #[inline] + pub fn get_max_tasks_count(&self) -> usize { + self.inner.max_tasks.load(Ordering::Relaxed) + } + /// Gets current running task count. #[inline] pub fn get_running_task_count(&self) -> usize { @@ -148,13 +161,25 @@ impl PoolInner { self.pool_size.store(thread_count, Ordering::Release); } + fn set_max_tasks_per_worker(&self, max_tasks_per_thread: usize) { + let max_tasks = self + .pool_size + .load(Ordering::Acquire) + .saturating_mul(max_tasks_per_thread); + self.max_tasks.store(max_tasks, Ordering::Release); + } + fn get_running_task_count(&self) -> usize { // As long as different future pool has different name prefix, we can safely use // the value in metrics. 
- self.env.metrics_running_task_count.get() as usize + self.env + .metrics_running_task_count_by_priority + .iter() + .map(|r| r.get()) + .sum::() as usize } - fn gate_spawn(&self) -> Result<(), Full> { + fn gate_spawn(&self, current_tasks: usize) -> Result<(), Full> { fail_point!("future_pool_spawn_full", |_| Err(Full { current_tasks: 100, max_tasks: 100, @@ -165,7 +190,6 @@ impl PoolInner { return Ok(()); } - let current_tasks = self.get_running_task_count(); if current_tasks >= max_tasks { Err(Full { current_tasks, @@ -181,9 +205,14 @@ impl PoolInner { F: Future + Send + 'static, { let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); - let metrics_running_task_count = self.env.metrics_running_task_count.clone(); + let task_priority = extras + .as_ref() + .map(|m| priority_from_task_meta(m.metadata())) + .unwrap_or(TaskPriority::Medium); + let metrics_running_task_count = + self.env.metrics_running_task_count_by_priority[task_priority as usize].clone(); - self.gate_spawn()?; + self.gate_spawn(metrics_running_task_count.get() as usize)?; metrics_running_task_count.inc(); @@ -210,9 +239,10 @@ impl PoolInner { F::Output: Send, { let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); - let metrics_running_task_count = self.env.metrics_running_task_count.clone(); + let metrics_running_task_count = + self.env.metrics_running_task_count_by_priority[TaskPriority::Medium as usize].clone(); - self.gate_spawn()?; + self.gate_spawn(metrics_running_task_count.get() as usize)?; let (tx, rx) = oneshot::channel(); metrics_running_task_count.inc(); diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index efb1379dcc79..a3e68b260dba 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -7,7 +7,7 @@ lazy_static! { pub static ref FUTUREPOOL_RUNNING_TASK_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_futurepool_pending_task_total", "Current future_pool pending + running tasks.", - &["name"] + &["name", "priority"] ) .unwrap(); pub static ref FUTUREPOOL_HANDLED_TASK_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 3cb237bad15d..0b4cffbdc146 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -17,6 +17,7 @@ use yatp::{ }; use crate::{ + resource_control::{priority_from_task_meta, TaskPriority}, thread_group::GroupProperties, time::{Duration, Instant}, timer::GLOBAL_TIMER_HANDLE, @@ -166,9 +167,7 @@ pub struct YatpPoolRunner { // Statistics about the schedule wait duration. // local histogram for high,medium,low priority tasks. - schedule_wait_durations: [LocalHistogram; 3], - // return the index of `schedule_wait_durations` from task metadata. 
- metric_idx_from_task_meta: Arc usize + Send + Sync>, + schedule_wait_durations: [LocalHistogram; TaskPriority::PRIORITY_COUNT], } impl Runner for YatpPoolRunner { @@ -193,7 +192,7 @@ impl Runner for YatpPoolRunner { fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { let extras = task_cell.mut_extras(); if let Some(schedule_time) = extras.schedule_time() { - let idx = (*self.metric_idx_from_task_meta)(extras.metadata()); + let idx = priority_from_task_meta(extras.metadata()) as usize; self.schedule_wait_durations[idx].observe(schedule_time.elapsed().as_secs_f64()); } let finished = self.inner.handle(local, task_cell); @@ -232,8 +231,7 @@ impl YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, - schedule_wait_durations: [Histogram; 3], - metric_idx_from_task_meta: Arc usize + Send + Sync>, + schedule_wait_durations: [Histogram; TaskPriority::PRIORITY_COUNT], ) -> Self { YatpPoolRunner { inner, @@ -243,7 +241,6 @@ impl YatpPoolRunner { before_stop, before_pause, schedule_wait_durations: schedule_wait_durations.map(|m| m.local()), - metric_idx_from_task_meta, } } } @@ -356,8 +353,8 @@ impl YatpPoolBuilder { self } - pub fn enable_task_wait_metrics(mut self) -> Self { - self.enable_task_wait_metrics = true; + pub fn enable_task_wait_metrics(mut self, enable: bool) -> Self { + self.enable_task_wait_metrics = enable; self } @@ -506,15 +503,13 @@ impl YatpPoolBuilder { let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); let schedule_wait_durations = if self.enable_task_wait_metrics { - ["high", "medium", "low"].map(|p| { - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name, p]) + TaskPriority::priorities().map(|p| { + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC + .with_label_values(&[&name, p.as_str()]) }) } else { std::array::from_fn(|_| Histogram::with_opts(HistogramOpts::new("_", "_")).unwrap()) }; - let metric_idx_from_task_meta = self - .metric_idx_from_task_meta - .unwrap_or_else(|| Arc::new(|_| 0)); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), @@ -522,7 +517,6 @@ impl YatpPoolBuilder { before_stop, before_pause, schedule_wait_durations, - metric_idx_from_task_meta, ); (builder, read_pool_runner) } @@ -545,7 +539,7 @@ mod tests { let name = "test_record_schedule_wait_duration"; let pool = YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(name) - .enable_task_wait_metrics() + .enable_task_wait_metrics(true) .build_single_level_pool(); let (tx, rx) = mpsc::channel(); for _ in 0..3 { @@ -565,7 +559,7 @@ mod tests { // Drop the pool so the local metrics are flushed. 
drop(pool); let histogram = - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "high"]); + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "medium"]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } diff --git a/src/config/mod.rs b/src/config/mod.rs index a862d01ace4f..e5df8c3e153e 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2234,7 +2234,6 @@ pub struct UnifiedReadPoolConfig { pub max_thread_count: usize, #[online_config(skip)] pub stack_size: ReadableSize, - #[online_config(skip)] pub max_tasks_per_worker: usize, pub auto_adjust_pool_size: bool, // FIXME: Add more configs when they are effective in yatp diff --git a/src/read_pool.rs b/src/read_pool.rs index 22a11cb2b41f..111d3f0ce8ae 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -17,10 +17,11 @@ use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use prometheus::{core::Metric, Histogram, IntCounter, IntGauge}; use resource_control::{ - with_resource_limiter, ControlledFuture, ResourceController, ResourceLimiter, TaskMetadata, + with_resource_limiter, ControlledFuture, ResourceController, ResourceLimiter, }; use thiserror::Error; use tikv_util::{ + resource_control::TaskMetadata, sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, @@ -238,6 +239,17 @@ impl ReadPoolHandle { } } + pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + match self { + ReadPoolHandle::FuturePools { .. } => { + unreachable!() + } + ReadPoolHandle::Yatp { remote, .. } => { + remote.set_max_tasks_per_worker(tasks_per_thread); + } + } + } + pub fn get_ewma_time_slice(&self) -> Option { match self { ReadPoolHandle::FuturePools { .. 
} => None, @@ -401,7 +413,7 @@ pub fn build_yatp_read_pool( engine: E, resource_ctl: Option>, cleanup_method: CleanupMethod, - metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, + enable_task_wait_metrics: bool, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); build_yatp_read_pool_with_name( @@ -411,7 +423,7 @@ pub fn build_yatp_read_pool( resource_ctl, cleanup_method, unified_read_pool_name, - metric_idx_from_task_meta_fn, + enable_task_wait_metrics, ) } @@ -422,10 +434,10 @@ pub fn build_yatp_read_pool_with_name( resource_ctl: Option>, cleanup_method: CleanupMethod, unified_read_pool_name: String, - metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, + enable_task_wait_metrics: bool, ) -> ReadPool { let raftkv = Arc::new(Mutex::new(engine)); - let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }) + let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) .cleanup_method(cleanup_method) .stack_size(config.stack_size.0 as usize) @@ -452,12 +464,8 @@ pub fn build_yatp_read_pool_with_name( }) .before_stop(|| unsafe { destroy_tls_engine::(); - }); - if let Some(metric_idx_from_task_meta_fn) = metric_idx_from_task_meta_fn { - builder = builder - .enable_task_wait_metrics() - .metric_idx_from_task_meta(metric_idx_from_task_meta_fn); - } + }) + .enable_task_wait_metrics(enable_task_wait_metrics); let pool = if let Some(ref r) = resource_ctl { builder.build_priority_future_pool(r.clone()) @@ -563,6 +571,9 @@ impl Runnable for ReadPoolConfigRunner { self.cur_thread_count = self.core_thread_count; } } + Task::MaxTasksPerWorker(s) => { + self.handle.set_max_tasks_per_worker(s); + } } } } @@ -647,6 +658,7 @@ impl ReadPoolConfigRunner { enum Task { PoolSize(usize), AutoAdjust(bool), + MaxTasksPerWorker(usize), } impl std::fmt::Display for Task { @@ -654,6 +666,7 @@ impl std::fmt::Display for Task { match self { Task::PoolSize(s) => write!(f, "PoolSize({})", *s), Task::AutoAdjust(s) => write!(f, "AutoAdjust({})", *s), + Task::MaxTasksPerWorker(s) => write!(f, "MaxTasksPerWorker({})", *s), } } } @@ -706,6 +719,10 @@ impl ConfigManager for ReadPoolConfigManager { if let Some(ConfigValue::Bool(b)) = unified.get("auto_adjust_pool_size") { self.scheduler.schedule(Task::AutoAdjust(*b))?; } + if let Some(ConfigValue::Usize(max_tasks)) = unified.get("max_tasks_per_worker") { + self.scheduler + .schedule(Task::MaxTasksPerWorker(*max_tasks))?; + } } info!( "readpool config changed"; @@ -745,6 +762,8 @@ mod tests { use std::{thread, time::Duration}; use futures::channel::oneshot; + use futures_executor::block_on; + use kvproto::kvrpcpb::ResourceControlContext; use raftstore::store::{ReadStats, WriteStats}; use resource_control::ResourceGroupManager; @@ -778,7 +797,7 @@ mod tests { None, CleanupMethod::InPlace, name.to_owned(), - None, + false, ); let gen_task = || { @@ -838,7 +857,7 @@ mod tests { engine, None, CleanupMethod::InPlace, - None, + false, ); let gen_task = || { @@ -891,7 +910,7 @@ mod tests { max_tasks_per_worker: 1, ..Default::default() }; - // max running tasks number should be 2*1 = 2 + // max running tasks number for each priority should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); let pool = build_yatp_read_pool( @@ -900,7 +919,7 @@ mod tests { engine, None, CleanupMethod::InPlace, - None, + false, ); let gen_task = || { @@ -931,6 +950,15 @@ mod tests { _ => panic!("should return full error"), } + // spawn a high-priority task, should not return Full 
error. + let (task_high, tx_h) = gen_task(); + let mut ctx = ResourceControlContext::default(); + ctx.override_priority = 16; // high priority + let metadata = TaskMetadata::from_ctx(&ctx); + let f = handle.spawn_handle(task_high, CommandPri::Normal, 6, metadata, None); + tx_h.send(()).unwrap(); + block_on(f).unwrap(); + tx1.send(()).unwrap(); tx2.send(()).unwrap(); thread::sleep(Duration::from_millis(300)); @@ -1027,7 +1055,7 @@ mod tests { resource_manager, CleanupMethod::InPlace, name.clone(), - None, + false, ); let gen_task = || { diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 8674a581c725..c6d7b477db0b 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -13,8 +13,7 @@ use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; use resource_control::{ - priority_from_task_meta, with_resource_limiter, ControlledFuture, ResourceController, - ResourceGroupManager, TaskMetadata, + with_resource_limiter, ControlledFuture, ResourceController, ResourceGroupManager, TaskMetadata, }; use tikv_util::{ sys::SysQuota, @@ -194,8 +193,7 @@ impl SchedPool { destroy_tls_engine::(); tls_flush(&reporter); }) - .enable_task_wait_metrics() - .metric_idx_from_task_meta(Arc::new(priority_from_task_meta)) + .enable_task_wait_metrics(true) }; let vanilla = VanillaQueue { worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), From bc971dd5afa11ad6bd3a1641972e6b321636d6dc Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 29 Nov 2023 15:41:49 +0800 Subject: [PATCH 167/203] metrics: generate Grafana dashboards with python (#15857) close tikv/tikv#12796 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 1 + metrics/grafana/README.md | 11 + metrics/grafana/common.py | 1087 + metrics/grafana/tikv_details.dashboard.py | 8602 ++++ metrics/grafana/tikv_details.json | 47480 +++++++++++++------- metrics/grafana/tikv_details.json.sha256 | 1 + scripts/check-dashboards | 14 + scripts/gen-tikv-details-dashboard | 29 + 8 files changed, 42190 insertions(+), 15035 deletions(-) create mode 100644 metrics/grafana/README.md create mode 100644 metrics/grafana/common.py create mode 100644 metrics/grafana/tikv_details.dashboard.py create mode 100644 metrics/grafana/tikv_details.json.sha256 create mode 100755 scripts/check-dashboards create mode 100755 scripts/gen-tikv-details-dashboard diff --git a/Makefile b/Makefile index 632794f3208f..d7b0940fd5c9 100644 --- a/Makefile +++ b/Makefile @@ -360,6 +360,7 @@ pre-clippy: unset-override clippy: pre-clippy @./scripts/check-redact-log @./scripts/check-log-style + @./scripts/check-dashboards @./scripts/check-docker-build @./scripts/check-license @./scripts/clippy-all diff --git a/metrics/grafana/README.md b/metrics/grafana/README.md new file mode 100644 index 000000000000..dec76a67529e --- /dev/null +++ b/metrics/grafana/README.md @@ -0,0 +1,11 @@ +# TiKV Grafana Dashboard + +The "TiKV Details" dashboard is generated by the `tikv_details.dashboard.py` +Python script. + +## Updating the Dashboard + +To add or update panels on the dashboard, make your changes in +`tikv_details.dashboard.py` and then run `./scripts/gen-tikv-details-dashboard`. + +Please avoid manually modifying `tikv_details.json`. 
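As a rough illustration of that workflow, a new panel is defined in `tikv_details.dashboard.py` using the helpers that `common.py` (added below) provides. The sketch that follows is indicative only: the panel name and title are placeholders, it assumes the helper signatures introduced in this patch (`graph_panel`, `target`, `expr_sum_rate`, `yaxes`), and the PromQL in the comment follows the `expr_sum_rate` docstring rather than captured output.

    # Minimal sketch of defining one panel, assuming metrics/grafana is on sys.path.
    from grafanalib import formatunits as UNITS

    from common import expr_sum_rate, graph_panel, target, yaxes

    qps_panel = graph_panel(
        title="gRPC QPS",
        yaxes=yaxes(left_format=UNITS.OPS_PER_SEC),
        targets=[
            target(
                # The Expr builder renders roughly to:
                #   sum(rate(
                #     tikv_grpc_msg_duration_seconds_count
                #     {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",
                #      instance=~"$instance",type!="kv_gc"}
                #     [$__rate_interval]
                #   )) by (instance)
                expr=expr_sum_rate(
                    "tikv_grpc_msg_duration_seconds_count",
                    label_selectors=['type!="kv_gc"'],
                    by_labels=["instance"],
                ),
            ),
        ],
    )
    # After editing, regenerate the JSON with ./scripts/gen-tikv-details-dashboard;
    # ./scripts/check-dashboards (wired into `make clippy`) verifies the result in CI.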
diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py new file mode 100644 index 000000000000..cb6757bee937 --- /dev/null +++ b/metrics/grafana/common.py @@ -0,0 +1,1087 @@ +from typing import Optional, Union + +import attr +from attr.validators import in_, instance_of +from grafanalib import formatunits as UNITS +from grafanalib.core import ( + NULL_AS_ZERO, + TIME_SERIES_TARGET_FORMAT, + DataSourceInput, + Graph, + GraphThreshold, + GridPos, + Heatmap, + HeatmapColor, + Legend, + Panel, + RowPanel, + SeriesOverride, + Stat, + StatValueMappings, + Target, + Template, + TimeSeries, + Tooltip, + YAxes, + YAxis, +) + +DATASOURCE_INPUT = DataSourceInput( + name="DS_TEST-CLUSTER", + label="test-cluster", + pluginId="prometheus", + pluginName="Prometheus", +) +DATASOURCE = f"${{{DATASOURCE_INPUT.name}}}" + + +@attr.s +class Expr(object): + """ + A prometheus expression that matches the following grammar: + + expr ::= ( + [aggr_param,] + [func]( + + [{,}] + [[]] + ) + ) [by (,)] [extra_expr] + """ + + metric: str = attr.ib(validator=instance_of(str)) + aggr_op: str = attr.ib( + default="", + validator=in_( + [ + "", + "sum", + "min", + "max", + "avg", + "group", + "stddev", + "stdvar", + "count", + "count_values", + "bottomk", + "topk", + "quantile", + ] + ), + ) + aggr_param: str = attr.ib(default="", validator=instance_of(str)) + func: str = attr.ib(default="", validator=instance_of(str)) + range_selector: str = attr.ib(default="", validator=instance_of(str)) + label_selectors: list[str] = attr.ib(default=[], validator=instance_of(list)) + by_labels: list[str] = attr.ib(default=[], validator=instance_of(list)) + default_label_selectors: list[str] = attr.ib( + default=[ + r'k8s_cluster="$k8s_cluster"', + r'tidb_cluster="$tidb_cluster"', + r'instance=~"$instance"', + ], + validator=instance_of(list), + ) + skip_default_instance: bool = attr.ib(default=False, validator=instance_of(bool)) + extra_expr: str = attr.ib(default="", validator=instance_of(str)) + + def __str__(self) -> str: + aggr_opeator = self.aggr_op if self.aggr_op else "" + aggr_param = self.aggr_param + "," if self.aggr_param else "" + by_clause = ( + "by ({})".format(", ".join(self.by_labels)) if self.by_labels else "" + ) + func = self.func if self.func else "" + label_selectors = self.default_label_selectors + self.label_selectors + if self.skip_default_instance: + # Remove instance=~"$instance" + label_selectors = [l for l in label_selectors if "$instance" not in l] + assert all( + ("=" in item or "~" in item) for item in label_selectors + ), f"Not all items contain '=' or '~', invalid {self.label_selectors}" + instant_selectors = ( + "{{{}}}".format(",".join(label_selectors)) if label_selectors else "" + ) + range_selector = f"[{self.range_selector}]" if self.range_selector else "" + extra_expr = self.extra_expr if self.extra_expr else "" + return f"""{aggr_opeator}({aggr_param}{func}( + {self.metric} + {instant_selectors} + {range_selector} +)) {by_clause} {extra_expr}""" + + def aggregate( + self, + aggr_op: str, + aggr_param: str = "", + by_labels: list[str] = [], + label_selectors: list[str] = [], + ) -> "Expr": + self.aggr_op = aggr_op + self.aggr_param = aggr_param + self.by_labels = by_labels + self.label_selectors = label_selectors + return self + + def function( + self, + func: str, + label_selectors: list[str] = [], + range_selector: str = "", + ) -> "Expr": + self.func = func + self.label_selectors = label_selectors + self.range_selector = range_selector + return self + + def extra( + self, + 
extra_expr: Optional[str] = None, + default_label_selectors: Optional[list[str]] = None, + ) -> "Expr": + if extra_expr is not None: + self.extra_expr = extra_expr + if default_label_selectors is not None: + self.default_label_selectors = default_label_selectors + return self + + def skip_default_instance_selector(self) -> "Expr": + self.skip_default_instance = True + return self + + +def expr_aggr( + metric: str, + aggr_op: str, + aggr_param: str = "", + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the aggregation of a metric. + + Example: + + sum(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + expr = Expr(metric=metric) + expr.aggregate( + aggr_op, + aggr_param=aggr_param, + by_labels=by_labels, + label_selectors=label_selectors, + ) + return expr + + +def expr_sum( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of a metric. + + Example: + + sum(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "sum", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_avg( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the avg of a metric. + + Example: + + avg(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "avg", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_max( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the max of a metric. + + Example: + + max(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "max", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_min( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the min of a metric. + + Example: + + min(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "min", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_aggr_func( + metric: str, + aggr_op: str, + func: str, + aggr_param: str = "", + label_selectors: list[str] = [], + range_selector: str = "", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the aggregation of function of a metric. 
+ + Example: + + expr_aggr_func( + tikv_grpc_msg_duration_seconds_count, "sum", "rate", lables_selectors=['type!="kv_gc"'] + ) + + sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + expr = Expr(metric=metric) + expr.aggregate( + aggr_op, + aggr_param=aggr_param, + by_labels=by_labels, + ) + expr.function( + func, + label_selectors=label_selectors, + range_selector=range_selector, + ) + return expr + + +def expr_sum_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of rate of a metric. + + Example: + + sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. + # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_sum_delta( + metric: str, + label_selectors: list[str] = [], + range_selector: str = "$__rate_interval", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of delta of a metric. + + Example: + + sum(delta( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="delta", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_sum_increase( + metric: str, + label_selectors: list[str] = [], + range_selector: str = "$__rate_interval", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of increase of a metric. + + Example: + + sum(increase( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="increase", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_sum_aggr_over_time( + metric: str, + aggr: str, + range_selector: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of average value of all points in the specified interval of a metric. + + Example: + + sum(avg_over_time( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [1m] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func=f"{aggr}_over_time", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_max_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the max of rate of a metric. 
+ + Example: + + max(rate( + tikv_thread_voluntary_context_switches + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (name) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. + # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="max", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_count_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the count of rate of a metric. + + Example: + + count(rate( + tikv_thread_cpu_seconds_total + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",name=~"sst_.*"} + [$__rate_interval] + )) by (instance) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. + # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="count", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_simple( + metric: str, + label_selectors: list[str] = [], +) -> Expr: + """ + Query an instant vector of a metric. + + Example: + + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + """ + expr = Expr(metric=metric) + expr.function("", label_selectors=label_selectors) + return expr + + +def expr_operator(lhs: Union[Expr, str], operator: str, rhs: Union[Expr, str]) -> str: + return f"""({lhs} {operator} {rhs})""" + + +def expr_histogram_quantile( + quantile: float, + metrics: str, + label_selectors: list[str] = [], + by_labels: list[str] = [], +) -> Expr: + """ + Query a quantile of a histogram metric. + + Example: + + histogram_quantile(0.99, sum(rate( + tikv_grpc_msg_duration_seconds_bucket + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (le)) + """ + # sum(rate(metrics_bucket{label_selectors}[$__rate_interval])) by (le) + assert not metrics.endswith( + "_bucket" + ), f"'{metrics}' should not specify '_bucket' suffix manually" + by_labels = list(filter(lambda label: label != "le", by_labels)) + sum_rate_of_buckets = expr_sum_rate( + metrics + "_bucket", + label_selectors=label_selectors, + by_labels=by_labels + ["le"], + ) + # histogram_quantile({quantile}, {sum_rate_of_buckets}) + return expr_aggr( + metric=f"{sum_rate_of_buckets}", + aggr_op="histogram_quantile", + aggr_param=f"{quantile}", + label_selectors=[], + by_labels=[], + ).extra( + # Do not attach default label selector again. + default_label_selectors=[] + ) + + +def expr_topk( + k: int, + metrics: str, +) -> Expr: + """ + Query topk of a metric. + + Example: + + topk(20, tikv_thread_voluntary_context_switches) + """ + # topk({k}, {metric}) + return expr_aggr( + metric=metrics, + aggr_op="topk", + aggr_param=f"{k}", + label_selectors=[], + by_labels=[], + ).extra( + # Do not attach default label selector again. 
+ default_label_selectors=[] + ) + + +def expr_histogram_avg( + metrics: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> str: + """ + Query the avg of a histogram metric. + + Example: + + sum(rate( + tikv_grpc_msg_duration_seconds_sum + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance"} + [$__rate_interval] + )) / sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance"} + [$__rate_interval] + )) + """ + for suffix in ["_bucket", "_count", "_sum"]: + assert not metrics.endswith( + suffix + ), f"'{metrics}' should not specify '{suffix}' suffix manually" + + return expr_operator( + expr_sum_rate( + metrics + "_sum", + label_selectors=label_selectors, + by_labels=by_labels, + ), + "/", + expr_sum_rate( + metrics + "_count", + label_selectors=label_selectors, + by_labels=by_labels, + ), + ) + + +def target( + expr: Union[Expr, str], + legend_format: Optional[str] = None, + hide=False, + data_source=DATASOURCE, + interval_factor=1, # Prefer "high" resolution +) -> Target: + if legend_format is None and isinstance(expr, Expr) and expr.by_labels: + legend_format = "-".join(map(lambda x: "{{" + f"{x}" + "}}", expr.by_labels)) + return Target( + expr=f"{expr}", + hide=hide, + legendFormat=legend_format, + intervalFactor=interval_factor, + datasource=data_source, + ) + + +def template( + name, + query, + data_source, + hide, + regex=None, + multi=False, + include_all=False, + all_value=None, +) -> Template: + return Template( + dataSource=data_source, + hide=hide, + label=name, + multi=multi, + name=name, + query=query, + refresh=2, + sort=1, + type="query", + useTags=False, + regex=regex, + includeAll=include_all, + allValue=all_value, + ) + + +class Layout: + # Rows are always 24 "units" wide. 
+ ROW_WIDTH = 24 + PANEL_HEIGHT = 7 + row_panel: RowPanel + current_row_y_pos: int + current_row_x_pos: int + + def __init__(self, title, collapsed=True, repeat: Optional[str] = None) -> None: + extraJson = None + if repeat: + extraJson = {"repeat": repeat} + title = f"{title} - ${repeat}" + self.current_row_y_pos = 0 + self.current_row_x_pos = 0 + self.row_panel = RowPanel( + title=title, + gridPos=GridPos(h=self.PANEL_HEIGHT, w=self.ROW_WIDTH, x=0, y=0), + collapsed=collapsed, + extraJson=extraJson, + ) + + def row(self, panels: list[Panel], width: int = ROW_WIDTH): + """Start a new row and evenly scales panels width""" + count = len(panels) + if count == 0: + return panels + width = width // count + remain = self.ROW_WIDTH % count + x = self.current_row_x_pos % self.ROW_WIDTH + for panel in panels: + panel.gridPos = GridPos( + h=self.PANEL_HEIGHT, + w=width, + x=x, + y=self.current_row_y_pos, + ) + x += width + panels[-1].gridPos.w += remain + self.row_panel.panels.extend(panels) + self.current_row_y_pos += self.PANEL_HEIGHT + self.current_row_x_pos = x + + def half_row(self, panels: list[Panel]): + self.row(panels, self.ROW_WIDTH // 2) + + +def timeseries_panel( + title, + targets, + legend_calcs=["max", "last"], + unit="s", + draw_style="line", + line_width=1, + fill_opacity=10, + gradient_mode="opacity", + tooltip_mode="multi", + legend_display_mode="table", + legend_placement="right", + description=None, + data_source=DATASOURCE, +) -> TimeSeries: + return TimeSeries( + title=title, + dataSource=data_source, + description=description, + targets=targets, + legendCalcs=legend_calcs, + drawStyle=draw_style, + lineWidth=line_width, + fillOpacity=fill_opacity, + gradientMode=gradient_mode, + unit=unit, + tooltipMode=tooltip_mode, + legendDisplayMode=legend_display_mode, + legendPlacement=legend_placement, + ) + + +def yaxis(format: str, log_base=1) -> YAxis: + assert format not in [ + UNITS.BYTES, + UNITS.BITS, + UNITS.KILO_BYTES, + UNITS.MEGA_BYTES, + UNITS.GIGA_BYTES, + UNITS.TERA_BYTES, + UNITS.PETA_BYTES, + UNITS.BYTES_SEC, + UNITS.KILO_BYTES_SEC, + UNITS.MEGA_BYTES_SEC, + UNITS.GIGA_BYTES_SEC, + UNITS.TERA_BYTES_SEC, + UNITS.PETA_BYTES_SEC, + ], "Must not use SI bytes" + return YAxis(format=format, logBase=log_base) + + +def yaxes(left_format: str, right_format: Optional[str] = None, log_base=1) -> YAxes: + ya = YAxes(left=yaxis(left_format, log_base=log_base)) + if right_format is not None: + ya.right = yaxis(right_format, log_base=log_base) + return ya + + +def graph_legend( + avg=False, + current=True, + max=True, + min=False, + show=True, + total=False, + align_as_table=True, + hide_empty=True, + hide_zero=True, + right_side=True, + side_width=None, + sort_desc=True, +) -> Legend: + sort = "max" if max else "current" + return Legend( + avg=avg, + current=current, + max=max, + min=min, + show=show, + total=total, + alignAsTable=align_as_table, + hideEmpty=hide_empty, + hideZero=hide_zero, + rightSide=right_side, + sideWidth=side_width, + sort=sort, + sortDesc=sort_desc, + ) + + +def graph_panel( + title: str, + targets: list[Target], + description=None, + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + legend=None, + tooltip=Tooltip(shared=True, valueType="individual"), + lines=True, + line_width=1, + fill=1, + fill_gradient=1, + stack=False, + thresholds: list[GraphThreshold] = [], + series_overrides: list[SeriesOverride] = [], + data_source=DATASOURCE, + null_point_mode=NULL_AS_ZERO, +) -> Panel: + # extraJson add patches grafanalib result. 
+ extraJson = {} + if fill_gradient != 0: + # fillGradient is only valid when fill is 1. + if fill == 0: + fill = 1 + # fillGradient is not set correctly in grafanalib(0.7.0), so we need to + # set it manually. + # TODO: remove it when grafanalib fix this. + extraJson["fillGradient"] = 1 + for target in targets: + # Make sure target is in time_series format. + target.format = TIME_SERIES_TARGET_FORMAT + + return Graph( + title=title, + dataSource=data_source, + description=description, + targets=targets, + yAxes=yaxes, + legend=legend if legend else graph_legend(), + lines=lines, + bars=not lines, + lineWidth=line_width, + fill=fill, + fillGradient=fill_gradient, + stack=stack, + nullPointMode=null_point_mode, + thresholds=thresholds, + tooltip=tooltip, + seriesOverrides=series_overrides, + # Do not specify max max data points, let Grafana decide. + maxDataPoints=None, + extraJson=extraJson, + ) + + +def series_override( + alias: str, + bars: bool = False, + lines: bool = True, + yaxis: int = 1, + fill: int = 1, + zindex: int = 0, + dashes: Optional[bool] = None, + dash_length: Optional[int] = None, + space_length: Optional[int] = None, + transform_negative_y: bool = False, +) -> SeriesOverride: + class SeriesOverridePatch(SeriesOverride): + dashes_override: Optional[bool] + dash_length_override: Optional[int] + space_length_override: Optional[int] + transform_negative_y: bool + + def __init__(self, *args, **kwargs) -> None: + self.dashes_override = kwargs["dashes"] + if self.dashes_override is None: + del kwargs["dashes"] + self.dash_length_override = kwargs["dashLength"] + if self.dash_length_override is None: + del kwargs["dashLength"] + self.space_length_override = kwargs["spaceLength"] + if self.space_length_override is None: + del kwargs["spaceLength"] + self.transform_negative_y = kwargs["transform_negative_y"] + del kwargs["transform_negative_y"] + super().__init__(*args, **kwargs) + + def to_json_data(self): + data = super().to_json_data() + # The default 'null' color makes it transparent, remove it. + del data["color"] + # The default 'null' makes it a transparent line, remove it. + if self.dashes_override is None: + del data["dashes"] + if self.dash_length_override is None: + del data["dashLength"] + if self.space_length_override is None: + del data["spaceLength"] + # Add missing transform. + if self.transform_negative_y: + data["transform"] = "negative-Y" + return data + + return SeriesOverridePatch( + alias=alias, + bars=bars, + lines=lines, + yaxis=yaxis, + fill=fill, + zindex=zindex, + dashes=dashes, + dashLength=dash_length, + spaceLength=space_length, + transform_negative_y=transform_negative_y, + ) + + +def heatmap_color() -> HeatmapColor: + return HeatmapColor( + cardColor="#b4ff00", + colorScale="sqrt", + colorScheme="interpolateSpectral", + exponent=0.5, + mode="spectrum", + max=None, + min=None, + ) + + +def heatmap_panel( + title: str, + metric: str, + description=None, + label_selectors: list[str] = [], + yaxis=yaxis(UNITS.NO_FORMAT), + tooltip=Tooltip(shared=True, valueType="individual"), + color=heatmap_color(), + decimals=1, + data_source=DATASOURCE, +) -> Panel: + assert metric.endswith( + "_bucket" + ), f"'{metric}' should be a histogram metric with '_bucket' suffix" + t = target( + expr=expr_sum_rate(metric, label_selectors=label_selectors, by_labels=["le"]), + ) + # Make sure targets are in heatmap format. + t.format = "heatmap" + # Heatmap target legendFormat should be "{{le}}" + t.legendFormat = "{{le}}" + # Overrides yaxis decimal places. 
+ yaxis.decimals = decimals + return Heatmap( + title=title, + dataSource=data_source, + description=description, + targets=[t], + yAxis=yaxis, + color=color, + dataFormat="tsbuckets", + yBucketBound="upper", + tooltip=tooltip, + extraJson={"tooltip": {"showHistogram": True}}, + hideZeroBuckets=True, + # Limit data points, because too many data points slows browser when + # the resolution is too high. + # See: https://grafana.com/blog/2020/06/23/how-to-visualize-prometheus-histograms-in-grafana/ + maxDataPoints=512, + ) + + +def stat_panel( + title: str, + targets: list[Target], + description=None, + format=UNITS.NONE_FORMAT, + graph_mode="none", + decimals: Optional[int] = None, + mappings: Optional[StatValueMappings] = None, + text_mode: str = "auto", + data_source=DATASOURCE, +) -> Panel: + for target in targets: + # Make sure target is in time_series format. + target.format = TIME_SERIES_TARGET_FORMAT + return Stat( + title=title, + dataSource=data_source, + description=description, + targets=targets, + format=format, + graphMode=graph_mode, + reduceCalc="lastNotNull", + decimals=decimals, + mappings=mappings, + textMode=text_mode, + ) + + +def graph_panel_histogram_quantiles( + title: str, + description: str, + yaxes: YAxes, + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = [], + hide_avg=False, + hide_count=False, +) -> Panel: + """ + Return a graph panel that shows histogram quantiles of a metric. + + Targets: + - 99.99% quantile + - 99% quantile + - avg + - count + """ + + def legend(prefix, labels): + if not labels: + return prefix + else: + return "-".join([prefix] + ["{{%s}}" % lb for lb in labels]) + + return graph_panel( + title=title, + description=description, + yaxes=yaxes, + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + f"{metric}", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("99.99%", by_labels), + ), + target( + expr=expr_histogram_quantile( + 0.99, + f"{metric}", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("99%", by_labels), + ), + target( + expr=expr_histogram_avg( + metric, + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("avg", by_labels), + hide=hide_avg, + ), + target( + expr=expr_sum_rate( + f"{metric}_count", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("count", by_labels), + hide=hide_count, + ), + ], + series_overrides=[ + series_override( + alias="count", + fill=2, + yaxis=2, + zindex=-3, + dashes=True, + dash_length=1, + space_length=1, + transform_negative_y=True, + ), + series_override( + alias="avg", + fill=7, + ), + ], + ) + + +def heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title: str, + heatmap_description: str, + graph_title: str, + graph_description: str, + yaxis_format: str, + metric: str, + label_selectors=[], + graph_by_labels=[], +) -> list[Panel]: + return [ + heatmap_panel( + title=heatmap_title, + description=heatmap_description, + yaxis=yaxis(format=yaxis_format), + metric=f"{metric}_bucket", + label_selectors=label_selectors, + ), + graph_panel_histogram_quantiles( + title=graph_title, + description=graph_description, + metric=f"{metric}", + yaxes=yaxes(left_format=yaxis_format), + by_labels=graph_by_labels, + hide_count=True, + ), + ] diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py new file mode 100644 index 000000000000..ade81f717fd0 --- /dev/null +++ 
b/metrics/grafana/tikv_details.dashboard.py @@ -0,0 +1,8602 @@ +import os +import sys + +sys.path.append(os.path.dirname(__file__)) + +from common import ( + DATASOURCE, + DATASOURCE_INPUT, + Layout, + expr_avg, + expr_count_rate, + expr_histogram_avg, + expr_histogram_quantile, + expr_max, + expr_max_rate, + expr_min, + expr_operator, + expr_simple, + expr_sum, + expr_sum_aggr_over_time, + expr_sum_delta, + expr_sum_increase, + expr_sum_rate, + expr_topk, + graph_legend, + graph_panel, + graph_panel_histogram_quantiles, + heatmap_panel, + heatmap_panel_graph_panel_histogram_quantile_pairs, + series_override, + stat_panel, + target, + template, + yaxes, + yaxis, +) +from grafanalib import formatunits as UNITS +from grafanalib.core import ( + GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, + HIDE_VARIABLE, + NULL_AS_NULL, + SHOW, + Dashboard, + GraphThreshold, + RowPanel, + StatValueMappingItem, + StatValueMappings, + Templating, +) + +#### Metrics Definition Start #### + + +def Templates() -> Templating: + return Templating( + list=[ + template( + name="k8s_cluster", + query="label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + ), + template( + name="tidb_cluster", + query='label_values(tikv_engine_block_cache_size_bytes{k8s_cluster ="$k8s_cluster"}, tidb_cluster)', + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + ), + template( + name="db", + query='label_values(tikv_engine_block_cache_size_bytes{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"}, db)', + data_source=DATASOURCE, + hide=SHOW, + multi=True, + include_all=True, + ), + template( + name="command", + query='query_result(tikv_storage_command_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"} != 0)', + data_source=DATASOURCE, + hide=SHOW, + regex='/\\btype="([^"]+)"/', + multi=True, + include_all=True, + ), + template( + name="instance", + query='label_values(tikv_engine_size_bytes{k8s_cluster ="$k8s_cluster", tidb_cluster="$tidb_cluster"}, instance)', + data_source=DATASOURCE, + hide=SHOW, + include_all=True, + all_value=".*", + ), + template( + name="titan_db", + query='label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"}, db)', + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + multi=True, + include_all=True, + ), + ] + ) + + +def Duration() -> RowPanel: + layout = Layout(title="Duration") + layout.row( + [ + graph_panel( + title="Write Pipeline Duration", + description="Write Pipeline Composition", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_append_log_duration_seconds" + ), + legend_format="Write Raft Log .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_request_wait_time_duration_secs", + ), + legend_format="Propose Wait .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_apply_wait_time_duration_secs" + ), + legend_format="Apply Wait .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_commit_log_duration_seconds" + ), + legend_format="Replicate Raft Log .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_apply_log_duration_seconds" + ), + legend_format="Apply Duration .99", + ), + ], + ), + graph_panel( + title="Cop Read Duration", + description="Read Duration Composition", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + 
expr=expr_histogram_quantile( + 0.99, + "tikv_storage_engine_async_request_duration_seconds", + ['type="snapshot"'], + ), + legend_format="Get Snapshot .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_coprocessor_request_wait_seconds", + ['type="all"'], + ), + legend_format="Cop Wait .99", + ), + target( + expr=expr_histogram_quantile( + 0.95, "tikv_coprocessor_request_handle_seconds" + ), + legend_format="Cop Handle .99", + ), + ], + ), + ] + ) + return layout.row_panel + + +def Cluster() -> RowPanel: + layout = Layout(title="Cluster") + layout.row( + [ + graph_panel( + title="Store size", + description="The storage size per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type = "used"'], + ), + ), + ], + ), + graph_panel( + title="Available size", + description="The available capacity size of each TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type="available"'], + ), + ), + ], + ), + graph_panel( + title="Capacity size", + description="The capacity size per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type="capacity"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="CPU", + description="The CPU usage of each TiKV instance", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "process_cpu_seconds_total", + ), + ), + ], + ), + graph_panel( + title="Memory", + description="The memory usage per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("process_resident_memory_bytes"), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="IO utilization", + description="The I/O utilization per TiKV instance", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "node_disk_io_time_seconds_total", + ), + legend_format=r"{{instance}}-{{device}}", + ), + ], + ), + graph_panel( + title="MBps", + description="The total bytes of read and write in each TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=['type="wal_file_bytes"'], + ), + legend_format=r"{{instance}}-write", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=['type=~"bytes_read|iter_bytes_read"'], + ), + legend_format=r"{{instance}}-read", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="QPS", + description="The number of leaders on each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + label_selectors=['type!="kv_gc"'], + ), + legend_format=r"{{instance}}-{{type}}", + ), + ], + ), + graph_panel( + title="Errps", + description="The total number of the gRPC message failures", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + label_selectors=['type!="kv_gc"'], + ), + legend_format=r"{{instance}}-grpc-msg-fail", + ), + target( + expr=expr_sum_delta( + "tikv_pd_heartbeat_message_total", + 
label_selectors=['type="noop"'], + ).extra(extra_expr="< 1"), + legend_format=r"{{instance}}-pd-heartbeat", + ), + target( + expr=expr_sum_rate( + "tikv_critical_error_total", + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Leader", + description="The number of leaders on each TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="leader"'], + ), + ), + ], + ), + graph_panel( + title="Region", + description="The number of Regions and Buckets on each TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + target( + expr=expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="buckets"'], + ), + legend_format=r"{{instance}}-buckets", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Uptime", + description="TiKV uptime since the last restart", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_operator( + "time()", "-", expr_simple("process_start_time_seconds") + ), + legend_format=r"{{instance}}", + ), + ], + ) + ] + ) + return layout.row_panel + + +def Errors() -> RowPanel: + layout = Layout(title="Errors") + layout.row( + [ + graph_panel( + title="Critical error", + targets=[ + target( + expr=expr_sum_rate( + "tikv_critical_error_total", + by_labels=["instance", "type"], + ), + ), + ], + thresholds=[GraphThreshold(value=0.0)], + ) + ] + ) + layout.row( + [ + graph_panel( + title="Server is busy", + description=""" +Indicates occurrences of events that make the TiKV instance unavailable +temporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor +Full""", + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_too_busy_total", + ), + legend_format=r"scheduler-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_channel_full_total", + by_labels=["instance", "type"], + ), + legend_format=r"channelfull-{{instance}}-{{type}}", + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + label_selectors=['type="full"'], + ), + legend_format=r"coprocessor-{{instance}}", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'type="write_stall_percentile99"', + 'db=~"$db"', + ], + by_labels=["instance", "db"], + ), + legend_format=r"stall-{{instance}}-{{db}}", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_store_write_msg_block_wait_duration_seconds_count", + ), + legend_format=r"store-write-channelfull-{{instance}}", + ), + ], + ), + graph_panel( + title="Server report failures", + description="The total number of reporting failure messages", + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_report_failure_msg_total", + by_labels=["type", "instance", "store_id"], + ), + legend_format=r"{{instance}}-{{type}}-to-{{store_id}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raftstore error", + description="The number of different raftstore errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_engine_async_request_total", + label_selectors=['status!~"success|all"'], + by_labels=["instance", "status"], + ), + ), + ], + ), + graph_panel( + title="Scheduler error", + description="The number of scheduler errors per type on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + label_selectors=['stage=~"snapshot_err|prepare_write_err"'], + by_labels=["instance", "stage"], + 
), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Coprocessor error", + description="The number of different coprocessor errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + by_labels=["instance", "reason"], + ), + ), + ], + ), + graph_panel( + title="gRPC message error", + description="The number of gRPC message errors per type on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Leader drop", + description="The count of dropped leaders per TiKV instance", + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_region_count", + label_selectors=['type="leader"'], + ), + ), + ], + ), + graph_panel( + title="Leader missing", + description="The count of missing leaders per TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_leader_missing", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Damaged files", + description="RocksDB damaged SST files", + targets=[ + target( + expr=expr_simple("tikv_rocksdb_damaged_files"), + legend_format=r"{{instance}}-existed", + ), + target( + expr=expr_simple("tikv_rocksdb_damaged_files_deleted"), + legend_format=r"{{instance}}-deleted", + ), + ], + ), + graph_panel( + title="Log Replication Rejected", + description="The count of Log Replication Reject caused by follower memory insufficient", + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_append_rejects", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Server() -> RowPanel: + layout = Layout(title="Server") + layout.row( + [ + graph_panel( + title="CF size", + description="The size of each column family", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("tikv_engine_size_bytes", by_labels=["type"]), + ), + ], + ), + graph_panel( + title="Channel full", + description="The total number of channel full errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_channel_full_total", by_labels=["instance", "type"] + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Active written leaders", + description="The number of leaders being written on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_region_written_keys_count", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Approximate region size", + metric="tikv_raftstore_region_size_bucket", + yaxis=yaxis(format=UNITS.BYTES_IEC), + ), + graph_panel_histogram_quantiles( + title="Approximate region size", + description="The approximate Region size", + metric="tikv_raftstore_region_size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + hide_count=True, + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Region written bytes", + metric="tikv_region_written_bytes_bucket", + yaxis=yaxis(format=UNITS.BYTES_IEC), + ), + graph_panel( + title="Region average written bytes", + description="The average rate of writing bytes to Regions per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_avg("tikv_region_written_bytes"), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Region written keys", + metric="tikv_region_written_keys_bucket", + ), + graph_panel( + title="Region average written keys", + description="The average rate of written keys to Regions per 
TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_avg("tikv_region_written_keys"), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Hibernate Peers", + description="The number of peers in hibernated state", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_hibernated_peer_state", + by_labels=["instance", "state"], + ), + ), + ], + ), + graph_panel( + title="Memory trace", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple( + "tikv_server_mem_trace_sum", + label_selectors=['name=~"raftstore-.*"'], + ), + legend_format="{{instance}}-{{name}}", + ), + target( + expr=expr_simple( + "raft_engine_memory_usage", + ), + legend_format="{{instance}}-raft-engine", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft Entry Cache Evicts", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raft_entries_evict_bytes", + ), + ), + ], + ), + graph_panel( + title="Resolve address duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_address_resolve_duration_secs", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% Thread Pool Schedule Wait Duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_yatp_pool_schedule_wait_duration", + by_labels=["name"], + ), + legend_format="{{name}}", + ), + ], + thresholds=[GraphThreshold(value=1.0)], + ), + graph_panel( + title="Average Thread Pool Schedule Wait Duration", + description="The average rate of written keys to Regions per TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_yatp_pool_schedule_wait_duration", + by_labels=["name"], + ), + legend_format="{{name}}", + ), + ], + thresholds=[GraphThreshold(value=1.0)], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Disk IO time per second", + yaxes=yaxes(left_format=UNITS.NANO_SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_rocksdb_perf", + label_selectors=['metric="block_read_time"'], + by_labels=["req"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="block_read_time"'], + by_labels=["req"], + ), + legend_format="copr-{{req}}", + ), + ], + ), + graph_panel( + title="Disk IO bytes per second", + yaxes=yaxes(left_format=UNITS.NANO_SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_rocksdb_perf", + label_selectors=['metric="block_read_byte"'], + by_labels=["req"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="block_read_byte"'], + by_labels=["req"], + ), + legend_format="copr-{{req}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def gRPC() -> RowPanel: + layout = Layout(title="gRPC") + layout.row( + [ + graph_panel( + title="gRPC message count", + description="The count of different kinds of gRPC message", + yaxes=yaxes(left_format=UNITS.REQUESTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + 
label_selectors=['type!="kv_gc"'], + by_labels=["type", "priority"], + ), + hide=True, + ), + ], + ), + graph_panel( + title="gRPC message failed", + description="The count of different kinds of gRPC message which is failed", + yaxes=yaxes(left_format=UNITS.REQUESTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title=r"99% gRPC message duration", + description=r"The 99% percentile of execution time of gRPC message", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_grpc_msg_duration_seconds", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_grpc_msg_duration_seconds", + label_selectors=['type!="kv_gc"'], + by_labels=["type", "priority"], + ), + legend_format="{{type}}-{{priority}}", + hide=True, + ), + ], + ), + graph_panel( + title="Average gRPC message duration", + description="The average execution time of gRPC message", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_grpc_msg_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_avg( + "tikv_grpc_msg_duration_seconds", + by_labels=["type", "priority"], + ), + legend_format="{{type}}-{{priority}}", + hide=True, + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC batch size", + description=r"The 99% percentile of execution time of gRPC message", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_grpc_req_batch_size", + ), + legend_format=r"99% request", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_grpc_resp_batch_size", + ), + legend_format=r"99% response", + ), + target( + expr=expr_histogram_avg( + "tikv_server_grpc_req_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg request", + ), + target( + expr=expr_histogram_avg( + "tikv_server_grpc_resp_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg response", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_request_batch_size", + ), + legend_format=r"99% kv get batch", + ), + target( + expr=expr_histogram_avg( + "tikv_server_request_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg kv batch", + ), + ], + ), + graph_panel( + title="raft message batch size", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_raft_message_batch_size", + ), + legend_format=r"99%", + ), + target( + expr=expr_histogram_avg( + "tikv_server_raft_message_batch_size", + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC request sources QPS", + description="The QPS of different sources of gRPC request", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_request_source_counter_vec", + by_labels=["source"], + ), + ), + ], + ), + graph_panel( + title="gRPC request sources duration", + description="The duration of different sources of gRPC request", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_request_source_duration_vec", + by_labels=["source"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC resource group QPS", + description="The QPS of different resource groups of gRPC request", + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_resource_group_total", by_labels=["name"] + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def ThreadCPU() -> RowPanel: + layout = Layout(title="Thread CPU") + layout.row( + [ + graph_panel( + title="Raft store CPU", + description="The CPU utilization of raftstore thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"(raftstore|rs)_.*"'], + ), + ), + ], + ), + graph_panel( + title="Async apply CPU", + description="The CPU utilization of async apply", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"apply_[0-9]+"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Store writer CPU", + description="The CPU utilization of store writer thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_write.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=0.8)], + ), + graph_panel( + title="gRPC poll CPU", + description="The CPU utilization of gRPC", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"grpc.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler worker CPU", + description="The CPU utilization of scheduler worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sched_.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=3.6)], + ), + graph_panel( + title="Storage ReadPool CPU", + description="The CPU utilization of readpool", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_norm.*"'], + ), + legend_format="{{instance}}-normal", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_high.*"'], + ), + legend_format="{{instance}}-high", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_low.*"'], + ), + legend_format="{{instance}}-low", + ), + ], + thresholds=[GraphThreshold(value=3.6)], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Unified read pool CPU", + description="The CPU utilization of the unified read pool", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + 
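+                    # The thread-name regex below ("unified_read_po.*") is deliberately truncated:
+                    # OS thread names are typically limited to 15 characters on Linux, so the
+                    # unified read pool threads show up under a shortened name.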
expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"unified_read_po.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=7.2)], + ), + graph_panel( + title="RocksDB CPU", + description="The CPU utilization of RocksDB", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"rocksdb.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Coprocessor CPU", + description="The CPU utilization of coprocessor", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_normal.*"'], + ), + legend_format="{{instance}}-normal", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_high.*"'], + ), + legend_format="{{instance}}-high", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_low.*"'], + ), + legend_format="{{instance}}-low", + ), + ], + thresholds=[GraphThreshold(value=7.2)], + ), + graph_panel( + title="GC worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"gc_worker.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Background Worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"background.*"'], + ), + ), + ], + ), + graph_panel( + title="Raftlog fetch Worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"raftlog_fetch.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + ), + ), + ], + ), + graph_panel( + title="Backup CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"(backup-worker|bkwkr|backup_endpoint).*"' + ], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="CDC worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cdcwkr.*"'], + ), + legend_format="{{instance}}-worker", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"tso"'], + ), + legend_format="{{instance}}-tso", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cdc_.*"'], + ), + legend_format="{{instance}}-endpoint", + ), + ], + ), + graph_panel( + title="TSO Worker CPU", + description="The CPU utilization of raftstore thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"tso_worker"'], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def TTL() -> RowPanel: + layout = Layout(title="TTL") + layout.row( + [ + graph_panel( + title="TTL expire count", + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_expire_kv_count_total", + ), + legend_format="{{instance}}", + ), + ], + ), + 
graph_panel( + title="TTL expire size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_expire_kv_size_total", + ) + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="TTL check progress", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_ttl_checker_processed_regions", + ), + "/", + expr_sum_rate( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="TTL checker actions", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_checker_actions", by_labels=["type"] + ) + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="TTL checker compact duration", + description="The time consumed when executing GC tasks", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_ttl_checker_compact_duration", + ), + stat_panel( + title="TTL checker poll interval", + format=UNITS.MILLI_SECONDS, + targets=[ + target( + expr=expr_max( + "tikv_ttl_checker_poll_interval", + label_selectors=['type="tikv_gc_run_interval"'], + by_labels=[], # override default by instance. + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def PD() -> RowPanel: + layout = Layout(title="PD") + layout.row( + [ + graph_panel( + title="PD requests", + description="The count of requests that TiKV sends to PD", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_request_duration_seconds_count", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="PD request duration (average)", + description="The time consumed by requests that TiKV sends to PD", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_pd_request_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="PD heartbeats", + description="The total number of PD heartbeat messages", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_heartbeat_message_total", + by_labels=["type"], + ), + ), + target( + expr=expr_sum( + "tikv_pd_pending_heartbeat_total", + ), + legend_format="{{instance}}-pending", + ), + ], + ), + graph_panel( + title="PD validate peers", + description="The total number of peers validated by the PD worker", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_validate_peer_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="PD reconnection", + description="The count of reconnection between TiKV and PD", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_pd_reconnect_total", + range_selector="$__rate_interval", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="PD forward status", + description="The forward status of PD client", + targets=[ + target( + expr=expr_simple( + "tikv_pd_request_forwarded", + ), + legend_format="{{instance}}-{{host}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Pending TSO Requests", + description="The number of TSO requests waiting in the queue.", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum( + "tikv_pd_pending_tso_request_total", + ), + ), + ], + ), + graph_panel( + title="Store 
Slow Score", + description="The slow score of stores", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_score", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Inspected duration per server", + description="The duration that recorded by inspecting messages.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_inspect_duration_seconds", + by_labels=["instance", "type"], + ), + legend_format="{{instance}}-{{type}}", + ), + ], + ) + ] + ) + return layout.row_panel + + +def IOBreakdown() -> RowPanel: + layout = Layout(title="IO Breakdown") + layout.row( + [ + graph_panel( + title="Write IO bytes", + description="The throughput of disk write per IO type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="write"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="write"'], + by_labels=[], # override default by instance. + ), + legend_format="total", + ), + ], + ), + graph_panel( + title="Read IO bytes", + description="The throughput of disk read per IO type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="read"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="read"'], + by_labels=[], # override default by instance. + ), + legend_format="total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="IO threshold", + description="The threshold of disk IOs per priority", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_rate_limiter_max_bytes_per_sec", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Rate Limiter Request Wait Duration", + description="IO rate limiter request wait duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_rate_limiter_request_wait_duration_seconds", + by_labels=["type"], + ), + legend_format=r"{{type}}-99%", + ), + target( + expr=expr_histogram_avg( + "tikv_rate_limiter_request_wait_duration_seconds", + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftWaterfall() -> RowPanel: + layout = Layout(title="Raft Waterfall") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Storage async write duration", + description="The time consumed by processing asynchronous write requests", + yaxes=yaxes(left_format=UNITS.SECONDS, right_format=UNITS.NONE_FORMAT), + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="write"'], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store duration", + description="The store time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_duration_secs", + ), + graph_panel_histogram_quantiles( + title="Apply duration", + description="The apply time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_apply_duration_secs", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store propose wait duration", + description="The propose wait time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_request_wait_time_duration_secs", + ), + graph_panel_histogram_quantiles( + title="Store batch wait duration", + description="The batch wait time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_batch_wait_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store send to write queue duration", + description="The send-to-write-queue time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_send_to_queue_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store send proposal duration", + description="The send raft message of the proposal duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_send_proposal_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store write kv db end duration", + description="The write kv db end duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_write_kvdb_end_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store before write duration", + description="The before write time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_before_write_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store persist duration", + description="The persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_persist_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store write end duration", + description="The write end duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_write_end_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store commit but not persist duration", + description="The commit but not persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store commit and persist duration", + description="The commit and persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + 
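+                # NOTE: graph_panel_histogram_quantiles() presumably plots a fixed set of
+                # quantile/average series (plus an optional count series) computed from the
+                # _bucket/_sum/_count family of the metric named below (assumption).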
metric="tikv_raftstore_store_wf_commit_log_duration_seconds", + ), + ] + ) + return layout.row_panel + + +def RaftIO() -> RowPanel: + layout = Layout(title="Raft IO") + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Process ready duration", + heatmap_description="The time consumed for peer processes to be ready in Raft", + graph_title="99% Process ready duration per server", + graph_description="The time consumed for peer processes to be ready in Raft", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_raft_process_duration_secs", + label_selectors=['type="ready"'], + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Store write loop duration", + heatmap_description="The time duration of store write loop when store-io-pool-size is not zero.", + graph_title="99% Store write loop duration per server", + graph_description="The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_store_write_loop_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Append log duration", + heatmap_description="The time consumed when Raft appends log", + graph_title="99% Commit log duration per server", + graph_description="The time consumed when Raft commits log on each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_append_log_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Commit log duration", + heatmap_description="The time consumed when Raft commits log", + graph_title="99% Commit log duration per server", + graph_description="The time consumed when Raft commits log on each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_commit_log_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Apply log duration", + heatmap_description="The time consumed when Raft applies log", + graph_title="99% Apply log duration per server", + graph_description="The time consumed for Raft to apply logs per TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_apply_log_duration_seconds", + ) + ) + layout.row( + [ + graph_panel( + title="Store io task reschedule", + description="The throughput of disk write per IO type", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_io_reschedule_region_total", + ), + legend_format="rechedule-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_raftstore_io_reschedule_pending_tasks_total", + ), + legend_format="pending-task-{{instance}}", + ), + ], + ), + graph_panel( + title="99% Write task block duration per server", + description="The time consumed when store write task block on each TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_store_write_msg_block_wait_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftPropose() -> RowPanel: + layout = Layout(title="Raft Propose") + layout.row( + [ + graph_panel( + title="Raft proposals per ready", + description="The proposal count of a Regions in a tick", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_apply_proposal", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + 
title="Raft read/write proposals", + description="The number of proposals per type", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"local_read|normal|read_index"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft read proposals per server", + description="The number of read proposals which are made by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"local_read|read_index"'], + ), + ), + ], + ), + graph_panel( + title="Raft write proposals per server", + description="The number of write proposals which are made by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"normal"'], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Propose wait duration", + heatmap_description="The wait time of each proposal", + graph_title="99% Propose wait duration per server", + graph_description="The wait time of each proposal in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_request_wait_time_duration_secs", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Store write wait duration", + heatmap_description="The wait time of each store write task", + graph_title="99% Store write wait duration per server", + graph_description="The wait time of each store write task in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_store_write_task_wait_duration_secs", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Apply wait duration", + heatmap_description="The wait time of each apply task", + graph_title="99% Apply wait duration per server", + graph_description="The wait time of each apply task in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_apply_wait_time_duration_secs", + ) + ) + layout.row( + [ + heatmap_panel( + title="Store write handle msg duration", + description="The handle duration of each store write task msg", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_raftstore_store_write_handle_msg_duration_secs_bucket", + ), + heatmap_panel( + title="Store write trigger size", + description="The distribution of write trigger size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_raftstore_store_write_trigger_wb_bytes_bucket", + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft propose speed", + description="The rate at which peers propose logs", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_propose_log_size_sum", + ), + ), + ], + ), + graph_panel( + title="Perf Context duration", + description="The rate at which peers propose logs", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_store_perf_context_time_duration_secs", + by_labels=["type"], + ), + legend_format="store-{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_apply_perf_context_time_duration_secs", + by_labels=["type"], + ), + legend_format="apply-{{type}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftProcess() -> RowPanel: + layout 
= Layout(title="Raft Process") + layout.row( + [ + graph_panel( + title="Ready handled", + description="The count of different ready type of Raft", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_ready_handled_total", + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_process_duration_secs_count", + label_selectors=['type="ready"'], + by_labels=[], # overwrite default by instance. + ), + legend_format="count", + ), + ], + ), + graph_panel( + title="Max duration of raft store events", + description="The max time consumed by raftstore events", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999999, + "tikv_raftstore_event_duration", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.999999, + "tikv_broadcast_normal_duration_seconds", + ), + legend_format="broadcast_normal", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Replica read lock checking duration", + description="Replica read lock checking duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_replica_read_lock_check_duration_seconds_bucket", + ), + heatmap_panel( + title="Peer msg length distribution", + description="The length of peer msgs for each round handling", + metric="tikv_raftstore_peer_msg_len_bucket", + ), + ] + ) + return layout.row_panel + + +def RaftMessage() -> RowPanel: + layout = Layout(title="Raft Message") + layout.row( + [ + graph_panel( + title="Sent messages per server", + description="The number of Raft messages sent by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + ), + ), + ], + ), + graph_panel( + title="Flush messages per server", + description="The number of Raft messages flushed by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_message_flush_total", + by_labels=["instance", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Receive messages per server", + description="The number of Raft messages received by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_message_recv_total", + ), + ), + ], + ), + graph_panel( + title="Messages", + description="The number of different types of Raft messages that are sent", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Vote", + description="The total number of vote messages that are sent in Raft", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + label_selectors=['type="vote"'], + ), + ), + ], + ), + graph_panel( + title="Raft dropped messages", + description="The number of dropped Raft messages per type", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_dropped_message_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftAdmin() -> RowPanel: + layout = Layout(title="Raft Admin") + layout.row( + [ + graph_panel( + title="Admin proposals", + description="The number of admin proposals", + 
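+                # Only conf_change and transfer_leader proposals are counted here; see the
+                # label selector on tikv_raftstore_proposal_total below.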
yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"conf_change|transfer_leader"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Admin apply", + description="The number of the processed apply command", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_admin_cmd_total", + label_selectors=['type!="compact"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Check split", + description="The number of raftstore split checks", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_check_split_total", + label_selectors=['type!="ignore"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="99.99% Check split duration", + description="The time consumed when running split check in .9999", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_check_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Load base split event", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_load_base_split_event", + range_selector="1m", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Load base split duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.80, + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="80%-{{instance}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Peer in Flashback State", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_peer_in_flashback_state", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftLog() -> RowPanel: + layout = Layout(title="Raft Log") + layout.row( + [ + graph_panel( + title="Raft log GC write duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_raft_log_gc_write_duration_secs", + by_labels=["instance"], + ), + legend_format="99.99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_raftstore_raft_log_gc_write_duration_secs", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + graph_panel( + title="Raft log GC kv sync duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_raft_log_kv_sync_duration_secs", + by_labels=["instance"], + ), + legend_format="99.99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_raftstore_raft_log_kv_sync_duration_secs", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log GC write operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_write_duration_secs_count", + ), + ), + ], + 
), + graph_panel( + title="Raft log GC seek operations ", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_seek_operations_count", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log lag", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_log_lag_sum", + ), + ), + ], + ), + graph_panel( + title="Raft log gc skipped", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_skipped", + by_labels=["instance", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log GC failed", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_failed", + ), + ), + ], + ), + graph_panel( + title="Raft log fetch ", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_entry_fetches", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log async fetch task duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_entry_fetches_task_duration_seconds", + ), + legend_format="99.99%", + ), + target( + expr=expr_histogram_avg( + "tikv_raftstore_entry_fetches_task_duration_seconds", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_worker_pending_task_total", + label_selectors=['name=~"raftlog-fetch-worker"'], + ), + legend_format="pending-task", + ), + ], + series_overrides=[ + series_override( + alias="/pending-task/", + yaxis=2, + transform_negative_y=True, + ), + ], + ), + ] + ) + return layout.row_panel + + +def LocalReader() -> RowPanel: + layout = Layout(title="Local Reader") + layout.row( + [ + graph_panel( + title="Raft log async fetch task duration", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_reject_total", + by_labels=["instance", "reason"], + ), + legend_format="{{instance}}-reject-by-{{reason}}", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_executed_requests", + ), + legend_format="{{instance}}-total", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_executed_stale_read_requests", + ), + legend_format="{{instance}}-stale-read", + ), + ], + series_overrides=[ + series_override( + alias="/.*-total/", + yaxis=2, + ), + ], + ), + ] + ) + return layout.row_panel + + +def UnifiedReadPool() -> RowPanel: + layout = Layout(title="Unified Read Pool") + layout.row( + [ + graph_panel( + title="Time used by level", + description="The time used by each level in the unified read pool per second. 
Level 0 refers to small queries.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_multilevel_level_elapsed", + label_selectors=['name="unified-read-pool"'], + by_labels=["level"], + ), + ), + ], + ), + graph_panel( + title="Level 0 chance", + description="The chance that level 0 (small) tasks are scheduled in the unified read pool.", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_simple( + "tikv_multilevel_level0_chance", + label_selectors=['name="unified-read-pool"'], + ), + legend_format="{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Running tasks", + description="The number of concurrently running tasks in the unified read pool.", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_unified_read_pool_running_tasks", + "avg", + "1m", + ), + ), + ], + ), + heatmap_panel( + title="Unified Read Pool Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_yatp_pool_schedule_wait_duration_bucket", + label_selectors=['name=~"unified-read.*"'], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Duration of One Time Slice", + description="Unified read pool task execution time during one schedule.", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_yatp_task_poll_duration", + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Task Execute Duration", + description="Unified read pool task total execution duration.", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_yatp_task_exec_duration", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Task Schedule Times", + description="Task schedule number of times.", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT, log_base=2), + metric="tikv_yatp_task_execute_times", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def Storage() -> RowPanel: + layout = Layout(title="Storage") + layout.row( + [ + graph_panel( + title="Storage command total", + description="The total count of different kinds of commands received", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC, log_base=10), + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_command_total", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Storage async request error", + description="The total number of engine asynchronous request errors", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_engine_async_request_total", + label_selectors=['status!~"all|success"'], + by_labels=["status"], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async write duration", + heatmap_description="The time consumed by processing asynchronous write requests", + graph_title="Storage async write duration", + graph_description="The storage async write duration", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="write"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async snapshot duration", + heatmap_description="The time consumed by processing asynchronous snapshot requests", + graph_title="Storage async snapshot duration", + graph_description="The storage async snapshot duration", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot"'], + ), + ) + layout.row( + 
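+        # NOTE: heatmap_panel_graph_panel_histogram_quantile_pairs() presumably emits a
+        # [heatmap, graph] pair for this row: the duration distribution alongside its
+        # per-server quantiles (assumption based on the helper's name and arguments).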
heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async snapshot duration (pure local read)", + heatmap_description="The storage async snapshot duration without the involving of raftstore", + graph_title="Storage async snapshot duration (pure local read)", + graph_description="The storage async snapshot duration without the involving of raftstore", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_local_read"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Read index propose wait duration", + heatmap_description="Read index propose wait duration associated with async snapshot", + graph_title="Read index propose wait duration", + graph_description="Read index propose wait duration associated with async snapshot", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_read_index_propose_wait"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Read index confirm duration", + heatmap_description="Read index confirm duration associated with async snapshot", + graph_title="Read index confirm duration", + graph_description="Read index confirm duration associated with async snapshot", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_read_index_confirm"'], + ), + ) + layout.row( + [ + graph_panel( + title="Process Stat Cpu Usage", + description="CPU usage measured over a 30 second window", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum( + "tikv_storage_process_stat_cpu_usage", + ), + ), + ], + ), + graph_panel_histogram_quantiles( + title="Full compaction duration seconds", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_duration_seconds", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Full compaction pause duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_pause_duration_seconds", + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Full compaction per-increment duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_increment_duration_seconds", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def FlowControl() -> RowPanel: + layout = Layout(title="Flow Control") + layout.row( + [ + graph_panel( + title="Scheduler flow", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_write_flow", + ), + legend_format="write-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_throttle_flow", + ).extra(" != 0"), + legend_format="throttle-{{instance}}", + ), + ], + ), + graph_panel( + title="Scheduler discard ratio", + description="", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_discard_ratio", + by_labels=["type"], + ).extra(" / 10000000"), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Throttle duration", + metric="tikv_scheduler_throttle_duration_seconds_bucket", + yaxis=yaxis(format=UNITS.SECONDS), + ), + graph_panel( + title="Scheduler throttled CF", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_simple( + 
"tikv_scheduler_throttle_cf", + ).extra(" != 0"), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow controller actions", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_throttle_action_total", + by_labels=["type", "cf"], + ), + ), + ], + ), + graph_panel( + title="Flush/L0 flow", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_l0_flow", + by_labels=["instance", "cf"], + ), + legend_format="{{cf}}_l0_flow-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_flush_flow", + by_labels=["instance", "cf"], + ), + legend_format="{{cf}}_flush_flow-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_l0_flow", + ), + legend_format="total_l0_flow-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_flush_flow", + ), + legend_format="total_flush_flow-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow controller factors", + description="", + targets=[ + target( + expr=expr_max( + "tikv_scheduler_l0", + ), + legend_format="l0-{{instance}}", + ), + target( + expr=expr_max( + "tikv_scheduler_memtable", + ), + legend_format="memtable-{{instance}}", + ), + target( + expr=expr_max( + "tikv_scheduler_l0_avg", + ), + legend_format="avg_l0-{{instance}}", + ), + ], + ), + graph_panel( + title="Compaction pending bytes", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_pending_compaction_bytes", + label_selectors=['db="kv"'], + by_labels=["cf"], + ), + ), + target( + expr=expr_sum( + "tikv_scheduler_pending_compaction_bytes", + by_labels=["cf"], + ).extra(" / 10000000"), + legend_format="pending-bytes-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Txn command throttled duration", + description="Throttle time for txn storage commands in 1 minute.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_txn_command_throttle_time_total", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Non-txn command throttled duration", + description="Throttle time for non-txn related processing like analyze or dag in 1 minute.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_non_txn_command_throttle_time_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def SchedulerCommands() -> RowPanel: + layout = Layout(title="Scheduler", repeat="command") + layout.row( + [ + graph_panel( + title="Scheduler stage total", + description="The total number of commands on each stage in commit command", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_too_busy_total", + label_selectors=['type="$command"'], + ), + legend_format="busy-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + label_selectors=['type="$command"'], + by_labels=["stage"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler command duration", + description="The time consumed when executing commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_command_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Scheduler 
latch wait duration", + description="The time which is caused by latch wait in commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_latch_wait_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler keys read", + description="The count of keys read by a commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + metric="tikv_scheduler_kv_command_key_read", + label_selectors=['type="$command"'], + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Scheduler keys written", + description="The count of keys written by a commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + metric="tikv_scheduler_kv_command_key_write", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler scan details", + description="The keys scan details of each CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Scheduler scan details [lock]", + description="The keys scan details of lock CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="lock"'], + by_labels=["tag"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler scan details [write]", + description="The keys scan details of write CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="write"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Scheduler scan details [default]", + description="The keys scan details of default CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="default"'], + by_labels=["tag"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler command read duration", + description="The time consumed on reading when executing commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_processing_read_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + heatmap_panel( + title="Check memory locks duration", + description="The time consumed on checking memory locks", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_storage_check_mem_lock_duration_seconds_bucket", + label_selectors=['type="$command"'], + ), + ] + ) + return layout.row_panel + + +def Scheduler() -> RowPanel: + layout = Layout(title="Scheduler") + layout.row( + [ + graph_panel( + title="Scheduler stage total", + description="The total number of commands on each stage", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_too_busy_total", + by_labels=["stage"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + by_labels=["stage"], + ), + ), + ], + ), + graph_panel( + title="Scheduler writing bytes", + description="The total writing bytes of commands on each stage", + 
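+                # NOTE: expr_sum() presumably renders a plain
+                #   sum(tikv_scheduler_writing_bytes{...}) by (instance)
+                # gauge aggregation, i.e. without rate(), unlike expr_sum_rate() (assumption).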
yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_writing_bytes", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler priority commands", + description="The count of different priority commands", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_commands_pri_total", + by_labels=["priority"], + ), + ), + ], + ), + graph_panel( + title="Scheduler pending commands", + description="The count of pending commands per TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_contex_total", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Txn Scheduler Pool Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_yatp_pool_schedule_wait_duration_bucket", + label_selectors=['name=~"sched-worker.*"'], + ), + ] + ) + return layout.row_panel + + +def GC() -> RowPanel: + layout = Layout(title="GC") + layout.row( + [ + graph_panel( + title="GC tasks", + description="The count of GC tasks processed by gc_worker", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_tasks_vec", + by_labels=["task"], + ), + legend_format="total-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_storage_gc_skipped_counter", + by_labels=["task"], + ), + legend_format="skipped-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_task_fail_vec", + by_labels=["task"], + ), + legend_format="failed-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_gc_worker_too_busy", + by_labels=[], + ), + legend_format="gcworker-too-busy", + ), + ], + ), + graph_panel_histogram_quantiles( + title="GC tasks duration", + description="The time consumed when executing GC tasks", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_gcworker_gc_task_duration_vec", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="TiDB GC seconds", + description="The GC duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 1, "tidb_tikvclient_gc_seconds", by_labels=["instance"] + ).skip_default_instance_selector(), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="TiDB GC worker actions", + description="The count of TiDB GC worker actions", + targets=[ + target( + expr=expr_sum_rate( + "tidb_tikvclient_gc_worker_actions_total", + by_labels=["type"], + ).skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="ResolveLocks Progress", + description="Progress of ResolveLocks, the first phase of GC", + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_range_task_stats", + label_selectors=['type=~"resolve-locks.*"'], + by_labels=["result"], + ).skip_default_instance_selector(), + ), + ], + ), + graph_panel( + title="TiKV Auto GC Progress", + description="Progress of TiKV's GC", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum( + "tikv_gcworker_autogc_processed_regions", + label_selectors=['type="scan"'], + ), + "/", + expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="GC speed", + description="keys / second", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_mvcc_gc_delete_versions_sum", + by_labels=["key_mode"], + ), + 
legend_format="{{key_mode}}_keys/s", + ), + ], + ), + graph_panel( + title="TiKV Auto GC SafePoint", + description="SafePoint used for TiKV's Auto GC", + yaxes=yaxes(left_format=UNITS.DATE_TIME_ISO), + targets=[ + target( + expr=expr_max( + "tikv_gcworker_autogc_safe_point", + ) + .extra("/ (2^18)") + .skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.half_row( + [ + stat_panel( + title="GC lifetime", + description="The lifetime of TiDB GC", + format=UNITS.SECONDS, + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_gc_config", + label_selectors=['type="tikv_gc_life_time"'], + by_labels=[], + ).skip_default_instance_selector(), + ), + ], + ), + stat_panel( + title="GC interval", + description="The interval of TiDB GC", + format=UNITS.SECONDS, + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_gc_config", + label_selectors=['type="tikv_gc_run_interval"'], + by_labels=[], + ).skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.half_row( + [ + graph_panel( + title="GC in Compaction Filter", + description="Keys handled in GC compaction filter", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filtered", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_filtered", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_skip", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_skipped", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_mvcc_rollback", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-rollback/mvcc-lock", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_orphan_versions", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_orphan-versions", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_perform", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_performed-times", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_failure", + by_labels=["key_mode", "type"], + ), + legend_format="{{key_mode}}_failure-{{type}}", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_met", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-deletion-met", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_handled", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-deletion-handled", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_wasted", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-deletion-wasted", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="GC scan write details", + description="GC scan write details", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_keys", + label_selectors=['cf="write"'], + by_labels=["key_mode", "tag"], + ), + ), + ], + ), + graph_panel( + title="GC scan default details", + description="GC scan default details", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_keys", + label_selectors=['cf="default"'], + by_labels=["key_mode", "tag"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Snapshot() -> RowPanel: + layout = Layout(title="Snapshot") + layout.row( + [ + graph_panel( + title="Rate snapshot message", + description="The rate of Raft snapshot messages sent", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_raft_sent_message_total", + range_selector="1m", + label_selectors=['type="snapshot"'], + ), + ), + ], + ), + graph_panel( + 
title="Snapshot state count", + description="The number of snapshots in different states", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_snapshot_traffic_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% Snapshot generation wait duration", + description="The time snapshot generation tasks waited to be scheduled. ", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_generation_wait_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="99% Handle snapshot duration", + description="The time consumed when handling snapshots", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_send_snapshot_duration_seconds", + ), + legend_format="send", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_duration_seconds", + label_selectors=['type="apply"'], + ), + legend_format="apply", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_duration_seconds", + label_selectors=['type="generate"'], + ), + legend_format="generate", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99.99% Snapshot size", + description="The snapshot size (P99.99).9999", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_snapshot_size", + ), + legend_format="size", + ), + ], + ), + graph_panel( + title="99.99% Snapshot KV count", + description="The number of KV within a snapshot in .9999", + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_snapshot_kv_count", + ), + legend_format="count", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Snapshot Actions", + description="Action stats for snapshot generating and applying", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_snapshot_total", + range_selector="1m", + by_labels=["type", "status"], + ), + ), + target( + expr=expr_sum_delta( + "tikv_raftstore_clean_region_count", + range_selector="1m", + by_labels=["type", "status"], + ), + legend_format="clean-region-by-{{type}}", + ), + ], + ), + graph_panel( + title="Snapshot transport speed", + description="The speed of sending or receiving snapshot", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_snapshot_limit_transport_bytes", + by_labels=["instance", "type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_snapshot_limit_generate_bytes", + ), + legend_format="{{instance}}-generate", + ), + ], + ), + ] + ) + return layout.row_panel + + +def Task() -> RowPanel: + layout = Layout(title="Task") + layout.row( + [ + graph_panel( + title="Worker handled tasks", + description="The number of tasks handled by worker", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_worker_handled_task_total", + by_labels=["name"], + ), + ), + ], + ), + graph_panel( + title="Worker pending tasks", + description="Current pending and running tasks of worker", + targets=[ + target( + expr=expr_sum( + "tikv_worker_pending_task_total", + by_labels=["name"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="FuturePool handled tasks", + description="The number of tasks handled by future_pool", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + 
targets=[ + target( + expr=expr_sum_rate( + "tikv_futurepool_handled_task_total", + by_labels=["name"], + ), + ), + ], + ), + graph_panel( + title="FuturePool pending tasks", + description="Current pending and running tasks of future_pool", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_futurepool_pending_task_total", + "avg", + range_selector="1m", + by_labels=["name"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def CoprocessorOverview() -> RowPanel: + layout = Layout(title="Coprocessor Overview") + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Request duration", + heatmap_description="The time consumed to handle coprocessor read requests", + graph_title="Request duration", + graph_description="The time consumed to handle coprocessor read requests", + yaxis_format=UNITS.SECONDS, + metric="tikv_coprocessor_request_duration_seconds", + graph_by_labels=["req"], + ), + ) + layout.row( + [ + graph_panel( + title="Total Requests", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_duration_seconds_count", + by_labels=["req"], + ), + ), + ], + ), + graph_panel( + title="Total Request Errors", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + by_labels=["reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="KV Cursor Operations", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_keys_sum", + by_labels=["req"], + ), + ), + ], + ), + graph_panel_histogram_quantiles( + title="KV Cursor Operations", + description="", + metric="tikv_coprocessor_scan_keys", + yaxes=yaxes(left_format=UNITS.SHORT), + by_labels=["req"], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total RocksDB Perf Statistics", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="internal_delete_skipped_count"'], + by_labels=["req"], + ), + legend_format="delete_skipped-{{req}}", + ), + ], + ), + graph_panel( + title="Total Response Size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_response_bytes", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def CoprocessorDetail() -> RowPanel: + layout = Layout(title="Coprocessor Detail") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Handle duration", + description="The time consumed when handling coprocessor requests", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_handle_seconds", + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Handle duration by store", + description="The time consumed to handle coprocessor requests per TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_handle_seconds", + by_labels=["req", "instance"], + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Wait duration", + description="The time consumed when coprocessor requests are wait for being handled", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_wait_seconds", + label_selectors=['type="all"'], + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Wait duration by store", + description="The time consumed when coprocessor requests are 
wait for being handled in each TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_wait_seconds", + label_selectors=['type="all"'], + by_labels=["req", "instance"], + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total DAG Requests", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_dag_request_count", + by_labels=["vec_type"], + ), + ), + ], + ), + graph_panel( + title="Total DAG Executors", + description="The total number of DAG executors", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_executor_count", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total Ops Details (Table Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="select"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Total Ops Details (Index Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="index"'], + by_labels=["tag"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total Ops Details by CF (Table Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="select"'], + by_labels=["cf", "tag"], + ), + ), + ], + ), + graph_panel( + title="Total Ops Details by CF (Index Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="index"'], + by_labels=["cf", "tag"], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Memory lock checking duration", + heatmap_description="The time consumed on checking memory locks for coprocessor requests", + graph_title="Memory lock checking duration", + graph_description="The time consumed on checking memory locks for coprocessor requests", + yaxis_format=UNITS.SECONDS, + metric="tikv_coprocessor_mem_lock_check_duration_seconds", + ), + ) + return layout.row_panel + + +def Threads() -> RowPanel: + layout = Layout(title="Threads") + layout.row( + [ + graph_panel( + title="Threads state", + targets=[ + target( + expr=expr_sum( + "tikv_threads_state", + by_labels=["instance", "state"], + ), + ), + target( + expr=expr_sum( + "tikv_threads_state", + by_labels=["instance"], + ), + legend_format="{{instance}}-total", + ), + ], + ), + graph_panel( + title="Threads IO", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_sum_rate( + "tikv_threads_io_bytes_total", + by_labels=["name", "io"], + ).extra("> 1024"), + ), + legend_format="{{name}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Thread Voluntary Context Switches", + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_max_rate( + "tikv_thread_voluntary_context_switches", + by_labels=["name"], + ).extra("> 100"), + ), + legend_format="{{name}}", + ), + ], + ), + graph_panel( + title="Thread Nonvoluntary Context Switches", + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_max_rate( + "tikv_thread_nonvoluntary_context_switches", + by_labels=["name"], + ).extra("> 100"), + ), + legend_format="{{name}}", + ), + ], + ), + ] + ) + return 
layout.row_panel + + +def RocksDB() -> RowPanel: + layout = Layout(title="RocksDB", repeat="db") + layout.row( + [ + graph_panel( + title="Get operations", + description="The count of get operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + legend_format="memtable", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type=~"block_cache_data_hit|block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + legend_format="block_cache", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l0"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l0", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l1"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l1", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l2_and_up"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l2_and_up", + ), + ], + ), + graph_panel( + title="Get duration", + description="The time consumed when executing get operations", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Seek operations", + description="The count of seek operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_seek"', + ], + by_labels=[], # override default by instance. + ), + legend_format="seek", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_seek_found"', + ], + by_labels=[], # override default by instance. + ), + legend_format="seek_found", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_next"', + ], + by_labels=[], # override default by instance. + ), + legend_format="next", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_next_found"', + ], + by_labels=[], # override default by instance. 
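+                        # Note: `by_labels=[]` drops the default per-instance grouping, so
+                        # each seek / next / prev tick becomes one series summed over all
+                        # instances for the selected $db. Assuming the expr_* helpers render
+                        # to sum(rate(...)) as their names suggest, this target is roughly
+                        #   sum(rate(tikv_engine_locate{db="$db", type="number_db_next_found"}[...]))
+                        # with the exact label filters and rate window supplied by the
+                        # helpers defined earlier in this file.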
+ ), + legend_format="next_found", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_prev"', + ], + by_labels=[], # override default by instance. + ), + legend_format="prev", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_prev_found"', + ], + by_labels=[], # override default by instance. + ), + legend_format="prev_found", + ), + ], + ), + graph_panel( + title="Seek duration", + description="The time consumed when executing seek operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write operations", + description="The count of write operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type=~"write_done_by_self|write_done_by_other"', + ], + by_labels=[], # override default by instance. + ), + legend_format="done", + ), + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type="write_timeout"', + ], + by_labels=[], # override default by instance. + ), + legend_format="timeout", + ), + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type="write_with_wal"', + ], + by_labels=[], # override default by instance. + ), + legend_format="with_wal", + ), + ], + ), + graph_panel( + title="Write duration", + description="The time consumed when executing write operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_average"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="WAL sync operations", + description="The count of WAL sync operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_wal_file_synced", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. + ), + legend_format="sync", + ), + ], + ), + graph_panel( + title="Write WAL duration", + description="The time consumed when executing write wal operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction operations", + description="The count of compaction and flush operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_event_total", + label_selectors=[ + 'db="$db"', + ], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="WAL sync duration", + description="The time consumed when executing WAL sync operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=10), + targets=[ + target( + expr=expr_max( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_average"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction guard actions", + description="Compaction guard actions", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_compaction_guard_action_total", + label_selectors=[ + 'cf=~"default|write"', + ], + by_labels=["cf", " type"], + ), + ), + ], + ), + graph_panel( + title="Compaction duration", + description="The time consumed when executing the compaction and flush operations", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="SST read duration", + description="The time consumed when reading SST files", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + graph_panel( + title="Compaction reason", + description=None, + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_compaction_reason", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Block cache size", + description="The block cache size. 
Broken down by column family if shared block cache is disabled.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_avg( + "tikv_engine_block_cache_size_bytes", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf", "instance"], + ), + ), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + graph_panel( + title="Memtable hit", + description="The hit rate of memtable", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="hit", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Block cache flow", + description="The flow of different kinds of block cache operations", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC, log_base=10), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="block_cache_byte_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="block_cache_byte_write"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_bytes_insert"', + ], + by_labels=[], # override default by instance. + ), + legend_format="data_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_bytes_insert"', + ], + by_labels=[], # override default by instance. + ), + legend_format="filter_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_bytes_evict"', + ], + by_labels=[], # override default by instance. + ), + legend_format="filter_evict", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_bytes_insert"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_bytes_evict"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_evict", + ), + ], + ), + graph_panel( + title="Block cache hit", + description="The hit rate of block cache", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_hit"', + ], + by_labels=[], # override default by instance. 
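+                        # Note: this nested expr_operator builds the ratio
+                        #   block_cache_hit / (block_cache_hit + block_cache_miss)
+                        # aggregated over all instances for the selected $db, i.e. the
+                        # overall block cache hit rate; the same pattern is repeated below
+                        # for data / filter / index blocks and for the bloom prefix check.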
+ ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="all", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="data", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="filter", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="index", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="bloom_prefix_useful"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="bloom_prefix_checked"', + ], + by_labels=[], # override default by instance. + ), + ), + legend_format="bloom prefix", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Keys flow", + description="The flow of different kinds of operations on keys", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="keys_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="keys_written"', + ], + by_labels=[], # override default by instance. + ), + legend_format="written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_compaction_num_corrupt_keys", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="corrupt", + ), + ], + ), + graph_panel( + title="Block cache operations", + description="The count of different kinds of block cache operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="data_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="filter_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_add_failures"', + ], + by_labels=[], # override default by instance. + ), + legend_format="add_failures", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Read flow", + description="The flow rate of read operations per type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="get", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="iter_bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="scan", + ), + ], + ), + graph_panel( + title="Total keys", + description="The count of keys in each column family", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum( + "tikv_engine_estimate_num_keys", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write flow", + description="The flow of different kinds of write operations", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="wal_file_bytes"', + ], + by_labels=[], # override default by instance. + ), + legend_format="wal", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_written"', + ], + by_labels=[], # override default by instance. + ), + legend_format="write", + ), + ], + ), + graph_panel( + title="Bytes / Read", + description="The bytes per read", + yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=10), + targets=[ + target( + expr=expr_max( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_percentile99"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction flow", + description="The flow rate of compaction operations per type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_compaction_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_compaction_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_written"', + ], + by_labels=[], # override default by instance. + ), + legend_format="written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="flush_write_bytes"', + ], + by_labels=[], # override default by instance. + ), + legend_format="flushed", + ), + ], + ), + graph_panel( + title="Bytes / Write", + description="The bytes per write", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_max( + "tikv_engine_bytes_per_write", + label_selectors=['db="$db"', 'type="bytes_per_write_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_average"', + ], + by_labels=[], # override default by instance. 
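+                        # Note: max/99%/95%/avg are not derived from a Prometheus histogram
+                        # here; RocksDB exports pre-aggregated statistics as separate gauges
+                        # (type="bytes_per_write_max" / "..._percentile99" / "..._average"),
+                        # so expr_max / expr_avg presumably just aggregate those per-instance
+                        # gauge values for the selected $db.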
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Read amplification", + description="The read amplification per TiKV instance", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_read_amp_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="read_amp_total_read_bytes"', + ], + ), + "/", + expr_sum_rate( + "tikv_engine_read_amp_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="read_amp_estimate_useful_bytes"', + ], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Compaction pending bytes", + description="The pending bytes to be compacted", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_pending_compaction_bytes", + label_selectors=['db="$db"'], + by_labels=["cf"], + ), + legend_format="{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Number of snapshots", + description="The number of snapshot of each TiKV instance", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_simple( + "tikv_engine_num_snapshots", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Compression ratio", + description="The compression ratio of each level", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "tikv_engine_compression_ratio", + label_selectors=['db="$db"'], + by_labels=["cf", "level"], + ), + legend_format="{{cf}}-L{{level}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Number files at each level", + description="The number of SST files for different column families in each level", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "tikv_engine_num_files_at_level", + label_selectors=['db="$db"'], + by_labels=["cf", "level"], + ), + legend_format="{{cf}}-L{{level}}", + ), + ], + ), + graph_panel( + title="Oldest snapshots duration", + description="The time that the oldest unreleased snapshot survivals", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_simple( + "tikv_engine_oldest_snapshot_duration", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Stall conditions changed of each CF", + description="Stall conditions changed of each column family", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_simple( + "tikv_engine_stall_conditions_changed", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}-{{cf}}-{{type}}", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Ingest SST duration seconds", + description="The time consumed when ingesting SST files", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_snapshot_ingest_sst_duration_seconds", + label_selectors=['db="$db"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write Stall Reason", + description=None, + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_write_stall_reason", + label_selectors=['db="$db"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Write stall duration", + description="The time which is caused by write stall", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_stall", + label_selectors=['db="$db"', 'type="write_stall_max"'], + by_labels=[], # 
override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'db="$db"', + 'type="write_stall_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'db="$db"', + 'type="write_stall_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=['db="$db"', 'type="write_stall_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Ingestion picked level", + description="The level that the external file ingests into", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_engine_ingestion_picked_level_bucket", + label_selectors=['db="$db"'], + ), + graph_panel( + title="Memtable size", + description="The memtable size of each column family", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_memory_bytes", + label_selectors=['db="$db"', 'type="mem-tables-all"'], + by_labels=["cf"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftEngine() -> RowPanel: + layout = Layout(title="Raft Engine") + layout.row( + [ + graph_panel( + title="Operation", + description="The count of operations per second", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "raft_engine_write_apply_duration_seconds_count", + by_labels=[], # override default by instance. + ), + legend_format="write", + ), + target( + expr=expr_sum_rate( + "raft_engine_read_entry_duration_seconds_count", + by_labels=[], # override default by instance. + ), + legend_format="read_entry", + ), + target( + expr=expr_sum_rate( + "raft_engine_read_message_duration_seconds_count", + by_labels=[], # override default by instance. + ), + legend_format="read_message", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Write Duration", + description="The time used in write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="raft_engine_write_duration_seconds", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow", + description="The I/O flow rate", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "raft_engine_write_size_sum", + by_labels=[], # override default by instance. 
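+                        # Note: taking the rate of the histogram's `_sum` counter yields
+                        # write bytes per second (assuming expr_sum_rate wraps the metric in
+                        # sum(rate(...))); with by_labels=[] it is summed across instances,
+                        # while the rewrite flow below is broken down by type instead.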
+ ), + legend_format="write", + ), + target( + expr=expr_sum_rate( + "raft_engine_background_rewrite_bytes_sum", + by_labels=["type"], + ), + legend_format="rewrite-{{type}}", + ), + ], + ), + graph_panel( + title="Write Duration Breakdown (99%)", + description="99% duration breakdown of write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_preprocess_duration_seconds" + ), + legend_format="wait", + ), + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_leader_duration_seconds" + ), + legend_format="wal", + ), + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_apply_duration_seconds" + ), + legend_format="apply", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Bytes / Written", + description="The bytes per write", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + metric="raft_engine_write_size", + hide_count=True, + ), + graph_panel( + title="WAL Duration Breakdown (999%)", + description="999% duration breakdown of WAL write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_write_leader_duration_seconds" + ), + legend_format="total", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_sync_log_duration_seconds" + ), + legend_format="sync", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_allocate_log_duration_seconds" + ), + legend_format="allocate", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_rotate_log_duration_seconds" + ), + legend_format="rotate", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="File Count", + description="The average number of files", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "raft_engine_log_file_count", + by_labels=["type"], + ), + ), + target( + expr=expr_avg( + "raft_engine_swap_file_count", + by_labels=[], # override default by instance. + ), + legend_format="swap", + ), + target( + expr=expr_avg( + "raft_engine_recycled_file_count", + by_labels=["type"], + ), + legend_format="{{type}}-recycle", + ), + ], + ), + graph_panel( + title="Other Durations (99%)", + description="The 99% duration of operations other than write", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_read_entry_duration_seconds" + ), + legend_format="read_entry", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_read_message_duration_seconds" + ), + legend_format="read_message", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_purge_duration_seconds" + ), + legend_format="purge", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Entry Count", + description="The average number of log entries", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "raft_engine_log_entry_count", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Titan() -> RowPanel: + layout = Layout(title="Titan", repeat="titan_db") + layout.row( + [ + graph_panel( + title="Blob file count", + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_num_live_blob_file", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. 
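+                        # Note: "$titan_db" is assumed to be a dashboard template variable
+                        # tied to repeat="titan_db" on this row's Layout, so the Titan row
+                        # repeats once per selected value of that variable.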
+ ), + legend_format="live blob file num", + ), + target( + expr=expr_sum( + "tikv_engine_titandb_num_obsolete_blob_file", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="obsolete blob file num", + ), + ], + ), + graph_panel( + title="Blob file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_live_blob_file_size", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="live blob file size", + ), + target( + expr=expr_sum( + "tikv_engine_titandb_obsolete_blob_file_size", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="obsolete blob file size", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Live blob size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_live_blob_size", + label_selectors=['db="$titan_db"'], + ), + legend_format="live blob size", + ), + ], + ), + graph_panel( + title="Blob cache hit", + description="The hit rate of block cache", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="all", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Iter touched blob file count", + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob cache size", + description="The blob cache size.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_avg( + "tikv_engine_blob_cache_size_bytes", + label_selectors=['db="$titan_db"'], + by_labels=["cf", "instance"], + ), + ), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob key size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob value size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob get operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_get"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="get", + ), + ], + ), + graph_panel( + title="Blob get duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=["type"], + ), + legend_format="avg-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%-{{type}}", + ), + target( + expr=expr_max( + "tikv_engine_blob_get_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=["type"], + ), + legend_format="max-{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file discardable ratio distribution", + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_blob_file_discardable_ratio", + label_selectors=['db="$titan_db"'], + by_labels=["ratio"], + ), + ), + ], + ), + graph_panel( + title="Blob iter operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_seek"', + ], + by_labels=[], # override default by instance. + ), + legend_format="seek", + ), + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_prev"', + ], + by_labels=[], # override default by instance. + ), + legend_format="prev", + ), + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_next"', + ], + by_labels=[], # override default by instance. + ), + legend_format="next", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob seek duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob next duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_next_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob prev duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=["type"], + ), + legend_format="avg-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%-{{type}}", + ), + target( + expr=expr_max( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=["type"], + ), + legend_format="max-{{type}}", + ), + ], + ), + graph_panel( + title="Blob keys flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"keys.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file read duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob bytes flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"bytes.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file write duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + 
expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob file sync operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_file_synced", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="sync", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC action", + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_action_count", + label_selectors=['db="$titan_db"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Blob file sync duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC keys flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"keys.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC input file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_average"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC bytes flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"bytes.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC output file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC file count", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_file_count", + label_selectors=['db="$titan_db"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def PessimisticLocking() -> RowPanel: + layout = Layout(title="Pessimistic Locking") + layout.row( + [ + graph_panel( + title="Lock Manager Thread CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"waiter_manager.*"'], + by_labels=["instance", "name"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"deadlock_detect.*"'], + by_labels=["instance", "name"], + ), + ), + ], + ), + graph_panel( + title="Lock Manager Handled tasks", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_lock_manager_task_counter", + by_labels=["type"], + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Waiter lifetime duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_lock_manager_waiter_lifetime_duration", + hide_count=True, + ), + graph_panel( + title="Lock Waiting Queue", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_manager_wait_table_status", + "max", + "30s", + by_labels=["type"], + ), + ), + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_wait_queue_entries_gauge_vec", + "max", + "30s", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Deadlock detect duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_lock_manager_detect_duration", + hide_count=True, + ), + graph_panel( + title="Detect error", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_lock_manager_error_counter", by_labels=["type"] + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Deadlock detector leader", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_manager_detector_leader_heartbeat", + "max", + "30s", + ), + ) + ], + ), + graph_panel( + title="Total pessimistic locks memory size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple("tikv_pessimistic_lock_memory_size"), + legend_format="{{instance}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="In-memory pessimistic locking result", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_in_memory_pessimistic_locking", by_labels=["result"] + ), + ) + ], + ), + graph_panel( + title="Pessimistic lock activities", + description="The number of active keys and waiters.", + targets=[ + target( + expr=expr_sum( + "tikv_lock_wait_queue_entries_gauge_vec", by_labels=["type"] + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Lengths of lock wait queues when transaction enqueues", + description="The length includes the entering transaction itself", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_lock_wait_queue_length_bucket", + ) + ] + ) + return layout.row_panel + + +def PointInTimeRestore() -> RowPanel: + layout = Layout(title="Point In Time Restore") + layout.row( + [ + graph_panel( + title="CPU Usage", + 
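# CPU of threads matching name=~"sst_.*", i.e. the import workers that
+ # apply restored data for point-in-time restore.
+ 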
description=None, + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"sst_.*"', + ], + ), + ), + ], + ), + graph_panel( + title="P99 RPC Duration", + description=None, + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=1), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_rpc_duration", + label_selectors=[ + 'request="apply"', + ], + ), + legend_format="total-99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_apply_duration", + label_selectors=[ + 'type=~"queue|exec_download"', + ], + by_labels=["le", "type"], + ), + legend_format="(DL){{type}}-99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_engine_request", + by_labels=["le", "type"], + ), + legend_format="(AP){{type}}-99", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import RPC Ops", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=[ + 'request="apply"', + ], + by_labels=["instance", "request"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=[ + 'request!="switch_mode"', + ], + by_labels=["request"], + ), + legend_format="total-{{request}}", + ), + ], + ), + graph_panel( + title="Cache Events", + description=None, + yaxes=yaxes(left_format=UNITS.COUNTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_apply_cache_event", + label_selectors=[], + by_labels=["type", "instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Overall RPC Duration", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_rpc_duration_bucket", + label_selectors=[ + 'request="apply"', + ], + ), + heatmap_panel( + title="Read File into Memory Duration", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_apply_duration_bucket", + label_selectors=[ + 'type="exec_download"', + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Queuing Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="queuing"', + ], + ), + graph_panel( + title="Apply Request Throughput", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_apply_bytes_sum", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Downloaded File Size", + description=None, + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_import_download_bytes_bucket", + ), + heatmap_panel( + title="Apply Batch Size", + description=None, + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_import_apply_bytes_bucket", + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Blocked by Concurrency Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="get_permit"', + ], + ), + graph_panel( + title="Apply Request Speed", + description=None, + yaxes=yaxes( + left_format=UNITS.OPS_PER_SEC, + log_base=1, + ), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_applier_event", + label_selectors=[ + 'type="begin_req"', + ], + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Cached File in Memory", + description=None, + 
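# Sum of tikv_import_apply_cached_bytes: bytes of restore files the applier
+ # currently keeps cached in memory.
+ 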
yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=1), + targets=[ + target( + expr=expr_sum("tikv_import_apply_cached_bytes"), + ), + ], + ), + graph_panel( + title="Engine Requests Unfinished", + description=None, + yaxes=yaxes( + left_format=UNITS.SHORT, + log_base=1, + ), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_applier_event", + label_selectors=[ + 'type!="begin_req"', + ], + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Apply Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="apply"', + ], + ), + graph_panel( + title="Raft Store Memory Usage", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=1), + targets=[ + target( + expr=expr_sum( + "tikv_server_mem_trace_sum", + label_selectors=[ + 'name=~"raftstore-.*"', + ], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def ResolvedTS() -> RowPanel: + layout = Layout(title="Resolved TS") + layout.row( + [ + graph_panel( + title="Resolved TS Worker CPU", + description="The CPU utilization of resolved ts worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"resolved_ts.*"', + ], + ), + ) + ], + ), + graph_panel( + title="Advance ts Worker CPU", + description="The CPU utilization of advance ts worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"advance_ts.*"', + ], + ), + ) + ], + ), + graph_panel( + title="Scan lock Worker CPU", + description="The CPU utilization of scan lock worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"inc_scan.*"', + ], + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Max gap of resolved-ts", + description="The gap between resolved ts (the maximum candidate of safe-ts) and current time.", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_resolved_ts_gap_millis", + ), + ) + ], + ), + graph_panel( + title="Max gap of follower safe-ts", + description="The gap between now() and the minimal (non-zero) safe ts for followers", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_follower_safe_ts_gap_millis", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Min Resolved TS Region", + description="The region that has minimal resolved ts", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_resolved_ts_region", + ), + ) + ], + ), + graph_panel( + title="Min Safe TS Follower Region", + description="The region id of the follower that has minimal safe ts", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_follower_safe_ts_region", + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Check leader duration", + description="The time consumed when handle a check leader request", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_check_leader_duration_seconds_bucket", + ), + graph_panel( + title="Max gap of resolved-ts in region leaders", + description="The gap between resolved ts of leaders and current time", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + 
expr=expr_sum( + "tikv_resolved_ts_min_leader_resolved_ts_gap_millis", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% CheckLeader request region count", + description="Bucketed histogram of region count in a check leader request", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_item_count", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ) + ], + ), + heatmap_panel( + title="Initial scan backoff duration", + description="The backoff duration before starting initial scan", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket", + ), + ] + ) + layout.row( + [ + graph_panel( + title="Lock heap size", + description="Total bytes in memory of resolved-ts observe regions's lock heap", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_resolved_ts_lock_heap_bytes", + ), + ) + ], + ), + graph_panel( + title="Min Leader Resolved TS Region", + description="The region that its leader has minimal resolved ts.", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_leader_resolved_ts_region", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Observe region status", + description="The status of resolved-ts observe regions", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_region_resolve_status", + by_labels=["type"], + ), + ) + ], + ), + graph_panel( + title="Fail advance ts count", + description="The count of fail to advance resolved-ts", + targets=[ + target( + expr=expr_sum_delta( + "tikv_resolved_ts_fail_advance_count", + by_labels=["instance", "reason"], + ), + ), + target( + expr=expr_sum_delta( + "tikv_raftstore_check_stale_peer", + by_labels=["instance"], + ), + legend_format="{{instance}}-stale-peer", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% CheckLeader request size", + description="Bucketed histogram of the check leader request size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_size_bytes", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_item_count", + by_labels=["instance"], + ), + legend_format="{{instance}}-check-num", + ), + ], + ), + graph_panel( + title="Pending command size", + description="Total bytes of pending commands in the channel", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_resolved_ts_channel_penging_cmd_bytes_total", + ), + ) + ], + ), + ] + ) + return layout.row_panel + + +def Memory() -> RowPanel: + layout = Layout(title="Memory") + layout.row( + [ + graph_panel( + title="Allocator Stats", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_allocator_stats", by_labels=["instance", "type"] + ) + ) + ], + ), + graph_panel( + title="Send Allocated(+) / Release Received(-) Bytes Rate", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="alloc"'], + by_labels=["thread_name"], + ), + "-", + expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="dealloc"'], + by_labels=["thread_name"], + ), + ), + legend_format="{{thread_name}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + 
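# Per-thread allocator counters (tikv_allocator_thread_allocation), shown as
+ # separate alloc and dealloc rate panels.
+ 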
title="Newly Allocated Bytes by Thread", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="alloc"'], + by_labels=["thread_name"], + ), + ) + ], + ), + graph_panel( + title="Recently Released Bytes by Thread", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="dealloc"'], + by_labels=["thread_name"], + ), + ) + ], + ), + ] + ) + return layout.row_panel + + +def BackupImport() -> RowPanel: + layout = Layout(title="Backup & Import") + layout.row( + [ + graph_panel( + title="Backup CPU Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"b.*k.*w.*k.*"', + ], + ), + legend_format="backup-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"backup_io"', + ], + ), + legend_format="backup-io-{{instance}}", + ), + target( + expr=expr_simple( + "tikv_backup_softlimit", + ), + legend_format="backup-auto-throttle-{{instance}}", + ), + ], + ), + graph_panel( + title="Backup Thread Count", + targets=[ + target( + expr=expr_sum( + "tikv_backup_thread_pool_size", + ), + ), + ], + ), + graph_panel( + title="Backup Errors", + description="", + targets=[ + target( + expr=expr_sum_delta( + "tikv_backup_error_counter", + by_labels=["instance", "error"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Backup Write CF SST Size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_backup_range_size_bytes_bucket", + label_selectors=['cf="write"'], + ), + heatmap_panel( + title="Backup Default CF SST Size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_backup_range_size_bytes_bucket", + label_selectors=['cf="default"'], + ), + graph_panel( + title="Backup SST Generation Throughput", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_backup_range_size_bytes_sum", + by_labels=[], # override default by instance. 
+ ), + legend_format="total", + ), + target( + expr=expr_sum_rate( + "tikv_backup_range_size_bytes_sum", + by_labels=["instance", "cf"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Backup Scan SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type="snapshot"'], + ), + heatmap_panel( + title="Backup Scan SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type="scan"'], + ), + heatmap_panel( + title="Backup Save SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type=~"save.*"'], + ), + graph_panel( + title="Backup SST Duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, + "tikv_backup_range_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}-99.9%", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_backup_range_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}-99%", + ), + target( + expr=expr_operator( + expr_sum( + "tikv_backup_range_duration_seconds_sum", + by_labels=["type"], + ), + "/", + expr_sum( + "tikv_backup_range_duration_seconds_count", + by_labels=["type"], + ), + ), + legend_format="{{type}}-avg", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="External Storage Create Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_external_storage_create_seconds_bucket", + ), + graph_panel_histogram_quantiles( + title="External Storage Create Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_external_storage_create_seconds", + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Checksum Request Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_duration_seconds", + label_selectors=['req=~"analyze.*|checksum.*"'], + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel( + title="IO Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "node_disk_io_time_seconds_total", + by_labels=["instance", "device"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import CPU Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance"], + ), + legend_format="import-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance", "tid"], + ).extra("> 0"), + legend_format="import-{{instance}}-{{tid}}", + hide=True, + ), + target( + expr=expr_count_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + ), + legend_format="import-count-{{instance}}", + hide=True, + ), + ], + ), + graph_panel( + title="Import Thread Count", + targets=[ + target( + expr=expr_count_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Import Errors", + targets=[ + target( + expr=expr_sum_delta( + "tikv_import_error_counter", + by_labels=["type", "error", "instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + 
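# Import RPC latency quantiles by request type, plus request rate with
+ # switch_mode excluded.
+ 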
graph_panel_histogram_quantiles( + title="Import RPC Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_import_rpc_duration", + by_labels=["request"], + hide_count=True, + ), + graph_panel( + title="Import RPC Ops", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=['request!="switch_mode"'], + by_labels=["request"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Import Write/Download RPC Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_rpc_duration_bucket", + label_selectors=['request=~"download|write"'], + ), + heatmap_panel( + title="Import Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="queue"'], + ), + heatmap_panel( + title="Import Read SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="read"'], + ), + heatmap_panel( + title="Import Rewrite SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="rewrite"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Import Ingest RPC Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_rpc_duration_bucket", + label_selectors=['request=~"ingest"'], + ), + heatmap_panel( + title="Import Ingest SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_ingest_duration_bucket", + label_selectors=['type=~"ingest"'], + ), + heatmap_panel( + title="Import Ingest SST Bytes", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_ingest_byte_bucket", + ), + graph_panel( + title="Import Download SST Throughput", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_download_bytes_sum", + ), + ), + target( + expr=expr_sum_rate( + "tikv_import_download_bytes_sum", + by_labels=[], + ), + legend_format="total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import Local Write keys", + targets=[ + target( + expr=expr_sum_delta( + "tikv_import_local_write_keys", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Import Local Write bytes", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_local_write_bytes", + by_labels=["type", "instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="TTL Expired", + targets=[ + target( + expr=expr_sum( + "tikv_backup_raw_expired_count", + ), + ), + target( + expr=expr_sum( + "tikv_backup_raw_expired_count", + by_labels=[], + ), + legend_format="sum", + ), + ], + ), + graph_panel( + title="cloud request", + description="", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_cloud_request_duration_seconds_count", + by_labels=["cloud", "req"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Encryption() -> RowPanel: + layout = Layout(title="Encryption") + layout.row( + [ + graph_panel( + title="Encryption data keys", + description="Total number of encryption data keys in use", + targets=[ + target( + expr=expr_sum( + "tikv_encryption_data_key_storage_total", + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Encrypted files", + description="Number of files being encrypted", + targets=[ + target( + expr=expr_sum( + 
"tikv_encryption_file_num", + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Encryption initialized", + description="Flag to indicate if encryption is initialized", + targets=[ + target( + expr=expr_simple( + "tikv_encryption_is_initialized", + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Encryption meta files size", + description="Total size of encryption meta files", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple( + "tikv_encryption_meta_file_size_bytes", + ), + legend_format="{{name}}-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Encrypt/decrypt data nanos", + description="", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=[ + 'metric="encrypt_data_nanos"', + ], + by_labels=["req"], + ), + legend_format="encrypt-{{req}}", + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=[ + 'metric="decrypt_data_nanos"', + ], + by_labels=["req"], + ), + legend_format="decrypt-{{req}}", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Read/write encryption meta duration", + description="Writing or reading file duration (second)", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_encryption_write_read_file_duration_seconds", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def BackupLog() -> RowPanel: + layout = Layout(title="Backup Log") + layout.row( + [ + stat_panel( + title="Endpoint Status", + targets=[ + target( + expr=expr_simple("tikv_log_backup_enabled"), + legend_format="{{ instance }}", + ), + ], + mappings=[ + StatValueMappings( + StatValueMappingItem("Disabled", "0", "red"), + StatValueMappingItem("Enabled", "1", "green"), + ), + ], + ), + stat_panel( + title="Task Status", + targets=[ + target( + expr=expr_min("tikv_log_backup_task_status"), + ), + ], + mappings=[ + StatValueMappings( + StatValueMappingItem("Running", "0", "green"), + StatValueMappingItem("Paused", "1", "yellow"), + StatValueMappingItem("Error", "2", "red"), + ), + ], + ), + stat_panel( + title="Advancer Owner", + text_mode="name", + targets=[ + target( + expr="tidb_log_backup_advancer_owner > 0", + legend_format="{{ instance }}", + ), + ], + ), + stat_panel( + title="Average Flush Size", + description="The average flush size of last 30mins.", + format=UNITS.BYTES_IEC, + targets=[ + target( + expr=expr_operator( + expr_sum_increase( + "tikv_log_backup_flush_file_size_sum", + range_selector="30m", + ), + "/", + expr_sum_increase( + "tikv_log_backup_flush_duration_sec_count", + label_selectors=['stage=~"save_files"'], + range_selector="30m", + ), + ), + legend_format="{{ instance }}", + ), + ], + ), + ] + ) + layout.row( + [ + stat_panel( + title="Flushed Files (Last 30m) Per Host", + description="The current total flushed file number of this run.", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_count", + range_selector="30m", + ).extra("> 0"), + ), + ], + ), + stat_panel( + title="Flush Times (Last 30m)", + description="This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_duration_sec_count", + range_selector="30m", + label_selectors=['stage=~"save_files"'], + ), + ), + ], + ), + stat_panel( + 
title="Total Flushed Size (Last 30m)", + description="This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + format=UNITS.BYTES_IEC, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_sum", + range_selector="30m", + ), + ), + ], + ), + stat_panel( + title="Flush Files (Last 30m)", + description="This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_count", + range_selector="30m", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="CPU Usage", + description="The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"backup_stream|log-backup-scan(-[0-9]+)?"' + ], + ), + ) + ], + ), + graph_panel( + title="Handle Event Rate", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_handle_kv_batch_sum", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Initial Scan Generate Event Throughput", + description="The data rate of initial scanning emitting events.", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_incremental_scan_bytes_sum", + ), + ) + ], + ), + graph_panel( + title="Abnormal Checkpoint TS Lag", + description=None, + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_operator( + "time() * 1000", + "-", + expr_max( + "tidb_log_backup_last_checkpoint", by_labels=["task"] + ).extra("/ 262144 > 0"), + ), + legend_format="{{ task }}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Memory Of Events", + description="The estimated memory usage by the streaming backup module.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("tikv_log_backup_heap_memory"), + ) + ], + ), + graph_panel( + title="Observed Region Count", + description="", + targets=[ + target( + expr=expr_sum("tikv_log_backup_observed_region"), + ), + target( + expr=expr_sum( + "tikv_log_backup_observed_region", + ), + legend_format="{{instance}}-total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Errors", + description="The errors met when backing up.\n**They are retryable, don't worry.**", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_errors", + range_selector="1m", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Fatal Errors", + description="The errors met when backing up.", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_fatal_errors", + range_selector="1m", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Checkpoint TS of Tasks", + description=None, + yaxes=yaxes(left_format=UNITS.DATE_TIME_ISO_TODAY), + null_point_mode=NULL_AS_NULL, + targets=[ + target( + expr=expr_max( + "tidb_log_backup_last_checkpoint", by_labels=["task"] + ).extra("/ 262144 > 0"), + ), + target(expr="time() 
* 1000", legend_format="Current Time"), + ], + series_overrides=[ + series_override( + alias="Current Time", + fill=0, + dashes=True, + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Flush Duration", + description="The duration of flushing a batch of file.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_flush_duration_sec_bucket", + label_selectors=['stage=~"save_files"'], + ), + heatmap_panel( + title="Initial scanning duration", + description="The duration of scanning the initial data from local DB and transform them into apply events.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_initial_scan_duration_sec_bucket", + ), + heatmap_panel( + title="Convert Raft Event duration", + description="The duration of converting a raft request into a apply event.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"to_stream_event"'], + ), + heatmap_panel( + title="Wait for Lock Duration", + description="The duration of waiting the mutex of the controller.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"get_router_lock"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Command Batch Size", + description="The number of KV-modify of each raft command observed.", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_log_backup_handle_kv_batch_bucket", + ), + heatmap_panel( + title="Save to Temp File Duration", + description="The total cost of saving an event into temporary file.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"save_to_temp_file"'], + ), + heatmap_panel( + title="Write to Temp File Duration", + description="The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_on_event_duration_seconds_bucket", + label_selectors=['stage="write_to_tempfile"'], + ), + heatmap_panel( + title="System Write Call Duration", + description="The duration of collecting metadata and call the UNIX system call *write* for each event.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_on_event_duration_seconds_bucket", + label_selectors=['stage="syscall_write"'], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Internal Message Type", + description="The internal message type count.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC, log_base=2), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_interal_actor_acting_duration_sec_count", + by_labels=["message"], + ), + ) + ], + ), + graph_panel( + title="Internal Message Handling Duration (P99)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_log_backup_interal_actor_acting_duration_sec", + by_labels=["message"], + ), + legend_format="{{message}}", + ) + ], + ), + graph_panel( + title="Internal Message Handling Duration (P90)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9, + "tikv_log_backup_interal_actor_acting_duration_sec", + by_labels=["message"], + ), + legend_format="{{message}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + 
title="Initial Scan RocksDB Throughput", + description="The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_operations", + label_selectors=['op=~"read_bytes"'], + by_labels=["cf"], + ), + ) + ], + ), + graph_panel( + title="Initial Scan RocksDB Operation", + description="Misc statistics of RocksDB during initial scanning.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_operations", + label_selectors=['op!~"read_bytes"'], + by_labels=["cf", "op"], + ).extra("> 0"), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Initial Scanning Trigger Reason", + description="The reason of triggering initial scanning.", + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_reason", + by_labels=["reason"], + ), + ) + ], + ), + graph_panel( + title="Region Checkpoint Key Putting", + description="", + yaxes=yaxes(left_format=UNITS.COUNTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_metadata_key_operation", + by_labels=["type"], + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Request Checkpoint Batch Size", + metric="tidb_log_backup_advancer_batch_size_bucket", + label_selectors=['type="checkpoint"'], + ), + heatmap_panel( + title="Tick Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tidb_log_backup_advancer_tick_duration_sec_bucket", + label_selectors=['step="tick"'], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Region Checkpoint Failure Reason", + description="The reason of advancer failed to be advanced.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_region_request_failure", + label_selectors=['reason!="retryable-scan-region"'], + by_labels=["reason"], + ), + ), + ], + ), + graph_panel( + title="Request Result", + description="The result of getting region checkpoints.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_region_request", + by_labels=["result"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Tick Duration (P99)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tidb_log_backup_advancer_tick_duration_sec", + by_labels=["step"], + ), + legend_format="{{ step }}", + ) + ], + ), + graph_panel( + title="Tick Duration (P90)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9, + "tidb_log_backup_advancer_tick_duration_sec", + by_labels=["step"], + ), + legend_format="{{ step }}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Get Region Operation Count", + description="The frequent of getting region level checkpoint.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_advancer_tick_duration_sec_count", + label_selectors=['step="get-regions-in-range"'], + by_labels=["step", "instance"], + ), + ) + ], + ), + graph_panel( + title="Try Advance Trigger Time", + description="The variant of checkpoint group.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_advancer_tick_duration_sec_count", + label_selectors=['step="try-advance"'], + by_labels=["step", "instance"], + ), + ) + ], + ), + ] 
+ ) + return layout.row_panel + + +def SlowTrendStatistics() -> RowPanel: + layout = Layout(title="Slow Trend Statistics") + layout.row( + [ + graph_panel( + title="Slow Trend", + description="The changing trend of the slowness on I/O operations. 'value > 0' means the related store might have a slow trend.", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend", + ), + ), + ], + ), + graph_panel( + title="QPS Changing Trend", + description="The changing trend of QPS on each store. 'value < 0' means the QPS has a dropping trend.", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_result", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="AVG Sampling Latency", + description="The sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_l0", + ), + ), + ], + ), + graph_panel( + title="QPS of each store", + description="The QPS of each store.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_result_value", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +#### Metrics Definition End #### + + +dashboard = Dashboard( + title="Test-Cluster-TiKV-Details", + uid="RDVQiEzZz", + timezone="browser", + refresh="1m", + inputs=[DATASOURCE_INPUT], + editable=True, + graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, + templating=Templates(), + panels=[ + Duration(), + Cluster(), + Errors(), + Server(), + gRPC(), + ThreadCPU(), + TTL(), + PD(), + IOBreakdown(), + RaftWaterfall(), + RaftIO(), + RaftPropose(), + RaftProcess(), + RaftMessage(), + RaftAdmin(), + RaftLog(), + LocalReader(), + UnifiedReadPool(), + Storage(), + FlowControl(), + SchedulerCommands(), + Scheduler(), + GC(), + Snapshot(), + Task(), + CoprocessorOverview(), + CoprocessorDetail(), + Threads(), + RocksDB(), + RaftEngine(), + Titan(), + PessimisticLocking(), + PointInTimeRestore(), + ResolvedTS(), + Memory(), + BackupImport(), + Encryption(), + BackupLog(), + SlowTrendStatistics(), + ], +).auto_panel_ids() diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f73a59cf3779..c36a81d522aa 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1,122 +1,102 @@ { "__inputs": [ { - "name": "DS_TEST-CLUSTER", - "label": "test-cluster", "description": "", - "type": "datasource", + "label": "test-cluster", + "name": "DS_TEST-CLUSTER", "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "7.5.11" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "panel", - "id": "heatmap", - "name": "Heatmap", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" + "pluginName": "Prometheus", + "type": "datasource" } ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "${DS_TEST-CLUSTER}", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] 
+ "list": [] }, + "description": "", "editable": true, "gnetId": null, "graphTooltip": 1, + "hideControls": false, "id": null, - "iteration": 1689914850671, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, "y": 0 }, - "id": 13620, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Write Duration Composition", + "description": "Write Pipeline Composition", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 12842, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -124,85 +104,115 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "Write Raft Log .99", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Propose Wait .99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Apply Wait .99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Replicate Raft Log .99", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Apply Duration .99", - "refId": "E" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "hide": false, - "interval": "", - "legendFormat": "Raft Message Wait .99", - 
"refId": "F" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Pipeline Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -210,6 +220,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -218,6 +229,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -228,42 +240,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Read Duration Composition", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 12970, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -271,61 +297,85 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "Get Snapshot .99", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Cop Wait .99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.95,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Cop Handle .99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.95,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cop Read Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -333,6 +383,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -341,6 +392,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -351,54 +403,92 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Duration", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 1 + "y": 0 }, - "id": 2742, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The storage size per TiKV 
instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 0, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -410,72 +500,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"used\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-used", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"kv_size\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-kv_size", - "refId": "B", - "step": 10, - "hide": true - }, - { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"raft_size\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-raft_size", - "refId": "C", - "step": 10, - "hide": true - }, - { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"import_size\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type = \"used\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-import_size", - "refId": "D", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type = \"used\"}\n \n)) by (instance) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -483,14 +558,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", 
"label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -501,38 +578,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The available capacity size of each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 8, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 1706, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -544,45 +633,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"available\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"available\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"available\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Available size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -590,14 +691,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -608,38 +711,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The capacity size per TiKV instance", "editable": true, 
"error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 16, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 1707, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -651,45 +766,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"capacity\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"capacity\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"capacity\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Capacity size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -697,14 +824,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -715,38 +844,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU usage of each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 9 + "y": 7 }, - "hiddenSeries": false, - "id": 
1708, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -760,43 +901,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=~\".*tikv\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -804,14 +957,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -822,44 +977,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The memory usage per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 9 + "y": 7 }, - "hiddenSeries": false, - "id": 1709, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -867,43 +1034,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(process_resident_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=~\".*tikv\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -911,14 +1090,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -929,38 +1110,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The I/O utilization per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1710, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -974,43 +1167,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n node_disk_io_time_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{device}}", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{device}}", + "metric": "", + "query": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1018,14 +1223,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1036,44 +1243,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total bytes of read and write in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1711, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1081,52 +1300,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\", type=\"wal_file_bytes\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": 
"{{instance}}-write", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\", type=~\"bytes_read|iter_bytes_read\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"bytes_read|iter_bytes_read\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-read", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"bytes_read|iter_bytes_read\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "MBps", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1134,14 +1371,16 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1152,46 +1391,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The QPS per command in each TiKV instance", + "description": "The number of leaders on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 21 }, - "hiddenSeries": false, - "id": 1713, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1199,44 +1448,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + 
"repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance,type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "refId": "A", - "step": 10 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "QPS", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1244,14 +1504,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1262,34 +1524,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of the gRPC message failures", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 21 }, - "hiddenSeries": false, - "id": 1712, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -1301,7 +1573,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1309,58 +1581,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance)", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-grpc-msg-fail", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_pd_heartbeat_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"noop\"}[1m])) by (instance) < 1", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"noop\"}\n [$__rate_interval]\n)) by (instance) < 1", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-pd-heartbeat", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"noop\"}\n [$__rate_interval]\n)) by (instance) < 1", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_critical_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Errps", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1368,14 +1667,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1386,44 +1687,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe number of leaders on each TiKV instance", + "description": "The number of leaders on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + 
"threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 1715, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1431,57 +1744,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - }, - { - "expr": "delta(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}[30s]) < -10", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1489,14 +1800,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1507,44 +1820,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Regions and Buckets on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + 
"threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 1714, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1552,52 +1877,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"buckets\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - buckets", - "refId": "B", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-buckets", + "metric": "", + "query": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1605,62 +1948,76 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - 
"show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, "description": "TiKV uptime since the last restart", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 41 + "y": 35 }, - "hiddenSeries": false, - "id": 4106, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1668,43 +2025,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "(time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=~\".*tikv\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Uptime", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1712,14 +2081,16 @@ }, "yaxes": [ { - "format": "dtdurations", - "label": "", + "decimals": null, + "format": "s", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1730,87 +2101,107 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Cluster", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 2 + "y": 0 }, - "id": 2743, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Critical error alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "hiddenSeries": false, - "id": 2741, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1818,28 +2209,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_critical_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -1848,22 +2250,23 @@ "fill": true, "line": true, "op": "gt", - "value": 0, - "visible": true + "value": 0.0, + "yaxis": "left" } ], 
"timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Critical error", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1871,14 +2274,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1889,34 +2294,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Indicates occurrences of events that make the TiKV instance unavailable temporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor Full", + "description": "\nIndicates occurrences of events that make the TiKV instance unavailable\ntemporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor\nFull", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 10 + "y": 7 }, - "hiddenSeries": false, - "id": 1584, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -1928,7 +2343,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1936,77 +2351,115 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "scheduler-{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "channelfull-{{instance}}-{{type}}", "metric": "", - "refId": "B", - "step": 4 + "query": "sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='full'}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"full\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "coprocessor-{{instance}}", "metric": "", - "refId": "C", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"full\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write_stall_percentile99\", db=~\"$db\"}) by (instance, db)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write_stall_percentile99\",db=~\"$db\"}\n \n)) by (instance, db) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "stall-{{instance}}-{{db}}", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write_stall_percentile99\",db=~\"$db\"}\n \n)) by (instance, db) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_write_msg_block_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "store-write-channelfull-{{instance}}", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Server is busy", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, 
+ "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2014,86 +2467,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "10s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - "message": "TiKV server report failures", - "name": "server report failures alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of reporting failure messages", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 10 + "y": 7 }, - "hiddenSeries": false, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2113,53 +2544,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_report_failure_msg_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,instance,store_id)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_report_failure_msg_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance, store_id) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}} - to - {{store_id}}", - "metric": "tikv_server_raft_store_msg_total", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "visible": true + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}-to-{{store_id}}", + "metric": "", + "query": "sum(rate(\n tikv_server_report_failure_msg_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance, store_id) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], 
"timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Server report failures", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2167,14 +2600,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2185,34 +2620,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of different raftstore errors on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1718, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2232,44 +2677,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_engine_async_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status!~\"success|all\"}[1m])) by (instance, status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"success|all\"}\n [$__rate_interval]\n)) by (instance, status) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{status}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"success|all\"}\n [$__rate_interval]\n)) by (instance, status) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raftstore error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2277,52 +2733,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": 
null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of scheduler errors per type on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1719, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2342,44 +2810,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"snapshot_err|prepare_write_err\"}[1m])) by (instance, stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"snapshot_err|prepare_write_err\"}\n [$__rate_interval]\n)) by (instance, stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{stage}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"snapshot_err|prepare_write_err\"}\n [$__rate_interval]\n)) by (instance, stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2387,52 +2866,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, 
"description": "The number of different coprocessor errors on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 21 }, - "hiddenSeries": false, - "id": 1720, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2452,44 +2943,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Coprocessor error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2497,52 +2999,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of gRPC message errors per type on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 21 }, 
- "hiddenSeries": false, - "id": 1721, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2562,44 +3076,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2607,52 +3132,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of dropped leaders per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 28 }, - "hiddenSeries": false, - "id": 1722, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2664,7 +3201,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -2672,48 +3209,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": 
true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader drop", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2721,14 +3265,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2739,34 +3285,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of missing leaders per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 28 }, - "hiddenSeries": false, - "id": 1723, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2778,7 +3334,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -2786,48 +3342,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_leader_missing{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_leader_missing\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_leader_missing\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader missing", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2835,14 +3398,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2853,88 +3418,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "RocksDB damaged SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572510, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_rocksdb_damaged_files{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_rocksdb_damaged_files\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, 
"legendFormat": "{{instance}}-existed", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "((\n tikv_rocksdb_damaged_files\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "tikv_rocksdb_damaged_files_deleted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_rocksdb_damaged_files_deleted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-deleted", - "refId": "B" + "metric": "", + "query": "((\n tikv_rocksdb_damaged_files_deleted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Damaged files", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2942,7 +3546,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -2950,6 +3555,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2960,80 +3566,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "the count of Log Replication Reject caused by follower memory insufficient", + "description": "The count of Log Replication Reject caused by follower memory insufficient", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572588, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_server_raft_append_rejects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type!=\"kv_gc\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_append_rejects\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-memory", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_append_rejects\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Log Replication Rejected", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3041,7 +3679,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -3049,6 +3688,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3059,55 +3699,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Errors", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "id": 2744, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The size of each column family", "editable": true, "error": false, - "fill": 3, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 3 + "y": 0 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3115,39 +3798,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CF size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3155,14 +3854,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3173,71 +3874,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "datasourceId": 1, - "model": { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "metric": "", - "refId": "A", - "step": 10 - }, - "params": [ - "A", - "10s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - "message": "TiKV channel full", - "name": "TiKV channel full alert", - "noDataState": "ok", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", "description": "The total number of channel full errors on each TiKV instance", "editable": true, "error": false, - "fill": 3, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 3 + "y": 0 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -3249,7 +3923,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3257,48 +3931,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 + "query": "sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Channel full", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3306,14 +3987,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3324,39 +4007,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of leaders being written on each TiKV instance", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 11 + "y": 7 }, - "id": 75, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3364,40 +4064,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + 
"span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Active written leaders", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3405,90 +4120,180 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1073741824 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "approximate region size alert", - "noDataState": "no_data", - "notifications": [] + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 33, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_region_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Approximate region size", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The approximate Region size", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 14 }, - "id": 1481, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3496,58 +4301,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, 
+ "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "C", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_region_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_region_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_region_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "(sum(rate(\n tikv_raftstore_region_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Approximate Region size", + "title": "Approximate region size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3555,320 +4425,180 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } 
}, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 3638, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": false, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Approximate Region size Histogram", - "tooltip": { - "msResolution": false, - "shared": false, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "histogram", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The average rate of writing bytes to Regions per TiKV instance", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 58, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_region_written_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance) / sum(rate(tikv_region_written_bytes_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_regi", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Region average written bytes", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dataFormat": "timeseries", + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 21 }, "heatmap": {}, - "hideZeroBuckets": false, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, "highlightCards": true, - "id": 3646, + "id": 35, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_region_written_bytes_bucket[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_regi", - "refId": "A", - "step": 10 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_region_written_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Region written bytes", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The average rate of written keys to Regions per TiKV instance", + "description": "The average rate of writing bytes to Regions per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 21 }, - "id": 57, + "height": 
null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3876,40 +4606,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_region_written_keys_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_region_written_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_bytes_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_region_written_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_bytes_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Region average written keys", + "title": "Region average written bytes", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3917,7 +4662,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -3925,6 +4671,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3935,10 +4682,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -3946,135 +4694,204 @@ "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dataFormat": "timeseries", + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, 
"gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 28 }, "heatmap": {}, - "hideZeroBuckets": false, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, "highlightCards": true, - "id": 3647, + "id": 37, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_region_written_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Region written keys", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "short", + "decimals": 1, + "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The size of requests into request batch per TiKV instance", + "description": "The average rate of written keys to Regions per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 28 }, - "id": 3720, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", 
+ "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_request_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_server_request_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{type}} avg", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_request_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_region_written_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} 99", - "refId": "B" + "legendFormat": "{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_region_written_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Request batch input", + "title": "Region average written keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4082,14 +4899,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, - "logBase": 10, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4100,38 +4919,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe number of peers in hibernated state", + "description": "The number of peers in hibernated state", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 35 }, - "id": 3730, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + 
"sort": "max", "sortDesc": true, "total": false, "values": true @@ -4139,45 +4976,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_hibernated_peer_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, state)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_hibernated_peer_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{state}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_raftstore_hibernated_peer_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Hibernate Peers", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4185,14 +5032,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4203,24 +5052,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 35 }, - "id": 7266, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -4231,50 +5100,79 @@ "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "raft_engine_memory_usage{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n raft_engine_memory_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-raft-engine", - "refId": "B" + "metric": "", + "query": "((\n raft_engine_memory_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory trace", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4282,6 +5180,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -4290,78 +5189,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 42 }, - "id": 9560, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(tikv_raft_entries_evict_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raft_entries_evict_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raft_entries_evict_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft Entry Cache Evicts", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4369,7 +5313,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -4377,53 +5322,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 42 }, - "hiddenSeries": false, - "id": 12971, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -4431,44 +5390,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_address_resolve_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_address_resolve_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_address_resolve_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Resolve address duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4476,6 +5446,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -4484,6 +5455,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4494,79 +5466,130 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572581, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, name))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + 
"format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Thread Pool Schedule Wait Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4574,17 +5597,19 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, - "max": "30", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -4592,79 +5617,130 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The average rate of written keys to Regions per TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572692, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name) / sum(rate(tikv_yatp_pool_schedule_wait_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Average Thread Pool Schedule Wait Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4672,17 +5748,19 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, - "max": "30", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -4690,92 +5768,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 68 + "y": 56 }, - "hiddenSeries": false, - "id": 23763572784, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": false, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*/", - "stack": "A" - } - ], - "spaceLength": 10, - "stack": false, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + 
"span": null, + "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "copr-{{req}}", - "queryType": "randomWalk", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Disk IO time per second", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4783,6 +5896,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ns", "label": null, "logBase": 1, @@ -4791,6 +5905,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4801,90 +5916,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 68 + "y": 56 }, - "hiddenSeries": false, - "id": 23763572785, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + 
"values": true }, "lines": false, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*/", - "stack": "A" - } - ], - "spaceLength": 10, - "stack": false, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "copr-{{req}}", - "queryType": "randomWalk", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Disk IO bytes per second", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4892,7 +6044,8 @@ }, "yaxes": [ { - "format": "binBps", + "decimals": null, + "format": "ns", "label": null, "logBase": 1, "max": null, @@ -4900,6 +6053,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4910,57 +6064,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Server", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, 
"gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 4 + "y": 0 }, - "id": 2745, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 95, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -4968,53 +6163,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type,priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "sum(rate(\n 
tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5022,7 +6234,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "reqps", "label": null, "logBase": 1, "max": null, @@ -5030,6 +6243,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5040,41 +6254,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of gRPC message which is failed", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 107, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5082,43 +6311,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_fail_total", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message 
failed", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5126,7 +6367,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "reqps", "label": null, "logBase": 1, "max": null, @@ -5134,6 +6376,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5144,43 +6387,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The execution time of gRPC message", + "description": "The 99% percentile of execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 7 }, - "hiddenSeries": false, - "id": 98, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5188,53 +6444,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type,priority))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority, le) \n \n \n)) ", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "refId": "B", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority, le) \n \n \n)) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% gRPC message duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5242,14 +6515,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 10, + "logBase": 2, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5260,42 +6535,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The average execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 7 }, - "hiddenSeries": false, - "id": 2532, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5303,53 +6592,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) )", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "refId": "B", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) )", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Average gRPC message duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5357,6 +6663,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -5365,6 +6672,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5375,42 +6683,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The 99% percentile of execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 14 }, - "hiddenSeries": false, - "id": 2533, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5418,78 +6740,130 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_grpc_req_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_req_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99% request", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_req_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_grpc_resp_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_resp_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99% response", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_resp_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_grpc_req_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_grpc_req_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_grpc_req_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_req_batch_size_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg request", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_server_grpc_req_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_req_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_grpc_resp_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_grpc_resp_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_grpc_resp_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_resp_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg response", - "refId": "D" + "metric": "", + "query": "(sum(rate(\n tikv_server_grpc_resp_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_resp_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_request_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_request_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99% kv get batch", - "refId": "E" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_request_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_request_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_request_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_request_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_request_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": 
"time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg kv batch", - "refId": "F" + "metric": "", + "query": "(sum(rate(\n tikv_server_request_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_request_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC batch size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5497,7 +6871,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -5505,6 +6880,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5515,42 +6891,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 14 }, - "hiddenSeries": false, - "id": 2534, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5558,50 +6948,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_raft_message_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n 
sum(rate(\n tikv_server_raft_message_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_raft_message_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_raft_message_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_message_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_raft_message_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_message_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_raft_message_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "raft message batch size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5609,7 +7019,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -5617,6 +7028,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5627,41 +7039,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The QPS of different sources of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572858, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5669,45 +7096,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + 
"dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_grpc_request_source_counter_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_request_source_counter_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{source}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_request_source_counter_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC request sources QPS", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5715,6 +7152,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -5723,6 +7161,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5733,41 +7172,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The duration of different sources of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572859, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5775,45 +7229,55 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": 
"sum(rate(tikv_grpc_request_source_duration_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_request_source_duration_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{source}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_request_source_duration_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC request sources duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5821,7 +7285,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -5829,6 +7294,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5839,41 +7305,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The QPS of different resource groups of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 37 + "y": 28 }, - "hiddenSeries": false, - "id": 23763573090, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5881,150 +7362,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_grpc_resource_group_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_resource_group_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{name}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_resource_group_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC resource group QPS", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of gRPC raft message", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 37 - }, - "hiddenSeries": false, - "id": 24763573092, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_raftstore_message_recv_by_store{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, store)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{store}}", - "metric": "tikv_raftstore_message_recv_by_store", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "gRPC message count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6032,7 +7418,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -6040,6 +7427,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6050,105 +7438,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "gRPC", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 5 + "y": 0 }, - "id": 2746, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1.7 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "datasourceId": 1, - "model": { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 20 - }, - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "message": "TiKV raftstore thread CPU usage is high", - "name": "TiKV raft store CPU alert", - "noDataState": "ok", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6156,53 +7537,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1.7, - "visible": true + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft store CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6210,6 +7593,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6218,6 +7602,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6228,79 +7613,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1.8 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV async apply thread CPU usage is high", - "name": "TiKV async apply CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of async apply", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 79, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6308,53 +7670,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"apply_[0-9]+\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1.8, - "visible": true + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Async apply CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6362,14 +7726,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6380,81 +7746,65 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 0.8 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "message": "TiKV store writer thread CPU usage is high", - "name": "Store writer CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of store writer thread", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 12 + "y": 7 }, - "hiddenSeries": false, - "id": 13115, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6462,30 +7812,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] 
}, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_write.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6495,22 +7854,22 @@ "line": true, "op": "gt", "value": 0.8, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store writer CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6518,6 +7877,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6526,6 +7886,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6536,76 +7897,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV gRPC poll thread CPU usage is high", - "name": "TiKV gRPC poll CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of gRPC", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 12 + "y": 7 }, - "hiddenSeries": false, - "id": 105, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true 
@@ -6613,51 +7954,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"grpc.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "visible": true + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC poll CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6665,6 +8010,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6673,6 +8019,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6683,69 +8030,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV scheduler worker thread CPU usage is high", - "name": "TiKV scheduler worker CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of scheduler worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 19 + "y": 14 }, - "hiddenSeries": false, - "id": 
64, + "height": null, + "hideTimeOverride": false, + "id": 62, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -6757,7 +8088,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6765,28 +8096,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6796,22 +8138,22 @@ "line": true, "op": "gt", "value": 3.6, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6819,6 +8161,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6827,6 +8170,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6837,69 +8181,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV Storage ReadPool thread CPU usage is high", - "name": "TiKV Storage ReadPool CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of readpool", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] 
+ } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 19 + "y": 14 }, - "hiddenSeries": false, - "id": 1908, + "height": null, + "hideTimeOverride": false, + "id": 63, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -6911,7 +8239,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6919,46 +8247,69 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_read_norm.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - normal", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-normal", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_read_high.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - high", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_read_low.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - low", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6968,22 +8319,22 @@ "line": true, "op": "gt", "value": 3.6, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage ReadPool CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6991,6 +8342,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6999,6 +8351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7009,80 +8362,65 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 7.2 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV unified read pool thread CPU usage is high", - "name": "Unified read pool CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of the unified read pool", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 26 + "y": 21 }, - "hiddenSeries": false, - "id": 4287, + "height": null, + "hideTimeOverride": false, + "id": 64, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7090,28 +8428,39 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": 
false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified_read_po.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -7121,22 +8470,22 @@ "line": true, "op": "gt", "value": 7.2, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Unified read pool CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7144,6 +8493,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7152,6 +8502,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7162,44 +8513,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of RocksDB", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 26 + "y": 21 }, - "hiddenSeries": false, - "id": 69, + "height": null, + "hideTimeOverride": false, + "id": 65, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7207,59 +8570,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"rocksdb.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "warning", - "fill": true, - "line": true, - "op": "gt", - "value": 1 - }, - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "RocksDB CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7267,6 +8626,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7275,6 +8635,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7285,80 +8646,65 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 7.2 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV Coprocessor thread CPU alert", - "name": "TiKV Coprocessor CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of coprocessor", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 78, + "height": null, + "hideTimeOverride": false, + "id": 66, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7366,46 +8712,69 @@ "lines": true, "linewidth": 1, 
"links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_normal.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - normal", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-normal", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_high.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - high", - "refId": "B", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_low.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - low", - "refId": "C", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -7415,22 +8784,22 @@ "line": true, "op": "gt", "value": 7.2, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Coprocessor CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": 
"cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7438,6 +8807,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7446,6 +8816,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7456,40 +8827,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 2531, + "height": null, + "hideTimeOverride": false, + "id": 67, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7497,42 +8884,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"gc_worker.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7540,6 +8940,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7548,6 +8949,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7558,42 +8960,56 @@ ], "yaxis": { 
"align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of split check", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 35 }, + "height": null, + "hideTimeOverride": false, "id": 68, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7601,41 +9017,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"background.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "BackGround Worker CPU", + "title": "Background Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7643,14 +9073,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7661,42 +9093,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - 
"overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 35 }, - "id": 692, + "height": null, + "hideTimeOverride": false, + "id": 69, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7704,67 +9150,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/import-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "import-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "import-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 - }, - { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "import-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Import CPU", + "title": "Raftlog fetch Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7772,6 +9206,7 @@ }, "yaxes": [ { + "decimals": null, 
"format": "percentunit", "label": null, "logBase": 1, @@ -7780,6 +9215,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7790,42 +9226,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 42 }, - "id": 691, + "height": null, + "hideTimeOverride": false, + "id": 70, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7833,77 +9283,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(backup-worker|bkwkr).*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_endpoint\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-endpoint", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(backup-worker|bkwkr).*\"}[1m])) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 - }, - { - "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-count-{{instance}}", - "metric": 
"tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Backup Worker CPU", + "title": "Import CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7911,6 +9339,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7919,6 +9348,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7929,38 +9359,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 42 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 71, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7968,53 +9416,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*tso/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdcwkr.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - worker", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"tso\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": 
"{{instance}} - tso", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "CDC worker CPU", + "title": "Backup CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8022,6 +9472,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8030,7 +9481,8 @@ "show": true }, { - "format": "percentunit", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8040,38 +9492,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 49 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 72, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8079,40 +9549,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdc_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-worker", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) 
by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-tso", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-endpoint", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "CDC endpoint CPU", + "title": "CDC worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8120,6 +9635,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8128,6 +9644,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8138,40 +9655,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The CPU utilization of raftstore thread", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 55 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572511, + "height": null, + "hideTimeOverride": false, + "id": 73, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8179,45 +9712,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, - "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + 
"repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftlog_fetch.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raftlog fetch Worker CPU", + "title": "TSO Worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8225,6 +9768,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8233,6 +9777,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8243,44 +9788,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread CPU", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 74, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of TSO worker", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 0 }, - "hiddenSeries": false, - "id": 9962, + "height": null, + "hideTimeOverride": false, + "id": 75, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8288,63 +9887,55 
@@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"tso_worker\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "warning", - "fill": true, - "line": true, - "op": "gt", - "value": 0.3, - "yaxis": "left" - }, - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" + "metric": "", + "query": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TSO Worker CPU", + "title": "TTL expire count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8352,7 +9943,8 @@ }, "yaxes": [ { - "format": "percentunit", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -8360,6 +9952,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8370,100 +9963,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - } - ], - "repeat": null, - "title": "Thread CPU", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 6946, - "panels": [ + }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 62 + "x": 12, + "y": 0 }, - "hiddenSeries": false, - "id": 23763573818, + "height": null, + "hideTimeOverride": false, + "id": 76, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": 
true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_ttl_expire_kv_count_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 10, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL expire count", + "title": "TTL expire size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8471,16 +10076,16 @@ }, "yaxes": [ { - "$$hashKey": "object:35", - "format": "short", - "label": "", + "decimals": null, + "format": "bytes", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:36", + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8491,89 +10096,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 62 + "x": 0, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573819, + "height": null, + "hideTimeOverride": false, + "id": 77, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null 
as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_ttl_expire_kv_size_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 10, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "E" + "metric": "", + "query": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL expire size", + "title": "TTL check progress", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8581,16 +10209,16 @@ }, "yaxes": [ { - "$$hashKey": "object:35", - "format": "decbytes", - "label": "", + "decimals": null, + "format": "percentunit", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:36", + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8601,82 +10229,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 69 + "x": 12, + "y": 7 }, - "id": 6985, + "height": null, + "hideTimeOverride": false, + "id": 78, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, 
"links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_ttl_checker_processed_regions{instance=~\"$instance\"}) by (instance) / sum(tikv_raftstore_region_count{instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 10, - "legendFormat": "{{instance}}", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL check progress", + "title": "TTL checker actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8684,14 +10342,16 @@ }, "yaxes": [ { - "format": "percentunit", - "label": "", + "decimals": null, + "format": "ops", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8702,194 +10362,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": "The time consumed when executing GC tasks", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 69 + "x": 0, + "y": 14 }, - "id": 6987, + "height": null, + "hideTimeOverride": false, + "id": 79, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"sum(rate(tikv_ttl_checker_actions{instance=~\"$instance\"}[30s])) by (type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "E" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TTL checker actions", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ { - "format": "ops", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The time consumed when executing GC tasks", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 76 - }, - "id": 6986, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - 
"step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_ttl_checker_compact_duration_sum{instance=~\"$instance\"}[1m])) / sum(rate(tikv_ttl_checker_compact_duration_count{instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TTL checker compact duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8897,6 +10543,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -8905,6 +10552,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8915,144 +10563,173 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "ms" + }, + "overrides": [] }, "gridPos": { "h": 7, 
"w": 12, "x": 12, - "y": 76 + "y": 14 }, - "id": 7326, + "height": null, + "hideTimeOverride": false, + "id": 80, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "pluginVersion": "6.1.6", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tikv_ttl_checker_poll_interval{instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": "", "timeFrom": null, "timeShift": null, "title": "TTL checker poll interval", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "TTL", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 7 + "y": 0 }, - "id": 2747, + "height": null, + "hideTimeOverride": false, + "id": 81, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of requests that TiKV sends to PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 0 }, - "hiddenSeries": false, - "id": 1069, + "height": null, 
+ "hideTimeOverride": false, + "id": 82, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9060,42 +10737,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD requests", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9103,7 +10793,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -9111,6 +10802,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9121,41 +10813,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed by requests that TiKV sends to PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 0 }, - "hiddenSeries": false, - "id": 1070, + "height": null, + "hideTimeOverride": false, + "id": 83, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9163,42 +10870,55 @@ "lines": 
true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_request_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_pd_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD request duration (average)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9206,6 +10926,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -9214,6 +10935,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9224,41 +10946,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe total number of PD heartbeat messages", + "description": "The total number of PD heartbeat messages", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 15 + "y": 7 }, - "hiddenSeries": false, - "id": 1215, + "height": null, + "hideTimeOverride": false, + "id": 84, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": 
true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9266,54 +11003,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "pending", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_heartbeat_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "tikv_pd_pending_heartbeat_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "pending", - "refId": "B" + "legendFormat": "{{instance}}-pending", + "metric": "", + "query": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD heartbeats", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9321,6 +11074,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -9329,51 +11083,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of peers validated by the PD worker", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 15 + "y": 7 }, - "hiddenSeries": false, - "id": 1396, + "height": null, + "hideTimeOverride": false, + "id": 85, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9381,43 +11151,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_validate_peer_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD validate peers", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9425,6 +11207,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -9433,6 +11216,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9443,41 +11227,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe count of reconnections between TiKV and PD", + "description": "The count of reconnection between TiKV and PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 23 + "y": 14 }, - "hiddenSeries": false, - "id": 7985, + "height": null, + "hideTimeOverride": false, + "id": 86, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": 
true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9485,42 +11284,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_pd_reconnect_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "PD reconnections", + "title": "PD reconnection", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9528,6 +11340,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -9536,7 +11349,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9546,42 +11360,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe forward status of PD client", + "description": "The forward status of PD client", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 23 + "y": 14 }, - "hiddenSeries": false, - "id": 8376, + "height": null, + "hideTimeOverride": false, + "id": 87, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9589,42 +11417,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": 
"null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_pd_request_forwarded{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{host}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD forward status", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9632,7 +11473,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9640,7 +11482,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9650,44 +11493,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of TSO requests waiting in the queue.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 9963, + "height": null, + "hideTimeOverride": false, + "id": 88, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9695,46 +11550,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_pd_pending_tso_request_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Pending TSO Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9742,7 +11606,8 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -9750,6 +11615,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9760,42 +11626,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The slow score of stores", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 10365, + "height": null, + "hideTimeOverride": false, + "id": 89, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9803,44 +11683,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_raftstore_slow_score{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + 
"metric": "", + "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store Slow Score", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9848,7 +11739,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9856,7 +11748,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9866,88 +11759,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The duration that recorded by inspecting messages.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 38 + "y": 28 }, - "hiddenSeries": false, - "id": 10366, + "height": null, + "hideTimeOverride": false, + "id": 90, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_inspect_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
by (instance, type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Inspected duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9955,6 +11872,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -9963,6 +11881,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9973,58 +11892,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "PD", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 8 + "y": 0 }, - "id": 5265, + "height": null, + "hideTimeOverride": false, + "id": 91, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The throughput of disk write per IO type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 5993, + "height": null, + "hideTimeOverride": false, + "id": 92, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10032,51 +11991,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"write\"}[45s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", 
"intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"write\"}[45s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write IO bytes", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10084,14 +12062,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10102,42 +12082,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The throughput of disk read per IO type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 5994, + "height": null, + "hideTimeOverride": false, + "id": 93, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10145,51 +12139,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
op=\"read\"}[45s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"read\"}[45s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read IO bytes", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10197,14 +12210,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10215,42 +12230,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The threshold of disk IOs per priority", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 7 }, - "hiddenSeries": false, - "id": 5995, + "height": null, + "hideTimeOverride": false, + "id": 94, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10258,43 +12287,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": 
false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_rate_limiter_max_bytes_per_sec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO threshold", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10302,14 +12343,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10320,41 +12363,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "IO rate limiter request wait duration.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 7 }, - "hiddenSeries": false, - "id": 7225, + "height": null, + "hideTimeOverride": false, + "id": 95, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -10363,50 +12420,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_rate_limiter_request_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": " {{type}}-99%", - "refId": "A", - "step": 4 + "legendFormat": "{{type}}-99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "rate(tikv_rate_limiter_request_wait_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s]) / rate(tikv_rate_limiter_request_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": " {{type}}-avg", - "refId": "B" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Rate Limiter Request Wait Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10414,6 +12491,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10422,6 +12500,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10432,75 +12511,128 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "IO Breakdown", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 9 + "y": 0 }, - "id": 13117, + "height": null, + "hideTimeOverride": false, + "id": 96, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": 
"rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed by processing asynchronous write requests", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 24, "x": 0, - "y": 10 + "y": 0 }, - "hiddenSeries": false, - "id": 13132, + "height": null, + "hideTimeOverride": false, + "id": 97, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10508,60 +12640,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{instance=~\"$instance\", type=\"write\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{instance=~\"$instance\", type=\"write\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + 
"instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{instance=~\"$instance\", type=\"write\"}[30s])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{instance=~\"$instance\", type=\"write\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_count{instance=~\"$instance\", type=\"write\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10569,6 +12734,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10577,7 +12743,8 @@ "show": true }, { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -10587,60 +12754,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The store time duration of each request", + 
"editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 19 + "y": 7 }, - "hiddenSeries": false, - "id": 13257, + "height": null, + "hideTimeOverride": false, + "id": 98, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10648,60 +12841,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - 
"exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_duration_secs_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10709,6 +12935,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10717,6 +12944,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10727,60 +12955,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The apply time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 19 + "y": 7 }, - "hiddenSeries": false, - "id": 13259, + "height": null, + "hideTimeOverride": false, + "id": 99, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + 
"hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10788,60 +13042,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_apply_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_apply_duration_secs_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_apply_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + 
"query": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_apply_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10849,6 +13136,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10857,6 +13145,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10867,60 +13156,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The propose wait time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 14 }, - "hiddenSeries": false, - "id": 13261, + "height": null, + "hideTimeOverride": false, + "id": 100, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + 
"fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10928,60 +13243,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_request_wait_time_duration_secs_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_request_wait_time_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_request_wait_time_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store propose wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10989,6 +13337,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10997,6 +13346,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11007,60 +13357,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The batch wait time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 14 }, - "hiddenSeries": false, - "id": 13263, + "height": null, + "hideTimeOverride": false, + "id": 101, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11068,60 +13444,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store batch wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11129,6 +13538,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11137,6 +13547,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11147,60 +13558,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The send-to-write-queue time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 21 }, - "hiddenSeries": false, - "id": 13265, + "height": null, + "hideTimeOverride": false, + "id": 102, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11208,60 +13645,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + 
"legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store send to write queue duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11269,6 +13739,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11277,6 +13748,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11287,60 +13759,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The send raft message of the proposal duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572857, + "height": null, + "hideTimeOverride": false, + "id": 103, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11348,60 +13846,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": 
"histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store send proposal duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11409,6 +13940,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11417,6 +13949,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11427,60 +13960,86 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The write kv db end duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 28 }, - "hiddenSeries": false, - "id": 13269, + "height": null, + "hideTimeOverride": false, + "id": 104, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11488,60 +14047,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", 
+ "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store write kv db end duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11549,6 +14141,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11557,6 +14150,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11567,60 +14161,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The before write time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - 
"overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 28 }, - "hiddenSeries": false, - "id": 13267, + "height": null, + "hideTimeOverride": false, + "id": 105, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11628,60 +14248,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + 
"step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store before write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11689,6 +14342,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11697,6 +14351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11707,60 +14362,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 35 }, - "hiddenSeries": false, - "id": 13273, + "height": 
null, + "hideTimeOverride": false, + "id": 106, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11768,60 +14449,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11829,6 +14543,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11837,6 +14552,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11847,60 +14563,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The write end duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 35 }, - "hiddenSeries": false, - "id": 13271, + "height": null, + "hideTimeOverride": false, + "id": 107, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11908,60 +14650,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store write end duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11969,6 +14744,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11977,6 +14753,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11987,60 +14764,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The commit but not persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 59 + "y": 42 }, - "hiddenSeries": false, - "id": 13277, + "height": null, + "hideTimeOverride": false, + "id": 108, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 
2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -12048,60 +14851,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + 
"target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store commit but not persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12109,6 +14945,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12117,6 +14954,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12127,60 +14965,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The commit and persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 59 + "y": 42 }, - "hiddenSeries": false, - "id": 13275, + "height": null, + "hideTimeOverride": false, + "id": 109, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -12188,60 +15052,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, 
"interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store commit and persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12249,6 +15146,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12257,6 +15155,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12267,25 +15166,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Waterfall", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 10 + "y": 0 }, - "id": 2748, + "height": null, + "hideTimeOverride": false, + "id": 110, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12295,61 +15222,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed for peer processes to be ready in Raft", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 10 + "y": 0 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13279, + "id": 111, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_raft_process_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='ready'}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + 
"step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Process ready duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12358,32 +15318,44 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed for peer processes to be ready in Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 10 + "y": 0 }, - "hiddenSeries": false, - "id": 13281, + "height": null, + "hideTimeOverride": false, + "id": 112, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -12397,54 +15369,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='ready'}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "C", - "step": 4 - } - ], - "thresholds": [ + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Process ready duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12452,14 +15493,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12470,10 +15513,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12483,65 +15527,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - 
"dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time duration of store write loop when store-io-pool-size is not zero.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13283, + "id": 113, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_loop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write loop duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12550,36 +15623,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 7 }, - "hiddenSeries": false, - "id": 13285, + "height": null, + "hideTimeOverride": false, + "id": 114, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -12587,45 +15674,123 @@ "lines": true, "linewidth": 1, "links": [], 
- "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_loop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} ", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Store write loop duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12633,6 +15798,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12641,6 +15807,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12651,10 +15818,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12664,62 +15832,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft appends log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 39, + "id": 115, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Append log duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - 
"show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12728,36 +15928,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft appends log on each TiKV instance", + "description": "The time consumed when Raft commits log on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 14 }, - "hiddenSeries": false, - "id": 13376, + "height": null, + "hideTimeOverride": false, + "id": 116, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -12765,78 +15979,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} ", - "refId": "A", - "step": 4 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_kvdb_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, - "interval": "", - "legendFormat": "kvdb-{{instance}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_raftdb_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "raftdb-{{instance}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_send_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "sendmsg-{{instance}}", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_callback_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": true, + "instant": false, "interval": "", - "legendFormat": "callback-{{instance}}", - "refId": "E" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "99% 
Append log duration per server", + "title": "99% Commit log duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12844,6 +16103,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12852,6 +16112,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12862,10 +16123,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12875,135 +16137,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft commits log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3690, + "id": 117, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Commit log duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft commits log on each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + 
"mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 3688, + "height": null, + "hideTimeOverride": false, + "id": 118, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / 
sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Commit log duration per server", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13011,6 +16408,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13019,6 +16417,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13029,10 +16428,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -13042,62 +16442,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "mode": "spectrum" + "max": null, + "min": null, + "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft applies log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31, + "id": 119, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Apply log duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -13106,36 +16538,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed for Raft to apply logs per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 28 }, - "hiddenSeries": false, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 120, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -13143,43 +16589,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", 
+ "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": " {{instance}}", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Apply log duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13187,6 +16713,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13195,6 +16722,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13205,89 +16733,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The throughput of disk write per IO type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 35 }, - "hiddenSeries": false, - "id": 13382, + "height": null, + "hideTimeOverride": false, + "id": 121, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_raftstore_io_reschedule_region_total{instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "rechedule-{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_raftstore_io_reschedule_pending_tasks_total{instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "pending-task-{{instance}}", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store io task reschedule", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13295,7 +16861,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13303,6 +16870,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": 
null, "logBase": 1, @@ -13313,81 +16881,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when store write task block on each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 35 }, - "hiddenSeries": false, - "id": 13380, + "height": null, + "hideTimeOverride": false, + "id": 122, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket{instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Write task block duration per server", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13395,6 +16994,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13403,6 +17003,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13413,57 +17014,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft IO", + "transformations": [], + 
"transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 11 + "y": 0 }, - "id": 2751, + "height": null, + "hideTimeOverride": false, + "id": 123, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The proposal count of all Regions in a mio tick", + "description": "The proposal count of a Regions in a tick", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 11 + "y": 0 }, - "hiddenSeries": false, - "id": 108, + "height": null, + "hideTimeOverride": false, + "id": 124, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -13471,43 +17113,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_proposal_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft proposals per ready", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13515,7 +17169,8 @@ }, "yaxes": [ { - "format": 
"short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13523,6 +17178,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13533,89 +17189,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of proposals per type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 0 }, - "hiddenSeries": false, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 125, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"local_read|normal|read_index\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft read/write proposals", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13623,6 +17302,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13631,6 +17311,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13641,89 +17322,112 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of read proposals which are made by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 18 + "y": 7 }, - "hiddenSeries": false, - "id": 119, + "height": null, + "hideTimeOverride": false, + "id": 126, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"local_read|read_index\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft read proposals per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13731,6 +17435,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13739,6 +17444,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13749,89 +17455,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of write proposals which are made by each TiKV instance", 
"editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 18 + "y": 7 }, - "hiddenSeries": false, - "id": 120, + "height": null, + "hideTimeOverride": false, + "id": 127, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"normal\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft write proposals per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13839,6 +17568,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13847,6 +17577,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13857,10 +17588,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -13870,62 +17602,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each proposal", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, 
"gridPos": { "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 41, + "id": 128, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Propose wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -13934,80 +17698,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each proposal in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 14 }, - "hiddenSeries": false, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 129, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, 
+ "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Propose wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14015,6 +17873,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14023,6 +17882,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14033,10 +17893,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14046,65 +17907,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each store write task", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13524, + "id": 130, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_task_wait_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - 
"splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -14113,82 +18003,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each store write task in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 21 }, - "hiddenSeries": false, - "id": 13522, + "height": null, + "hideTimeOverride": false, + "id": 131, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_task_wait_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Store write wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14196,6 +18178,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14204,6 +18187,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14214,10 +18198,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14227,61 +18212,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 2535, + "id": 132, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": 
null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_apply_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Apply wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -14290,79 +18308,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 28 }, - "hiddenSeries": false, - "id": 2536, + "height": null, + "hideTimeOverride": false, + "id": 133, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 
7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": 
"99% Apply wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14370,6 +18483,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14378,6 +18492,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14388,10 +18503,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14401,71 +18517,101 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The handle duration of each store write task msg", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572700, + "id": 134, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_handle_msg_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_handle_msg_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_store_write_handle_msg_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_handle_msg_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write handle msg duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14475,58 +18621,88 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": 
false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The distribution of write trigger size", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 46 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572701, + "id": 135, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_trigger_wb_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_trigger_wb_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_store_write_trigger_wb_bytes_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_trigger_wb_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write trigger size", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { + "decimals": 1, "format": "bytes", "label": null, "logBase": 1, @@ -14541,32 +18717,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The rate at which peers propose logs", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 42 }, - "hiddenSeries": false, - "id": 1975, + "height": null, + "hideTimeOverride": false, + "id": 136, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -14575,41 +18768,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, 
"percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(rate(tikv_raftstore_propose_log_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_propose_log_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_propose_log_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft propose speed", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14618,112 +18825,146 @@ "yaxes": [ { "decimals": null, - "format": "short", - "label": "bytes/s", + "format": "binBps", + "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The rate at which peers propose logs", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 54 + "y": 42 }, - "hiddenSeries": false, - "id": 1976, + "height": null, + "hideTimeOverride": false, + "id": 137, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_store_perf_context_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "store-{{type}}", - "metric": "tikv_raftstore_store_perf_context_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_perf_context_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "apply-{{type}}", - "refId": "B", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Perf Context duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14731,6 +18972,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14739,6 +18981,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14749,184 +18992,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each raft message", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 62 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 1977, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "tikv_raftstore_raft_msg_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Raft message wait duration", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Propose", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 12 + "y": 0 }, - "id": 2749, + "height": null, + "hideTimeOverride": false, + "id": 138, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different ready type of Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 12 + "y": 0 }, - "hiddenSeries": false, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 139, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_ready_handled_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_ready_handled_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - 
"metric": "tikv_raftstore_raft_ready_handled_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_ready_handled_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_raft_process_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"ready\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Ready handled", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14934,7 +19162,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -14942,6 +19171,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14952,38 +19182,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The max time consumed by raftstore events", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 12 + "y": 0 }, - "hiddenSeries": false, - "id": 123, + "height": null, + "hideTimeOverride": false, + "id": 140, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -14997,64 +19239,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(1.0, 
sum(rate(tikv_raftstore_event_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_raftstore_event_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_raftstore_event_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(1.0, sum(rate(tikv_broadcast_normal_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_broadcast_normal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "broadcast_normal", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1 + "metric": "", + "query": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_broadcast_normal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max duration of raft store events", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15062,14 +19310,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15080,10 +19330,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -15093,67 +19344,101 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for checking memory locks for replica reads", + "description": "Replica read lock checking duration", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7235, + 
"id": 141, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_replica_read_lock_check_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_replica_read_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_replica_read_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Replica read lock checking duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -15163,163 +19448,244 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The length of peer msgs for each round handling", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572958, + "id": 142, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_peer_msg_len_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_peer_msg_len_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_peer_msg_len_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Peer msg length distribution", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Process", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 13 + "y": 0 }, - "id": 2750, + "height": null, + "hideTimeOverride": false, + "id": 143, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages sent by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 0 }, - "hiddenSeries": false, - "id": 1615, + "height": null, + "hideTimeOverride": false, + "id": 144, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", 
- "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Sent messages per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15327,6 +19693,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15335,6 +19702,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15345,89 +19713,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages flushed by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 0 }, - "hiddenSeries": false, - "id": 1616, + "height": null, + "hideTimeOverride": false, + "id": 145, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_raft_message_flush_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_flush_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", - "metric": "tikv_server_raft_message_flush_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_flush_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flush messages per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15435,6 +19826,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15443,6 +19835,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15453,41 +19846,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages received by each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 7 }, - "hiddenSeries": false, - "id": 106, + "height": null, + "hideTimeOverride": false, + "id": 146, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -15495,42 +19903,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_raft_message_recv_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_recv_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_recv_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Receive messages per server", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, 
"value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15538,6 +19959,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15546,6 +19968,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15556,88 +19979,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of different types of Raft messages that are sent", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 7 }, - "hiddenSeries": false, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 147, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Messages", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15645,6 +20092,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15653,6 +20101,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ 
-15663,88 +20112,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of vote messages that are sent in Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 14 }, - "hiddenSeries": false, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 148, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"vote\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"vote\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"vote\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Vote", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15752,14 +20225,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15770,88 +20245,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of dropped Raft messages per type", 
"editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 14 }, - "hiddenSeries": false, - "id": 1309, + "height": null, + "hideTimeOverride": false, + "id": 149, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_dropped_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_dropped_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_dropped_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft dropped messages", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15859,6 +20358,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15867,6 +20367,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15877,95 +20378,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Message", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 14 + "y": 0 }, - "id": 2752, + "height": null, + "hideTimeOverride": false, + "id": 150, + "interval": 
null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of admin proposals", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 11 + "y": 0 }, - "id": 76, + "height": null, + "hideTimeOverride": false, + "id": 151, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"conf_change|transfer_leader\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"conf_change|transfer_leader\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"conf_change|transfer_leader\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Admin proposals", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15973,6 +20533,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15981,6 +20542,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15991,79 +20553,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of the processed apply command", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 0 }, - "id": 77, + "height": null, + "hideTimeOverride": false, + "id": 152, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_admin_cmd_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status=\"success\", type!=\"compact\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_admin_cmd_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"compact\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_admin_cmd_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_admin_cmd_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"compact\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Admin apply", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16071,14 +20666,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16089,79 +20686,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The number of raftstore split checksss", + "description": "The number of raftstore split checks", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 18 + "y": 7 }, - "id": 70, + "height": null, + "hideTimeOverride": 
false, + "id": 153, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_check_split_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"ignore\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_check_split_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"ignore\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_check_split_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"ignore\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Check split", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16169,6 +20799,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -16177,6 +20808,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16187,80 +20819,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when running split check in .9999", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 18 + "y": 7 }, - "id": 71, + "height": null, + "hideTimeOverride": false, + "id": 154, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + 
"minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_raftstore_check_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_check_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_duration_seconds_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_check_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99.99% Check split duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16268,14 +20932,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16286,94 +20952,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {} - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 14 }, - "hiddenSeries": false, - "id": 3636, + "height": null, + "hideTimeOverride": false, + "id": 155, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 400, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.3.7", "pointradius": 
5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_load_base_split_event[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_load_base_split_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 - }, - { - "expr": "", - "interval": "", - "legendFormat": "", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_load_base_split_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Load base split event", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16381,6 +21065,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -16389,6 +21074,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16399,105 +21085,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 36 + "x": 12, + "y": 14 }, - "hiddenSeries": false, - "id": 23763572060, + "height": null, + "hideTimeOverride": false, + "id": 156, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.80, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.8,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "80%-{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.8,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.90, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%-{{instance}}", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_load_base_split_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_load_base_split_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_load_base_split_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "avg-{{instance}}", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_load_base_split_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_load_base_split_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Load base split duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16505,6 +21228,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -16513,6 +21237,7 @@ "show": true }, { + "decimals": null, 
"format": "short", "label": null, "logBase": 1, @@ -16523,87 +21248,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 36 + "w": 24, + "x": 0, + "y": 21 }, - "hiddenSeries": false, - "id": 23763573619, + "height": null, + "hideTimeOverride": false, + "id": 157, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 300, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_raftstore_peer_in_flashback_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_peer_in_flashback_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_peer_in_flashback_state", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_raftstore_peer_in_flashback_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Peer in Flashback State", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16611,7 +21361,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -16619,6 +21370,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16629,166 +21381,98 @@ ], "yaxis": { "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "editable": true, - "error": false, - "fieldConfig": { - 
"defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 25 - }, - "hiddenSeries": false, - "id": 3637, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.3.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "tikv_read_qps_topn{order=\"0\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TopN QPS exceeds threshold", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Admin", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 15 + "y": 0 }, - "id": 12797, + "height": null, + "hideTimeOverride": false, + "id": 158, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 16 + "y": 0 }, - "hiddenSeries": false, - "id": 12882, + "height": null, + "hideTimeOverride": false, + "id": 159, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -16796,66 +21480,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - "refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + 
"transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16863,6 +21551,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 10, @@ -16871,6 +21560,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16881,43 +21571,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 16 + "y": 0 }, - "hiddenSeries": false, - "id": 12886, + "height": null, + "hideTimeOverride": false, + "id": 160, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -16925,66 +21628,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - 
"expr": "sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - "refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC kv sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16992,6 +21699,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 10, @@ -17000,6 +21708,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17010,91 +21719,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 7 }, - "hiddenSeries": false, - "id": 12881, + "height": null, + "hideTimeOverride": false, + "id": 161, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", 
"pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raft log GC write operations ", + "title": "Raft log GC write operations", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17102,6 +21832,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17110,6 +21841,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17120,91 +21852,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 7 }, - "hiddenSeries": false, - "id": 12884, + "height": null, + "hideTimeOverride": false, + "id": 162, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - 
"exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_seek_operations_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_seek_operations_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_seek_operations_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC seek operations ", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17212,6 +21965,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17220,6 +21974,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17230,91 +21985,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 14 }, - "hiddenSeries": false, - "id": 12887, + "height": null, + "hideTimeOverride": false, + "id": 163, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_log_lag_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_log_lag_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_log_lag_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log lag", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17322,6 +22098,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -17330,6 +22107,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17340,91 +22118,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 14 }, - "hiddenSeries": false, - "id": 12975, + "height": null, + "hideTimeOverride": false, + "id": 164, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_skipped{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_skipped\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{reason}}", - "metric": "tikv_raftstore_check_split_total", - 
"refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{reason}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_skipped\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log gc skipped", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17432,6 +22231,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -17440,6 +22240,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17450,91 +22251,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 21 }, - "hiddenSeries": false, - "id": 12974, + "height": null, + "hideTimeOverride": false, + "id": 165, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_failed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_failed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_failed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 
10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC failed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17542,6 +22364,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17550,6 +22373,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17560,91 +22384,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572229, + "height": null, + "hideTimeOverride": false, + "id": 166, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_entry_fetches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_entry_fetches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_entry_fetches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raft log fetch", + "title": "Raft log fetch ", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, 
"type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17652,6 +22497,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17660,6 +22506,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17670,124 +22517,153 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 48 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572555, + "height": null, + "hideTimeOverride": false, + "id": 167, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": false + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "/pending-task/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, "transform": "negative-Y", - "yaxis": 2 + "yaxis": 2, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_worker_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\", name=~\"raftlog-fetch-worker\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog-fetch-worker\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "pending-task", - "refId": "D" + "metric": "", + "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog-fetch-worker\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log async fetch task duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17804,6 +22680,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17814,48 +22691,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Log", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - 
"y": 16 + "y": 0 }, - "id": 2753, + "height": null, + "hideTimeOverride": false, + "id": 168, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of rejections from the local read thread and The number of total requests", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 12 + "y": 0 }, - "id": 2292, + "height": null, + "hideTimeOverride": false, + "id": 169, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -17863,56 +22790,95 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { - "alias": "/.*-total/i", - "yaxis": 2 + "alias": "/.*-total/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, + "yaxis": 2, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_local_read_reject_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-reject-by-{{reason}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_local_read_executed_requests{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-total", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_local_read_executed_stale_read_requests{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-stale-read", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Local reader requests", + "title": "Raft log async fetch task duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17920,7 +22886,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -17928,6 +22895,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17938,86 +22906,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, - "title": "Local reader", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Local Reader", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 17 + "y": 0 }, - "id": 4200, + "height": null, + "hideTimeOverride": false, + "id": 170, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time used by each level in the unified read pool per second. 
Level 0 refers to small queries.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 0 }, - "id": 4194, + "height": null, + "hideTimeOverride": false, + "id": 171, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 250, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_multilevel_level_elapsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (level)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{level}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Time used by level", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18025,7 +23061,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -18033,6 +23070,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18043,69 +23081,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The chance that level 0 (small) tasks are scheduled in the unified read pool.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 0 }, - "id": 4196, + "height": null, + "hideTimeOverride": 
false, + "id": 172, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_multilevel_level0_chance{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{type}}", + "metric": "", + "query": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Level 0 chance", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18113,6 +23194,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -18121,6 +23203,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18131,70 +23214,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of concurrently running tasks in the unified read pool.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 7 }, - "id": 4198, + "height": null, + "hideTimeOverride": false, + "id": 173, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 
5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(avg_over_time(tikv_unified_read_pool_running_tasks{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Running tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18202,6 +23327,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -18210,6 +23336,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18220,10 +23347,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -18233,158 +23361,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 26 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572469, + "id": 174, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified-read.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, 
"timeShift": null, "title": "Unified Read Pool Wait Duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Unified read pool task execution time during one schedule.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 34 + "y": 14 }, - "id": 4199, + "height": null, + "hideTimeOverride": false, + "id": 175, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, 
sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Duration of One Time Slice", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18392,6 +23632,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -18400,6 +23641,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18410,101 +23652,180 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Unified read pool task total execution duration.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 34 + "y": 14 }, - "id": 4202, + "height": null, + "hideTimeOverride": false, + "id": 176, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Task Execute Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18512,6 +23833,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -18520,6 +23842,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18530,101 +23853,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Task schedule number of times.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 42 + "y": 21 }, - "id": 4204, + "height": null, + 
"hideTimeOverride": false, + "id": 177, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Task Schedule Times", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18632,6 +24034,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 2, @@ -18640,6 +24043,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18650,43 +24054,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Unified Read Pool", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 18 + "y": 0 }, - "id": 2754, + "height": null, + "hideTimeOverride": false, + "id": 178, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total count of different kinds of commands received", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, 
"x": 0, - "y": 14 + "y": 0 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 179, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -18698,7 +24145,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -18706,39 +24153,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage command total", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18746,14 +24209,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18764,28 +24229,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of engine asynchronous request errors", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 180, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -18797,7 +24278,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -18805,40 +24286,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, 
"pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_engine_async_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status!~\"all|success\"}[1m])) by (status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{status}}", - "metric": "tikv_raftstore_raft_process_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async request error", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18846,7 +24342,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -18854,6 +24351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18864,10 +24362,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -18877,215 +24376,575 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous snapshot requests", + "description": "The time consumed by processing asynchronous write requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15, + "id": 181, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration", + "title": "Storage async write duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "cards": { - "cardPadding": null, - "cardRound": null + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async write duration", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous write requests", "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 22 + "x": 12, + "y": 7 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 109, + "height": null, + "hideTimeOverride": false, + "id": 182, + "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, - "targets": [ + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Storage async write duration", "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, "show": true, - "showHistogram": false + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed by processing asynchronous snapshot requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 183, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage async snapshot duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The storage async snapshot duration", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 14 }, - "hiddenSeries": false, - "id": 20000, + "height": null, + "hideTimeOverride": false, + "id": 184, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + 
"hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "99%", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async snapshot duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19093,6 +24952,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19101,6 +24961,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19111,10 +24972,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -19124,129 +24986,302 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The storage async snapshot duration without the involving of raftstore", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 48 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31111, + "id": 185, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Storage async snapshot duration (pure local read)", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "cards": { - "cardPadding": null, - "cardRound": null + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration without the involving of raftstore", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 60 + "x": 12, + "y": 21 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 31112, + "height": null, + "hideTimeOverride": false, + "id": 186, + "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": 
true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], 
"timeFrom": null, "timeShift": null, - "title": "Read index propose wait duration", + "title": "Storage async snapshot duration (pure local read)", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -19256,232 +25291,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated with async snapshot", + "description": "Read index propose wait duration associated with async snapshot", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 72 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31113, + "id": 187, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Read index confirm duration", + "title": "Read index propose wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] 
}, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index propose wait duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 67 + "x": 12, + "y": 28 }, - "hiddenSeries": false, - "id": 24763574239, + "height": null, + "hideTimeOverride": false, + "id": 188, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Full compaction duration seconds", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", 
+ "step": 10, + "target": "" + }, { - "$$hashKey": "object:86", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "$$hashKey": "object:87", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - }, - "timeFrom": null, - "timeShift": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "unit": "s" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 75 - }, - "hiddenSeries": false, - "id": 24763574241, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_pause_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Full compaction pause duration ", + "title": "Read index propose wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19489,7 +25562,7 @@ }, "yaxes": [ { - "$$hashKey": "object:86", + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19498,190 +25571,295 @@ "show": true }, { - "$$hashKey": "object:87", - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 75 + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 24763574240, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 189, + "interval": null, "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true + "show": false }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_increment_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) 
by (le) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Full compaction per-increment duration ", + "title": "Read index confirm duration", "tooltip": { + "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, - "type": "graph", - "xaxis": { - "buckets": null, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "$$hashKey": "object:86", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:87", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "percentunit" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 35 }, - "hiddenSeries": false, - "id": 24763574242, + "height": null, + "hideTimeOverride": false, + "id": 190, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "tikv_storage_process_stat_cpu_usage", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Process Stat Cpu Usage", + "title": "Read index confirm duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19689,8 +25867,8 @@ }, "yaxes": [ { - "$$hashKey": "object:86", - "format": "percentunit", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19698,106 +25876,123 @@ "show": true }, { - "$$hashKey": "object:87", - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 
1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async write duration", + "description": "CPU usage measured over a 30 second window", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 22 + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 20001, + "height": null, + "hideTimeOverride": false, + "id": 191, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "refId": "C" + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Storage async write duration", + 
"title": "Process Stat Cpu Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19805,7 +26000,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -19813,6 +26009,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19823,95 +26020,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async snapshot duration without the involving of raftstore", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 48 + "y": 42 }, - "hiddenSeries": false, - "id": 31114, + "height": null, + "hideTimeOverride": false, + "id": 192, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Storage async snapshot duration (pure local read)", + "title": "Full compaction duration seconds", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19919,6 +26201,7 @@ }, "yaxes": [ { + 
"decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19927,6 +26210,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19937,95 +26221,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 60 + "x": 0, + "y": 49 }, - "hiddenSeries": false, - "id": 31115, + "height": null, + "hideTimeOverride": false, + "id": 193, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read index propose wait duration", + "title": "Full compaction pause duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20033,6 +26402,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -20041,6 +26411,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20051,95 +26422,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": 
false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated with async snapshot", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 72 + "y": 49 }, - "hiddenSeries": false, - "id": 31116, + "height": null, + "hideTimeOverride": false, + "id": 194, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
(le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read index confirm duration", + "title": "Full compaction per-increment duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20147,6 +26603,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -20155,6 +26612,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20165,116 +26623,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Storage", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": 
false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 19 + "y": 0 }, - "id": 9160, + "height": null, + "hideTimeOverride": false, + "id": 195, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 20 + "y": 0 }, - "height": "", - "hiddenSeries": false, - "id": 9552, + "height": null, + "hideTimeOverride": false, + "id": 196, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_write_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "write-{{instance}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_throttle_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "throttle-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": 
null, - "timeRegions": [], "timeShift": null, "title": "Scheduler flow", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20282,6 +26793,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -20290,6 +26802,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20300,92 +26813,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 20 + "y": 0 }, - "height": "", - "hiddenSeries": false, - "id": 9553, + "height": null, + "hideTimeOverride": false, + "id": 197, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_discard_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) / 10000000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler discard ratio", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20393,14 +26926,16 @@ }, "yaxes": 
[ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20411,10 +26946,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -20424,150 +26960,202 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 11512, + "id": 198, + "interval": null, "legend": { - "show": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_scheduler_throttle_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Throttle duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, - "bars": true, + "bars": false, "cacheTimeout": null, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of pending commands per TiKV instance", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - 
"y": 28 + "y": 7 }, - "height": "", - "hiddenSeries": false, - "id": 11906, + "height": null, + "hideTimeOverride": false, + "id": 199, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, - "linewidth": 2, + "lines": true, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_scheduler_throttle_cf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"} != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "refId": "A", - "step": 40 + "query": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler throttled CF", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20575,14 +27163,16 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20593,35 +27183,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 9947, + "height": null, + "hideTimeOverride": false, + "id": 200, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -20635,43 +27240,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": 
null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_throttle_action_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cf, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}-{{cf}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow controller actions", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20679,6 +27296,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -20687,6 +27305,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20697,120 +27316,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 14 }, - "height": "", - "hiddenSeries": false, - "id": 10338, + "height": null, + "hideTimeOverride": false, + "id": 201, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": 
null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_l0_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}_l0_flow-{{instance}}", "metric": "", - "refId": "D", - "step": 40 + "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_flush_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}_flush_flow-{{instance}}", "metric": "", - "refId": "E", - "step": 40 + "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_l0_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_l0_flow-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_flush_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_flush_flow-{{instance}}", "metric": "", - "refId": "C", - "step": 40 + "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flush/L0 flow", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ 
-20818,6 +27474,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -20826,6 +27483,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20836,109 +27494,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "height": "", - "hiddenSeries": false, - "id": 9944, + "height": null, + "hideTimeOverride": false, + "id": 202, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_scheduler_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l0-{{instance}}", "metric": "", - "refId": "A", - "step": 40 + "query": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_scheduler_memtable{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "memtable-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_scheduler_l0_avg{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by 
(instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg_l0-{{instance}}", "metric": "", - "refId": "C", - "step": 40 + "query": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow controller factors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20946,7 +27637,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -20954,6 +27646,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20964,41 +27657,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "hiddenSeries": false, - "id": 9946, + "height": null, + "hideTimeOverride": false, + "id": 203, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -21006,65 +27714,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cf", - "yaxis": 2 - }, - { - "alias": "pending-bytes", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "max(tikv_scheduler_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) / 10000000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "pending-bytes-{{instance}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction pending bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21072,6 +27785,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -21080,90 +27794,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "30", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Throttle time for txn storage commands in 1 minute.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572363, + "height": null, + "hideTimeOverride": false, + "id": 204, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "total": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_txn_command_throttle_time_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Txn command throttled duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21171,7 +27918,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -21179,6 +27927,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21189,80 +27938,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Throttle time for non-txn related processing like analyze or dag in 1 minute.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572365, + "height": null, + "hideTimeOverride": false, + "id": 205, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "total": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_non_txn_command_throttle_time_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Non-txn command throttled duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21270,7 +28051,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -21278,6 +28060,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21288,57 +28071,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Flow Control", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 20 + "y": 0 }, - "id": 2756, + "height": null, + "hideTimeOverride": false, + "id": 206, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of commands on each stage in commit command", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 24, "x": 0, - "y": 18 + "y": 0 }, - "height": "400", - "hiddenSeries": false, - "id": 168, + "height": null, + "hideTimeOverride": false, + "id": 207, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -21346,58 +28170,70 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": 
false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "busy", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "busy-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{stage}}", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler stage total", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21405,6 +28241,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -21413,6 +28250,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21423,116 +28261,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 7 }, - "hiddenSeries": false, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 208, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - 
"hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / 
sum(rate(tikv_scheduler_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler command duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21540,6 +28442,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -21548,6 +28451,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21558,116 +28462,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time which is caused by latch wait in commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 7 }, - "hiddenSeries": false, - "id": 194, + "height": null, + "hideTimeOverride": false, + "id": 209, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": 
true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_latch_wait_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / 
sum(rate(tikv_scheduler_latch_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler latch wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21675,6 +28643,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -21683,6 +28652,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21693,116 +28663,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys read by a commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 195, + "height": null, + "hideTimeOverride": false, + "id": 210, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + 
"hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_kv_command_key_read_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "kv_command_key", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_kv_command_key_read_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_kv_command_key_read_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_kv_command_key_read_count{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler keys read", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21810,7 +28844,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -21818,6 +28853,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21828,116 +28864,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys written by a commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 373, + "height": null, + "hideTimeOverride": false, + "id": 211, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": 
"current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_kv_command_key_write_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "kv_command_key", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_kv_command_key_write_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_kv_command_key_write_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_kv_command_key_write_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler keys written", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21945,7 +29045,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -21953,6 +29054,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21963,93 +29065,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of each CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 560, + "height": null, + "hideTimeOverride": false, + "id": 212, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + 
"linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22057,7 +29178,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22065,6 +29187,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22075,93 +29198,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of lock CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 675, + "height": null, + "hideTimeOverride": false, + "id": 213, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + 
"dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"lock\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [lock]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22169,7 +29311,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22177,6 +29320,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22187,93 +29331,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of write CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 28 }, - "id": 829, + "height": null, + "hideTimeOverride": false, + "id": 214, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - 
"selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"write\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [write]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22281,7 +29444,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22289,6 +29453,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22299,93 +29464,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of default CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 28 }, - "id": 830, + "height": null, + "hideTimeOverride": false, + "id": 215, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"default\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [default]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22393,7 +29577,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22401,6 +29586,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22411,125 +29597,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed on reading when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572710, + "height": null, + "hideTimeOverride": false, + "id": 216, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + 
"dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", + "intervalFactor": 1, + "legendFormat": "avg", "metric": "", - "refId": "B", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + 
"target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_scheduler_processing_read_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_processing_read_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", + "intervalFactor": 1, + "legendFormat": "count", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler command read duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22537,6 +29778,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -22545,6 +29787,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22555,10 +29798,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -22568,72 +29812,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed on checking memory locks", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7236, + "id": 217, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, "repeat": null, "repeatDirection": null, - "repeatedByRow": true, "reverseYBuckets": false, - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_check_mem_lock_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + 
"interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Check memory locks duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -22641,47 +29907,93 @@ } ], "repeat": "command", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Scheduler - $command", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 26 + "y": 0 }, - "id": 2755, + "height": null, + "hideTimeOverride": false, + "id": 218, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of commands on each stage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 12, "x": 0, - "y": 15 + "y": 0 }, - "height": "400", - "id": 167, + "height": null, + "hideTimeOverride": false, + "id": 219, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -22689,47 +30001,70 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "busy", - "refId": "A", - "step": 20 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{stage}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{stage}}", - "refId": "B", - "step": 20 + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler stage total", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22737,6 +30072,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -22745,6 +30081,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22755,36 +30092,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total writing bytes of commands on each stage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 12, "x": 12, - "y": 15 + "y": 0 }, - "height": "400", - "id": 3834, + "height": null, + "hideTimeOverride": false, + "id": 220, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -22792,39 +30149,55 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": 
null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_writing_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 20 + "metric": "", + "query": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler writing bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22832,6 +30205,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -22840,7 +30214,8 @@ "show": true }, { - "format": "bytes", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -22850,81 +30225,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different priority commands", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 7 }, - "height": "", - "id": 1, + "height": null, + "hideTimeOverride": false, + "id": 221, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_commands_pri_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "{{priority}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum(rate(\n tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler priority commands", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22932,6 +30338,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -22940,6 +30347,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22950,123 +30358,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 300 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "120s", - "handler": 1, - "message": "TiKV scheduler context total", - "name": "scheduler pending commands alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of pending commands per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 7 }, - "height": "", - "id": 193, + "height": null, + "hideTimeOverride": false, + "id": 222, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_contex_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 
1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 40 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 300 + "query": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler pending commands", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23074,6 +30471,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -23082,6 +30480,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23092,10 +30491,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -23105,108 +30505,188 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 45 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572468, + "id": 223, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Txn Scheduler Pool Wait Duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": 
null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Scheduler", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 27 + "y": 0 }, - "id": 2758, + "height": null, + "hideTimeOverride": false, + "id": 224, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of GC tasks processed by gc_worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 0 }, - "id": 121, + "height": null, + "hideTimeOverride": false, + "id": 225, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23214,62 +30694,100 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_tasks_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total-{{task}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_storage_gc_skipped_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
(task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "skipped-{{task}}", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gcworker_gc_task_fail_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "failed-{{task}}", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_worker_too_busy{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "gcworker-too-busy", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23277,7 +30795,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -23285,6 +30804,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23295,35 +30815,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing GC tasks", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 46 + "y": 0 }, - "id": 2224, + "height": null, + "hideTimeOverride": false, + "id": 226, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, 
"min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23331,62 +30872,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max-{{task}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%-{{task}}", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%-{{task}}", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gcworker_gc_task_duration_vec_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task) / sum(rate(tikv_gcworker_gc_task_duration_vec_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average-{{task}}", - "refId": "D" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC tasks duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23394,6 +30996,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -23402,6 +31005,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23412,76 +31016,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The GC duration", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 7 }, - "id": 969, + "height": null, + "hideTimeOverride": false, + "id": 227, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1.0, sum(rate(tidb_tikvclient_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance, le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiDB GC seconds", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23489,6 +31129,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -23497,6 +31138,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23507,79 +31149,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, "description": "The count of TiDB GC worker actions", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 7 }, - "id": 966, + "height": null, + "hideTimeOverride": false, + "id": 228, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_tikvclient_gc_worker_actions_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiDB GC worker actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23587,14 +31262,16 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23605,35 +31282,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Progress of ResolveLocks, the first phase of GC", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 14 }, - "id": 2823, + "height": null, + "hideTimeOverride": false, + "id": 229, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23641,41 +31339,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tidb_tikvclient_range_task_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"resolve-locks.*\"}) by (result)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{result}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": 
"", + "query": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "ResolveLocks Progress", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23683,7 +31395,7 @@ }, "yaxes": [ { - "decimals": 0, + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -23692,45 +31404,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Progress of TiKV's GC", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 14 }, - "id": 2821, + "height": null, + "hideTimeOverride": false, + "id": 230, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23738,41 +31472,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_gcworker_autogc_processed_regions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}) by (instance) / sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n 
tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiKV Auto GC Progress", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23784,92 +31532,128 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": "1.1", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "keys / second", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 21 }, - "id": 2589, + "height": null, + "hideTimeOverride": false, + "id": 231, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_mvcc_gc_delete_versions_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{key_mode}}_keys/s", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC speed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": 
null, "show": true, @@ -23877,14 +31661,16 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23895,35 +31681,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "SafePoint used for TiKV's Auto GC", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 21 }, - "id": 2822, + "height": null, + "hideTimeOverride": false, + "id": 232, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23931,41 +31738,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_gcworker_autogc_safe_point{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}) by (instance) / (2^18)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiKV Auto GC SafePoint", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23982,216 +31803,217 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, - "description": " \tThe lifetime of TiDB GC", + "description": "The lifetime of TiDB 
GC", "editable": true, "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 74 + "y": 28 }, - "id": 27, + "height": null, + "hideTimeOverride": false, + "id": 233, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tidb_tikvclient_gc_config{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"tikv_gc_life_time\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "refId": "A", - "step": 60 + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": "", + "timeFrom": null, + "timeShift": null, "title": "GC lifetime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "The interval of TiDB GC", "editable": true, "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 74 + "y": 28 }, - "id": 28, + "height": null, + "hideTimeOverride": false, + "id": 234, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": 
"50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tidb_tikvclient_gc_config{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"tikv_gc_run_interval\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 60 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": "", + "timeFrom": null, + "timeShift": null, "title": "GC interval", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Keys handled in GC compaction filter", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 74 + "y": 35 }, - "id": 6596, + "height": null, + "hideTimeOverride": false, + "id": 235, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24199,97 +32021,175 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", 
- "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{key_mode}}_filtered", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_skip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_skipped", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_mvcc_rollback{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-rollback/mvcc-lock", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_orphan_versions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_orphan-versions", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_perform{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_performed-times", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_failure{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode,type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_failure-{{type}}", - "refId": "F" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_met{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-met", - "refId": "G" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_handled{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-handled", - "refId": "H" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_wasted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-wasted", - "refId": "I" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC in Compaction Filter", "tooltip": { 
+ "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24298,7 +32198,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24306,45 +32206,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "GC scan write details", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 86 + "y": 42 }, - "id": 8767, + "height": null, + "hideTimeOverride": false, + "id": 236, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24352,37 +32274,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"write\"}[1m])) by (key_mode,tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}_{{tag}}", - "refId": "A" + "legendFormat": "{{key_mode}}-{{tag}}", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC scan write details", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24391,7 +32331,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24399,45 +32339,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": 
null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "GC scan default details", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 86 + "y": 42 }, - "id": 8768, + "height": null, + "hideTimeOverride": false, + "id": 237, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24445,37 +32407,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"default\"}[1m])) by (key_mode,tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}_{{tag}}", - "refId": "A" + "legendFormat": "{{key_mode}}-{{tag}}", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC scan default details", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24484,7 +32464,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24492,69 +32472,109 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "GC", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 28 + "y": 0 }, - "id": 2759, + "height": null, + "hideTimeOverride": false, + "id": 238, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The rate of Raft snapshot messages sent", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 0 }, - "hiddenSeries": false, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 239, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24562,43 +32582,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": " ", - "refId": "A", - "step": 60 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Rate snapshot message", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24606,6 +32638,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -24614,6 +32647,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24624,43 +32658,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { 
"aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of snapshots in different states", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 0 }, - "hiddenSeries": false, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 240, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24668,44 +32715,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, - "steppedLine": true, + "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_snapshot_traffic_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 60 + "query": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot state count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24713,7 +32771,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24721,6 +32780,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24731,43 +32791,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time snapshot generation tasks waited to be scheduled. 
", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 7 }, - "hiddenSeries": false, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 241, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24775,45 +32848,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Snapshot generation wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24821,6 +32904,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -24829,6 +32913,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24839,43 +32924,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when handling snapshots", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - 
"fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573704, + "height": null, + "hideTimeOverride": false, + "id": 242, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24883,59 +32981,85 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_send_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "send", - "refId": "A", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "apply", - "refId": "B", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"generate\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "generate", - "refId": "C", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Handle snapshot duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24943,6 +33067,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -24951,6 +33076,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24961,43 +33087,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The snapshot size (P99.99).9999", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 14 }, - "hiddenSeries": false, - "id": 44, + "height": null, + "hideTimeOverride": false, + "id": 243, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25005,44 +33144,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_snapshot_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "size", - "metric": "tikv_snapshot_size_bucket", 
- "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99.99% Snapshot size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25050,6 +33200,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -25058,6 +33209,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25068,43 +33220,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of KV within a snapshot in .9999", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 14 }, - "hiddenSeries": false, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 244, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25112,44 +33277,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_snapshot_kv_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "metric": "tikv_snapshot_kv_count_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - 
"timeRegions": [], "timeShift": null, "title": "99.99% Snapshot KV count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25157,7 +33333,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -25165,6 +33342,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25175,43 +33353,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Action stats for snapshot generating and applying", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 21 }, - "hiddenSeries": false, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 245, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25219,49 +33410,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_snapshot_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type, status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}-{{status}}", - "refId": "A" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_raftstore_clean_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": 
"clean-region-by-{{type}}", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot Actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25269,61 +33481,76 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The speed of sending or receiving snapshot", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 21 }, - "hiddenSeries": false, - "id": 4201, + "height": null, + "hideTimeOverride": false, + "id": 246, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25331,52 +33558,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(tikv_snapshot_limit_transport_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}--{{type}}", - "metric": "tikv_snapshot_limit_transport_bytes", - "refId": "A", - "step": 40 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - 
"expr": "rate(tikv_snapshot_limit_generate_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}--generate", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}-generate", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot transport speed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25384,74 +33629,118 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Snapshot", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 29 + "y": 0 }, - "id": 2760, + "height": null, + "hideTimeOverride": false, + "id": 247, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of tasks handled by worker", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 0 }, - "id": 59, + "height": null, + "hideTimeOverride": false, + "id": 248, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25459,39 +33748,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": 
true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_worker_handled_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Worker handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25499,14 +33804,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25517,40 +33824,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tCurrent pending and running tasks of worker", + "description": "Current pending and running tasks of worker", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 0 }, - "id": 1395, + "height": null, + "hideTimeOverride": false, + "id": 249, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25558,39 +33881,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_worker_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Worker pending tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25598,14 +33937,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25616,40 +33957,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of tasks handled by future_pool", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 7 }, - "id": 1876, + "height": null, + "hideTimeOverride": false, + "id": 250, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25657,39 +34014,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_handled_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "FuturePool handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25697,14 +34070,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25715,40 +34090,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Current pending and running tasks of future_pool", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 7 }, - "id": 1877, + "height": null, + "hideTimeOverride": false, + "id": 251, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25756,39 +34147,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "FuturePool pending tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25796,14 +34203,16 @@ }, "yaxes": [ { - 
"format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25814,100 +34223,150 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Task", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 30 + "y": 0 }, - "id": 2757, + "height": null, + "hideTimeOverride": false, + "id": 252, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#5195ce", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed to handle coprocessor read requests", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 22 + "y": 0 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3062, + "id": 253, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Request duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + 
"values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -25916,33 +34375,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": "The time consumed to handle coprocessor read requests", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 22 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 254, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25950,46 +34426,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + 
"query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Request duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25997,56 +34550,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 30 + "y": 7 }, - "id": 74, + "height": null, + "hideTimeOverride": false, + "id": 255, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26054,41 +34627,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{req}}", - "metric": "tikv_coprocessor_request_error", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26096,56 +34683,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "ops", - "label": "", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 30 + "y": 7 }, - "id": 3128, + "height": null, + "hideTimeOverride": false, + "id": 256, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": 
false, "values": true @@ -26153,41 +34760,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{reason}}", - "metric": "tikv_coprocessor_request_error", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Request Errors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26195,56 +34816,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 14 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 257, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26252,38 +34893,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_coprocessor_scan_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Total KV Cursor Operations", + "title": "KV Cursor Operations", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26291,56 +34949,76 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 14 }, - "id": 3129, + "height": null, + "hideTimeOverride": false, + "id": 258, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26348,45 +35026,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, avg(rate(tikv_coprocessor_scan_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le, req)) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "100%-{{req}}", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, avg(rate(tikv_coprocessor_scan_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, req)) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%-{{req}}", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "KV Cursor Operations", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26394,57 +35150,76 @@ }, "yaxes": [ { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 2118, + "height": null, + "hideTimeOverride": false, + "id": 259, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26452,46 +35227,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "key_skipped", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"internal_delete_skipped_count\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "delete_skipped-{{req}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total RocksDB Perf Statistics", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26499,57 +35283,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, 
"show": true }, { "decimals": null, "format": "short", - "label": "", + "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 551, + "height": null, + "hideTimeOverride": false, + "id": 260, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26557,41 +35360,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_response_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "size", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Response Size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26599,72 +35416,118 @@ }, "yaxes": [ { - "decimals": 0, - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Coprocessor Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": 
null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 31 + "y": 0 }, - "id": 3197, + "height": null, + "hideTimeOverride": false, + "id": 261, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when handling coprocessor requests", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 23 + "y": 0 }, - "id": 113, + "height": null, + "hideTimeOverride": false, + "id": 262, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26672,46 +35535,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Handle duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26719,58 +35659,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", - "label": "", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "decimals": 1, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The time consumed to handle coprocessor requests per TiKV instance (P95)", + "description": "The time consumed to handle coprocessor requests per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 23 + "y": 0 }, - "id": 117, + "height": null, + "hideTimeOverride": false, + "id": 263, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26778,39 +35736,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, 
instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{req}}", - "refId": "B", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "95% Handle duration by store", + "title": "Handle duration by store", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26818,56 +35860,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when coprocessor requests are wait for being handled", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 30 + "y": 7 }, - "id": 111, + "height": null, + "hideTimeOverride": false, + "id": 264, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26875,46 +35937,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + 
"minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "D" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26922,56 +36061,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when coprocessor requests are wait for being handled in each TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 30 + "y": 7 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 265, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26979,39 +36138,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + 
], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{req}}", - "refId": "B", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "95% Wait duration by store", + "title": "Wait duration by store", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27019,57 +36262,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "decimals": 1, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 14 }, - "id": 3195, + "height": null, + "hideTimeOverride": false, + "id": 266, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27077,41 +36339,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_dag_request_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (vec_type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{vec_type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total DAG Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27119,57 +36395,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of DAG executors", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 14 }, - "id": 3264, + "height": null, + "hideTimeOverride": false, + "id": 267, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27177,41 +36472,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_executor_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total DAG Executors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27219,56 +36528,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 552, + "height": null, + "hideTimeOverride": false, + "id": 268, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27276,41 +36605,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"select\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details (Table Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27318,56 +36661,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": 
"rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 3263, + "height": null, + "hideTimeOverride": false, + "id": 269, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27375,41 +36738,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"index\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details (Index Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27417,56 +36794,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 28 }, - "id": 122, + "height": null, + "hideTimeOverride": false, + "id": 270, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, 
"sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27474,42 +36871,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"select\"}[1m])) by (tag,cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{cf}}-{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details by CF (Table Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27517,56 +36927,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 28 }, - "id": 554, + "height": null, + "hideTimeOverride": false, + "id": 271, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27574,43 +37004,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, 
"points": false, "renderer": "flot", - "repeat": "cf", - "repeatDirection": "h", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"index\"}[1m])) by (tag,cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{cf}}-{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details by CF (Index Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27618,144 +37060,423 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#5195ce", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed on checking memory locks for coprocessor requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 119 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7594, + "id": 272, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_mem_lock_check_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Memory lock checking duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed on checking memory locks for coprocessor requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 273, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory lock checking duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Coprocessor Detail", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 32 + "y": 0 }, - "id": 2761, + "height": null, + "hideTimeOverride": false, + "id": 274, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 0 }, - "id": 2108, + "height": null, + "hideTimeOverride": false, + "id": 275, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27763,45 +37484,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_threads_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, state)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{state}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_threads_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-total", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Threads state", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, 
+ "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27809,6 +37555,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -27817,6 +37564,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -27827,28 +37575,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 0 }, - "id": 2258, + "height": null, + "hideTimeOverride": false, + "id": 276, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -27862,40 +37632,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, sum(rate(tikv_threads_io_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name, io) > 1024)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}-{{io}}", - "refId": "A", - "step": 4 + "legendFormat": "{{name}}", + "metric": "", + "query": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Threads IO", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27903,7 +37688,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -27911,6 +37697,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -27921,28 +37708,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 7 }, - "id": 2660, + "height": null, + "hideTimeOverride": false, + "id": 277, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -27956,40 +37765,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, max(rate(tikv_thread_voluntary_context_switches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name) > 200)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Thread Voluntary Context Switches", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27997,6 +37821,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -28005,6 +37830,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28015,28 +37841,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 7 }, - "id": 2661, + "height": null, + "hideTimeOverride": false, + "id": 278, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, 
"min": false, "rightSide": true, @@ -28050,40 +37898,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, max(rate(tikv_thread_nonvoluntary_context_switches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name) > 100)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Thread Nonvoluntary Context Switches", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28091,6 +37954,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -28099,6 +37963,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28109,57 +37974,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Threads", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 33 + "y": 0 }, - "id": 2762, + "height": null, + "hideTimeOverride": false, + "id": 279, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of get operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 59 + "y": 0 }, - "hiddenSeries": false, - "id": 138, + "height": null, + "hideTimeOverride": false, 
+ "id": 280, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28167,83 +38073,115 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"memtable_hit\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "memtable", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=~\"block_cache_data_hit|block_cache_filter_hit\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "block_cache", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l0\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l0", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l1\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l1", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l2_and_up\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l2_and_up", - "refId": "F", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28251,6 +38189,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28259,6 +38198,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28269,41 +38209,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing get operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 59 + "y": 0 }, - "hiddenSeries": false, - "id": 82, + "height": null, + "hideTimeOverride": false, + "id": 281, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 
null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28311,73 +38266,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", 
"format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28385,7 +38367,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28393,6 +38376,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28403,41 +38387,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of seek operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 7 }, - "hiddenSeries": false, - "id": 129, + "height": null, + "hideTimeOverride": false, + "id": 282, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28445,95 +38444,130 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_seek\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "seek", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_seek_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "seek_found", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_next\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "next", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_next_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "next_found", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_prev\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "prev", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + 
"refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_prev_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "prev_found", "metric": "", - "refId": "F", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Seek operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28541,6 +38575,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28549,6 +38584,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28559,41 +38595,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing seek operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 7 }, - "hiddenSeries": false, - "id": 125, + "height": null, + "hideTimeOverride": false, + "id": 283, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28601,73 +38652,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_seek_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Seek duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28675,7 +38753,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28683,6 +38762,7 @@ "show": true }, { + "decimals": 
null, "format": "short", "label": null, "logBase": 1, @@ -28693,41 +38773,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of write operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 75 + "y": 14 }, - "hiddenSeries": false, - "id": 139, + "height": null, + "hideTimeOverride": false, + "id": 284, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28735,65 +38830,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=~\"write_done_by_self|write_done_by_other\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "done", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_timeout\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "timeout", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_with_wal\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "with_wal", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28801,6 +38916,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28809,6 +38925,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28819,41 +38936,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 75 + "y": 14 }, - "hiddenSeries": false, - "id": 126, + "height": null, + "hideTimeOverride": false, + "id": 285, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28861,73 +38993,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": 
true, @@ -28935,7 +39094,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28943,6 +39103,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28953,41 +39114,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe count of WAL sync operations", + "description": "The count of WAL sync operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 83 + "y": 21 }, - "hiddenSeries": false, - "id": 137, + "height": null, + "hideTimeOverride": false, + "id": 286, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28995,50 +39171,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_wal_file_synced{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "sync", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "WAL sync operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29046,6 +39227,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29054,6 +39236,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29064,41 +39247,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + 
"alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing write wal operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 83 + "y": 21 }, - "hiddenSeries": false, - "id": 130, + "height": null, + "hideTimeOverride": false, + "id": 287, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29106,73 +39304,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write WAL duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29180,7 +39405,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -29188,6 +39414,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29198,41 +39425,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of compaction and flush operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 28 }, - "hiddenSeries": false, - "id": 128, + "height": null, + "hideTimeOverride": false, + "id": 288, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29240,50 
+39482,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_event_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_engine_event_total", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29291,6 +39538,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29299,6 +39547,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29309,41 +39558,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing WAL sync operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 28 }, - "hiddenSeries": false, - "id": 135, + "height": null, + "hideTimeOverride": false, + "id": 289, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29351,74 +39615,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": 
false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - 
"step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "WAL sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29426,14 +39716,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29444,41 +39736,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Compaction guard actions", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 99 + "y": 35 }, - "hiddenSeries": false, - "id": 2453, + "height": null, + "hideTimeOverride": false, + "id": 290, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29486,50 +39793,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_compaction_guard_action_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=~\"default|write\"}[1m])) by (cf, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cf}}-{{type}}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{ type}}", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n 
[$__rate_interval]\n)) by (cf, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction guard actions", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29537,6 +39849,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29545,6 +39858,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29555,41 +39869,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing the compaction and flush operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 99 + "y": 35 }, - "hiddenSeries": false, - "id": 136, + "height": null, + "hideTimeOverride": false, + "id": 291, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29597,74 +39926,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29672,7 +40027,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -29680,6 +40036,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29690,41 +40047,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when reading SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 
null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 107 + "y": 42 }, - "hiddenSeries": false, - "id": 140, + "height": null, + "hideTimeOverride": false, + "id": 292, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29732,77 +40104,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", "metric": "", - "refId": "C", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "SST read duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29810,14 +40205,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 10, + "logBase": 2, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29828,40 +40225,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 107 + "y": 42 }, - "hiddenSeries": false, - "id": 2451, + "height": null, + "hideTimeOverride": false, + "id": 293, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29869,51 +40282,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_compaction_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
db=\"$db\"}[1m])) by (cf, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{cf}} - {{reason}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{reason}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29921,59 +40338,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The block cache size. Broken down by column family if shared block cache is disabled.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 115 + "y": 49 }, - "hiddenSeries": false, - "id": 102, + "height": null, + "hideTimeOverride": false, + "id": 294, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29981,49 +40415,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, avg(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by(cf, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30031,6 +40471,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -30039,6 +40480,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -30049,41 +40491,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of memtable", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 115 + "y": 49 }, - "hiddenSeries": false, - "id": 88, + "height": null, + "hideTimeOverride": false, + "id": 295, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30091,49 +40548,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"memtable_hit\"}[1m])) / (sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"memtable_hit\"}[1m])) + sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"memtable_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_memtable_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "hit", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memtable hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30141,60 +40604,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of block cache operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 123 + "y": 56 }, - "height": "", - "hiddenSeries": false, - "id": 467, + "height": null, + "hideTimeOverride": false, + "id": 296, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30202,116 +40681,145 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": 
"7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_byte_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "total_read", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_byte_write\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "total_written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "data_insert", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + 
"intervalFactor": 1, "legendFormat": "filter_insert", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_bytes_evict\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "filter_evict", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "index_insert", "metric": "", - "refId": "F", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_bytes_evict\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "index_evict", "metric": "", - "refId": "G", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30319,59 +40827,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": 
"binBps", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of block cache", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 123 + "y": 56 }, - "hiddenSeries": false, - "id": 80, + "height": null, + "hideTimeOverride": false, + "id": 297, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30379,87 +40904,115 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "all", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "data", "metric": "", - "refId": "D", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "filter", "metric": "", - "refId": "B", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "index", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_bloom_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bloom_prefix_useful\"}[1m])) / sum(rate(tikv_engine_bloom_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", db=\"$db\", type=\"bloom_prefix_checked\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "bloom prefix", "metric": "", - "refId": "E", - "step": 10 + "query": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30467,60 +41020,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of operations on keys", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 131 + "y": 63 }, - "height": "", - "hiddenSeries": false, - "id": 132, + "height": null, + "hideTimeOverride": false, + "id": 298, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30528,72 +41097,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"keys_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "read", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"keys_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_compaction_num_corrupt_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "corrupt", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30601,59 +41183,76 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of block cache operations", + 
"editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 131 + "y": 63 }, - "hiddenSeries": false, - "id": 468, + "height": null, + "hideTimeOverride": false, + "id": 299, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30661,86 +41260,115 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_add", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "data_add", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "filter_add", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "index_add", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_add_failures\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "add_failures", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30748,6 +41376,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -30756,6 +41385,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -30766,42 +41396,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow rate of read operations per type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": 
"rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 139 + "y": 70 }, - "height": "", - "hiddenSeries": false, - "id": 85, + "height": null, + "hideTimeOverride": false, + "id": 300, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30809,61 +41453,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "get", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"iter_bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "scan", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30871,59 +41524,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": 
null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys in each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 139 + "y": 70 }, - "hiddenSeries": false, - "id": 131, + "height": null, + "hideTimeOverride": false, + "id": 301, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30931,51 +41601,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_estimate_num_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_estimate_num_keys", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30983,60 +41657,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of 
write operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 147 + "y": 77 }, - "height": "", - "hiddenSeries": false, - "id": 86, + "height": null, + "hideTimeOverride": false, + "id": 302, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31044,59 +41734,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"wal_file_bytes\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "wal", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, 
"show": true, @@ -31104,59 +41805,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The bytes per read", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 147 + "y": 77 }, - "hiddenSeries": false, - "id": 133, + "height": null, + "hideTimeOverride": false, + "id": 303, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31164,74 +41882,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + 
"query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Read", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31239,14 +41983,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -31257,41 +42003,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow rate of compaction operations per type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 155 + "y": 84 }, - "hiddenSeries": false, - "id": 90, + "height": null, + "hideTimeOverride": false, + "id": 304, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": 
false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31299,68 +42060,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_compaction_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "read", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_compaction_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"flush_write_bytes\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "flushed", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction flow", "tooltip": { + "msResolution": true, "shared": true, 
"sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31368,59 +42146,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The bytes per write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 155 + "y": 84 }, - "hiddenSeries": false, - "id": 134, + "height": null, + "hideTimeOverride": false, + "id": 305, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31428,74 +42223,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) 
", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Write", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31503,14 +42324,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, - "logBase": 10, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -31521,41 +42344,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The read amplification per TiKV instance \t", + "description": "The read amplification per TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, 
"x": 0, - "y": 163 + "y": 91 }, - "hiddenSeries": false, - "id": 518, + "height": null, + "hideTimeOverride": false, + "id": 306, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31563,51 +42401,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_read_amp_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"read_amp_total_read_bytes\"}[1m])) by (instance) / sum(rate(tikv_engine_read_amp_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"read_amp_estimate_useful_bytes\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read amplication", + "title": "Read amplification", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31615,59 +42457,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 
1, "description": "The pending bytes to be compacted", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 163 + "y": 91 }, - "hiddenSeries": false, - "id": 127, + "height": null, + "hideTimeOverride": false, + "id": 307, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31675,51 +42534,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction pending bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31727,59 +42590,76 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of snapshot of each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, 
+ "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 171 + "y": 98 }, - "hiddenSeries": false, - "id": 516, + "height": null, + "hideTimeOverride": false, + "id": 308, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31787,51 +42667,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_engine_num_snapshots{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 10 + "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Number of snapshots", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31839,59 +42723,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The compression ratio of each level", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 171 + "y": 98 }, - "hiddenSeries": false, - "id": 863, + "height": null, + "hideTimeOverride": false, + "id": 309, + "interval": null, + "isNew": true, "legend": { 
"alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31899,51 +42800,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_compression_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf, level)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{cf}} - level - {{level}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-L{{level}}", "metric": "", - "refId": "A", - "step": 10 + "query": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compression ratio", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31951,57 +42856,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of SST files for different column families in each level", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 179 + "y": 105 }, - "hiddenSeries": false, - "id": 2002, + "height": null, + "hideTimeOverride": false, + "id": 310, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": 
true @@ -32009,48 +42933,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_num_files_at_level{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf, level)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "cf-{{cf}}, level-{{level}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-L{{level}}", + "metric": "", + "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Number files at each level", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32058,6 +42989,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32066,6 +42998,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32076,41 +43009,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time that the oldest unreleased snapshot survivals", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 179 + "y": 105 }, - "hiddenSeries": false, - "id": 517, + "height": null, + "hideTimeOverride": false, + "id": 311, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32118,51 +43066,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - 
"pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_engine_oldest_snapshot_duration{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_engine_oldest_snapshot_duration", - "refId": "A", - "step": 10 + "metric": "", + "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Oldest snapshots duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32170,58 +43122,76 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Stall conditions changed of each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 187 + "y": 112 }, - "hiddenSeries": false, - "id": 2381, + "height": null, + "hideTimeOverride": false, + "id": 312, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32229,48 +43199,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ 
{ - "expr": "tikv_engine_stall_conditions_changed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}-{{type}}", - "refId": "B" + "metric": "", + "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Stall conditions changed of each CF", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32278,6 +43255,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32286,6 +43264,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32296,39 +43275,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when ingesting SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 187 + "y": 112 }, - "hiddenSeries": false, - "id": 2003, + "height": null, + "hideTimeOverride": false, + "id": 313, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32336,55 +43332,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_snapshot_ingest_sst_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_snapshot_ingest_sst_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) / sum(rate(tikv_snapshot_ingest_sst_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average", - "refId": "B" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Ingest SST duration seconds", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32392,6 +43456,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -32400,6 +43465,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32410,40 +43476,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 195 + "y": 119 }, - "hiddenSeries": false, - "id": 2452, + "height": null, + "hideTimeOverride": false, + "id": 314, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32451,51 +43533,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(increase(tikv_engine_write_stall_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Stall Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + 
"transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32503,59 +43589,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time which is caused by write stall", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 195 + "y": 119 }, - "hiddenSeries": false, - "id": 87, + "height": null, + "hideTimeOverride": false, + "id": 315, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32563,77 +43666,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", "metric": "", - "refId": "C", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write stall duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32641,14 +43767,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32659,10 +43787,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -32672,71 +43801,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The level that the external file ingests into", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 203 + "y": 126 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12712, + "id": 316, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": 
null, + "repeatDirection": null, "reverseYBuckets": false, - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_engine_ingestion_picked_level_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Ingestion picked level", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -32745,35 +43897,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The memtable size of each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 203 + "y": 126 }, - "hiddenSeries": false, - "id": 103, + "height": null, + "hideTimeOverride": false, + "id": 317, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32781,49 +43948,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"mem-tables-all\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memtable size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32831,6 +44004,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -32839,6 +44013,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32849,54 +44024,97 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": "db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "RocksDB - $db", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 34 + "y": 0 }, - "id": 12802, + "height": null, + "hideTimeOverride": false, + "id": 318, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The count of operations per second", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 12892, + "height": null, + "hideTimeOverride": false, + "id": 319, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -32904,58 +44122,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": 
null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_apply_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_read_entry_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_entry", - "refId": "B" + "metric": "", + "query": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_read_message_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_message", - "refId": "C" + "metric": "", + "query": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Operation", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32963,6 +44209,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -32971,6 +44218,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32981,38 +44229,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time used in write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 12893, + "height": null, + "hideTimeOverride": false, + "id": 320, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33020,66 +44285,124 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(raft_engine_write_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "avg", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "95%", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "99%", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "hide": false, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33087,14 +44410,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33105,38 +44430,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The I/O flow rate", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 7 }, - "hiddenSeries": false, - "id": 
12896, + "height": null, + "hideTimeOverride": false, + "id": 321, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33144,50 +44486,71 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_background_rewrite_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "rewrite {{type}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "rewrite-{{type}}", + "metric": "", + "query": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33195,6 +44558,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -33203,6 +44567,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33213,38 +44578,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "99% duration breakdown of write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } 
}, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 7 }, - "hiddenSeries": false, - "id": 12895, + "height": null, + "hideTimeOverride": false, + "id": 322, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33252,58 +44634,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_preprocess_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "wait", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "wal", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_apply_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "apply", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Duration Breakdown (99%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33311,14 +44721,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33329,38 +44741,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The bytes per write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 48 + "y": 14 }, - "hiddenSeries": false, - "id": 12898, + "height": null, + "hideTimeOverride": false, + "id": 323, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33368,58 +44797,124 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(raft_engine_write_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "avg", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Written", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33427,6 +44922,7 @@ }, 
"yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -33435,6 +44931,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33445,38 +44942,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "999% duration breakdown of WAL write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 48 + "y": 14 }, - "hiddenSeries": false, - "id": 12933, + "height": null, + "hideTimeOverride": false, + "id": 324, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33484,66 +44998,101 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "total", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_sync_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "sync", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n 
raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_allocate_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "allocate", - "refId": "G" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_rotate_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "rotate", - "refId": "H" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "WAL Duration Breakdown (999%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33551,14 +45100,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33569,38 +45120,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The average number of files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 21 }, - "hiddenSeries": false, - "id": 12899, + "height": null, + "hideTimeOverride": false, + "id": 325, + "interval": null, + 
"isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33608,55 +45176,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "avg(raft_engine_log_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "avg(raft_engine_swap_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "swap", - "refId": "B" + "metric": "", + "query": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "avg(raft_engine_recycled_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} - recycle", - "refId": "C" + "legendFormat": "{{type}}-recycle", + "metric": "", + "query": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "File Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33664,6 +45263,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33672,6 +45272,7 @@ "show": true }, { + "decimals": null, "format": "short", 
"label": null, "logBase": 1, @@ -33682,38 +45283,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The 99% duration of operations other than write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 56 + "y": 21 }, - "hiddenSeries": false, - "id": 12897, + "height": null, + "hideTimeOverride": false, + "id": 326, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33721,58 +45339,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_read_entry_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_entry", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_read_message_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_message", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n 
\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_purge_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "purge", - "refId": "E" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Other Durations (99%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33780,6 +45426,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -33788,6 +45435,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33798,38 +45446,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The average number of log entries", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 64 + "y": 28 }, - "hiddenSeries": false, - "id": 12934, + "height": null, + "hideTimeOverride": false, + "id": 327, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33837,43 +45502,56 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "avg(raft_engine_log_entry_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_log_entry_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Entry Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33881,6 +45559,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33889,6 +45568,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33899,99 +45579,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Engine", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 35 + "y": 0 }, - "id": 3301, + "height": null, + "hideTimeOverride": false, + "id": 328, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 0 }, - "id": 3555, + "height": null, + "hideTimeOverride": false, + "id": 329, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "live blob file num", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_engine_titandb_num_obsolete_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "obsolete blob file num", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33999,7 +45749,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -34007,6 +45758,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34017,83 +45769,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 0 }, - "id": 3557, + "height": null, + "hideTimeOverride": false, + "id": 330, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - 
"spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_live_blob_file_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "live blob file size", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_engine_titandb_obsolete_blob_file_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "obsolete blob file size", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34101,6 +45897,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -34109,6 +45906,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34119,130 +45917,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 3523, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_engine_titandb_live_blob_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "live blob size", - "refId": "A" + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } } - 
], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Live blob size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The blob cache size.", "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 34 + "x": 0, + "y": 7 }, - "id": 4655, + "height": null, + "hideTimeOverride": false, + "id": 331, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34250,38 +45974,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, avg(tikv_engine_blob_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}) by(cf, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{cf}}", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "live blob size", + "metric": "", + "query": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob cache size", + "title": "Live blob size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34289,6 +46030,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -34297,6 +46039,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34307,35 +46050,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - 
"dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of block cache", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 7 }, - "id": 4020, + "height": null, + "hideTimeOverride": false, + "id": 332, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34343,47 +46107,55 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_cache_hit\"}[1m])) / (sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$titan_db\", type=\"blob_cache_hit\"}[1m])) + sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$titan_db\", type=\"blob_cache_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "all", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) 
))", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob cache hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34391,50 +46163,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 14 }, - "id": 4023, + "height": null, + "hideTimeOverride": false, + "id": 333, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34442,69 +46240,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_percentile95\"})", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "B" + "metric": "", + "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Iter touched blob file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34512,7 +46341,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -34520,6 +46350,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34530,76 +46361,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The blob cache size.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + 
"fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 14 }, - "id": 4025, + "height": null, + "hideTimeOverride": false, + "id": 334, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_blob_file_discardable_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}) by (ratio)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ratio}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{cf}}", + "metric": "", + "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file discardable ratio distribution", + "title": "Blob cache size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34607,7 +46474,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -34615,6 +46483,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34625,32 +46494,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 21 }, - "id": 3414, + "height": null, + "hideTimeOverride": false, + "id": 335, + "interval": null, + 
"isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34658,69 +46551,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", 
type=\"blob_key_size_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "B" + "metric": "", + "query": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob key size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34728,14 +46652,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34746,97 +46672,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 21 }, - "id": 3446, + "height": null, + "hideTimeOverride": false, + "id": 336, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob value size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34844,14 +46830,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34862,32 +46850,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 28 }, - "id": 3746, + "height": null, + "hideTimeOverride": false, + "id": 337, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34895,47 +46907,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_get\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "get", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob get operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34943,6 +46963,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -34951,6 +46972,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34961,97 +46983,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 28 }, - "id": 3655, + "height": null, + "hideTimeOverride": false, + "id": 338, + "interval": null, 
+ "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", 
+ "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob get duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35059,14 +47141,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35077,94 +47161,275 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 57 + "y": 35 }, - "id": 3338, + "height": null, + "hideTimeOverride": false, + "id": 339, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ratio}}", + "metric": "", + "query": 
"sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blob file discardable ratio distribution", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 340, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_seek\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "seek", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_prev\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n 
[$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "prev", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_next\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "next", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob iter operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35172,6 +47437,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -35180,6 +47446,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35190,32 +47457,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 57 + "x": 0, + "y": 42 }, - "id": 3412, + "height": null, + "hideTimeOverride": false, + "id": 341, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -35223,66 +47514,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob seek duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, 
"value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35290,14 +47615,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35308,32 +47635,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 63 + "x": 12, + "y": 42 }, - "id": 4092, + "height": null, + "hideTimeOverride": false, + "id": 342, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -35341,66 +47692,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - 
"refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob next duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35408,14 +47793,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35426,32 +47813,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 63 + "x": 0, + "y": 49 }, - "id": 4093, + "height": null, + "hideTimeOverride": false, + "id": 343, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + 
"sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -35459,66 +47870,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob prev duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35526,14 +47971,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35544,76 +47991,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 69 + "x": 12, + "y": 49 }, - "id": 3645, + "height": null, + "hideTimeOverride": false, + "id": 344, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"keys.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35621,7 +48104,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -35629,6 +48113,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35639,76 +48124,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 69 + "x": 0, + "y": 56 }, - "id": 3643, + "height": null, + "hideTimeOverride": false, + "id": 345, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"bytes.*\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 
10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob bytes flow", + "title": "Blob file read duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35716,7 +48282,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -35724,6 +48291,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35734,97 +48302,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 75 + "x": 12, + "y": 56 }, - "id": 3657, + "height": null, + "hideTimeOverride": false, + "id": 346, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, 
"avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_average\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "avg", - "refId": "A" - }, - { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file read duration", + "title": "Blob bytes flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35832,14 +48415,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "binBps", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35850,97 +48435,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 75 + "x": 0, + "y": 63 }, - "id": 3408, + "height": null, + "hideTimeOverride": false, + "id": 347, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "B" + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "C" + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35948,14 +48593,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35966,76 +48613,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 81 + "x": 12, + "y": 63 }, - "id": 3651, + "height": null, + "hideTimeOverride": false, + "id": 348, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - 
"titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_file_synced{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "sync", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file sync operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36043,6 +48726,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -36051,6 +48735,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36061,97 +48746,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 81 + "x": 0, + "y": 70 }, - "id": 3653, + "height": null, + "hideTimeOverride": false, + "id": 349, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_average\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" - }, - { 
- "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file sync duration", + "title": "Blob GC action", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36159,14 +48859,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36177,77 +48879,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 87 + "x": 12, + "y": 70 }, - "id": 5018, + "height": null, + "hideTimeOverride": false, + "id": 350, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - 
"titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_action_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "B" + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC action", + "title": "Blob file sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - 
"buckets": null, "mode": "time", "name": null, "show": true, @@ -36255,7 +49037,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -36263,6 +49046,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36273,97 +49057,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 87 + "x": 0, + "y": 77 }, - "id": 3410, + "height": null, + "hideTimeOverride": false, + "id": 351, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36371,14 +49215,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36389,76 +49235,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 93 + "x": 12, + "y": 77 }, - "id": 3649, + "height": null, + "hideTimeOverride": false, + "id": 352, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, 
"values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"keys.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36466,7 +49348,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -36474,8 +49357,9 @@ "show": true }, { - "format": "decbytes", - "label": "", + "decimals": null, + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -36484,76 +49368,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 93 + "x": 0, + "y": 84 }, - "id": 3340, + "height": null, + "hideTimeOverride": false, + "id": 353, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": 
false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"bytes.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC bytes flow", + "title": "Blob GC input file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36561,7 +49526,8 @@ }, "yaxes": [ { - "format": "Bps", + 
"decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36569,8 +49535,9 @@ "show": true }, { - "format": "decbytes", - "label": "", + "decimals": null, + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -36579,97 +49546,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 99 + "x": 12, + "y": 84 }, - "id": 4021, + "height": null, + "hideTimeOverride": false, + "id": 354, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_average\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" - }, - { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC input file size", + "title": "Blob GC bytes flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36677,7 +49659,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -36685,6 +49668,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36695,97 +49679,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 99 + "x": 0, + "y": 91 }, - "id": 4022, + "height": null, + "hideTimeOverride": false, + "id": 355, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", 
type=\"blob_gc_output_file_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_gc_outputt_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC output file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36793,7 +49837,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36801,6 +49846,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36811,77 +49857,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": 
"rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 105 + "x": 12, + "y": 91 }, - "id": 3344, + "height": null, + "hideTimeOverride": false, + "id": 356, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36889,6 +49970,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -36897,6 +49979,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36907,43 +49990,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": "titan_db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Titan - $titan_db", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 36 + "y": 0 }, - "id": 2820, + "height": null, + "hideTimeOverride": false, + "id": 357, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 0 }, - "id": 2991, + "height": null, + "hideTimeOverride": false, + "id": 358, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -36955,7 +50081,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -36963,47 +50089,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"waiter_manager.*\"}[1m])) by (instance, name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"deadlock_detect.*\"}[1m])) by (instance, name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Manager Thread CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37011,6 +50160,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -37019,6 +50169,7 @@ 
"show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37029,39 +50180,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 0 }, - "id": 2877, + "height": null, + "hideTimeOverride": false, + "id": 359, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37069,39 +50237,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_task_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Manager Handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37109,14 +50293,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37127,31 +50313,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 7 }, - "id": 2993, - "interval": "", + "height": null, + "hideTimeOverride": false, + "id": 360, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -37165,53 +50370,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_waiter_lifetime_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_lock_manager_waiter_lifetime_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_lock_manager_waiter_lifetime_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_lock_manager_waiter_lifetime_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", - "refId": "C" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Waiter lifetime duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37219,6 +50494,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -37227,6 +50503,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37237,39 +50514,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 7 }, - "id": 4018, + "height": null, + "hideTimeOverride": false, + "id": 361, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37277,46 +50571,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + 
"alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(max_over_time(tikv_lock_manager_wait_table_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(max_over_time(tikv_lock_wait_queue_entries_gauge_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "B" + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Waiting Queue", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37324,14 +50642,16 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37342,30 +50662,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 14 }, - "id": 2995, + "height": null, + "hideTimeOverride": false, + "id": 362, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -37379,44 +50719,123 @@ "lines": true, "linewidth": 1, "links": [], + 
"maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_detect_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_lock_manager_detect_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_lock_manager_detect_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Deadlock detect duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37424,6 +50843,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -37432,6 +50852,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37442,39 +50863,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 14 }, - "id": 2934, + "height": null, + "hideTimeOverride": false, + "id": 363, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37482,39 +50920,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_error_counter\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Detect error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37522,14 +50976,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37540,84 +50996,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, + "bars": false, "cacheTimeout": null, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, + "description": null, "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 21 }, - "id": 4019, + "height": null, + "hideTimeOverride": false, + "id": 364, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "6.1.6", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(max_over_time(tikv_lock_manager_detector_leader_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Deadlock detector leader", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": 
false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37625,57 +51109,76 @@ }, "yaxes": [ { - "decimals": 0, + "decimals": null, "format": "none", - "label": "", + "label": null, "logBase": 1, - "max": "2", - "min": "0", - "show": false + "max": null, + "min": null, + "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 65 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572093, + "height": null, + "hideTimeOverride": false, + "id": 365, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37683,44 +51186,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_pessimistic_lock_memory_size{tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total pessimistic locks memory size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37728,6 +51242,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -37736,6 +51251,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37746,45 +51262,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": 
null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 73 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572094, + "height": null, + "hideTimeOverride": false, + "id": 366, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37792,45 +51319,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_in_memory_pessimistic_locking{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (result)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{result}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "In-memory pessimistic locking result", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37838,14 +51375,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37856,46 +51395,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of active keys and waiters.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 
0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 75 + "y": 28 }, - "hiddenSeries": false, - "id": 23763573091, + "height": null, + "hideTimeOverride": false, + "id": 367, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37903,45 +51452,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_lock_wait_queue_entries_gauge_vec{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Pessimistic lock activities", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37949,14 +51508,16 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37967,10 +51528,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -37978,159 +51540,246 @@ "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The length includes the entering transaction itself", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 83 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573092, + "id": 368, + "interval": null, "legend": { "show": false }, "links": [], - "pluginVersion": "7.5.11", + "maxDataPoints": 512, + "maxPerRow": 
null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_lock_wait_queue_length_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Lengths of lock wait queues when transaction enqueues", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Pessimistic Locking", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 37 + "y": 0 }, - "id": 23763573235, + "height": null, + "hideTimeOverride": false, + "id": 369, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "percentunit" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 45 + "y": 0 }, - "hiddenSeries": false, - "id": 23763573350, + "height": null, + "hideTimeOverride": false, + "id": 370, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + 
"links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "hide": false, - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38138,6 +51787,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -38146,6 +51796,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38156,117 +51807,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "(AP)apply-99": "#88509f", - "(AP)get_permit-99": "#922870", - "(AP)queuing-99": "#9d0041", - "(DL)exec_download-99": "#73a0fe", - "(DL)queue-99": "#7d78ce", - "exec_download-99": "light-orange", - "get_permit-99": "red", - "queuing-99": "blue", - "total-99": "rgb(252, 252, 252)" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { "h": 7, - "w": 9, - "x": 6, - "y": 45 + "w": 12, + "x": 12, + "y": 0 }, - "hiddenSeries": false, - "id": 23763573351, + "height": null, + "hideTimeOverride": false, + "id": 371, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total-99", - "bars": false, - "fill": 2, - "lines": true, - "linewidth": 0, - "stack": false, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": true, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[1m])) by (le, request))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "total-99", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"queue|exec_download\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "(DL){{type}}-99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "(AP){{type}}-99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], 
"timeShift": null, "title": "P99 RPC Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38274,6 +51950,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -38282,7 +51959,8 @@ "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38292,41 +51970,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 9, - "x": 15, - "y": 45 + "w": 12, + "x": 0, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573352, + "height": null, + "hideTimeOverride": false, + "id": 372, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -38334,56 +52027,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (instance, request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} :: {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "total - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", - "step": 10 + "legendFormat": "total-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Ops", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38391,6 +52098,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -38399,6 +52107,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38409,78 +52118,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "cps" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 52 + "h": 7, + "w": 12, + "x": 12, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573032, + "height": null, + "hideTimeOverride": false, + "id": 373, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_import_apply_cache_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} :: 
{{type}}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cache Events", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38488,6 +52231,7 @@ }, "yaxes": [ { + "decimals": null, "format": "cps", "label": null, "logBase": 1, @@ -38496,6 +52240,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38506,285 +52251,424 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 52 + "h": 7, + "w": 12, + "x": 0, + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573348, + "id": 374, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Overall RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - 
"splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 52 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573558, + "id": 375, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"exec_download\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Read File into Memory Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { - "cardColor": "#37872D", + "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 52 + "h": 7, + "w": 12, + "x": 0, + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, 
"highlightCards": true, - "id": 23763573229, + "id": 376, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queuing\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Queuing Time", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "bytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 60 + "h": 7, + "w": 12, + "x": 12, + "y": 21 }, - "hiddenSeries": false, - "id": 23763573349, + "height": null, + "hideTimeOverride": false, + "id": 377, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_apply_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply Request Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38792,6 +52676,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -38800,6 +52685,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38810,287 +52696,424 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateBlues", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 60 + "h": 7, + "w": 12, + "x": 0, + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573344, + "id": 378, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_download_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Downloaded File Size", "tooltip": { 
- "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolatePurples", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 60 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573233, + "id": 379, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_apply_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Apply Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": null, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 
0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 60 + "h": 7, + "w": 12, + "x": 0, + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573230, + "id": 380, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"get_permit\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Blocked by Concurrency Time", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "ops" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 0, - "y": 66 + "h": 7, + "w": 12, + "x": 12, + "y": 35 }, - "hiddenSeries": false, - "id": 23763573118, + "height": null, + "hideTimeOverride": false, + "id": 381, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": 
false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_applier_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"begin_req\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} :: {{type}}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply Request Speed", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39098,6 +53121,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -39106,6 +53130,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39116,78 +53141,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "decbytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 23763573346, + "height": null, + "hideTimeOverride": false, + "id": 382, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - 
"pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_import_apply_cached_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cached File in Memory", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39195,7 +53254,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -39203,6 +53263,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39213,76 +53274,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 68 + "y": 42 }, - "hiddenSeries": false, - "id": 23763573119, + "height": null, + "hideTimeOverride": false, + "id": 383, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_import_applier_event{instance=~\"$instance\", type!=\"begin_req\"}[$__rate_interval])", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 3, - "legendFormat": "{{instance}} :: {{type}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Engine Requests Unfinished", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39290,6 +53387,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39298,6 +53396,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39308,149 +53407,216 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573231, + "id": 384, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Apply Time", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + 
"value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "bytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 0, - "y": 71 + "h": 7, + "w": 12, + "x": 12, + "y": 49 }, - "hiddenSeries": false, - "id": 23763573449, + "height": null, + "hideTimeOverride": false, + "id": 385, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft Store Memory Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39458,6 +53624,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -39466,6 +53633,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ 
-39476,59 +53644,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Point In Time Restore", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 38 + "y": 0 }, - "id": 8389, + "height": null, + "hideTimeOverride": false, + "id": 386, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of resolved ts worker", + "description": "The CPU utilization of resolved ts worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 0, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 8385, + "height": null, + "hideTimeOverride": false, + "id": 387, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39536,44 +53743,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Resolved TS Worker 
CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39581,14 +53799,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39599,44 +53819,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of advance ts worker", + "description": "The CPU utilization of advance ts worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 8, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 9162, + "height": null, + "hideTimeOverride": false, + "id": 388, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39644,44 +53876,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-tso", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Advance ts Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39689,14 +53932,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39707,44 +53952,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of scan lock worker", + "description": "The CPU utilization of scan lock worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 16, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 9164, + "height": null, + "hideTimeOverride": false, + "id": 389, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39752,44 +54009,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-scan", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scan lock Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39797,14 +54065,16 @@ }, "yaxes": [ { + "decimals": 
null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39815,43 +54085,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 7 }, - "hiddenSeries": false, - "id": 8387, + "height": null, + "hideTimeOverride": false, + "id": 390, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39859,43 +54142,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of resolved-ts", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39903,6 +54198,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -39911,6 +54207,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39921,43 +54218,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573805, + "height": null, + "hideTimeOverride": false, + "id": 391, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39965,45 +54275,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of follower safe-ts", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40011,6 +54331,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -40019,6 +54340,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40029,44 +54351,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The region that has minimal resolved ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 
27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 55 + "y": 14 }, - "hiddenSeries": false, - "id": 23763572078, + "height": null, + "hideTimeOverride": false, + "id": 392, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40074,51 +54408,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Min Resolved TS Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40126,15 +54464,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40144,44 +54484,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The region id of the follower that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 55 + "y": 14 }, - "hiddenSeries": false, - "id": 
23763573804, + "height": null, + "hideTimeOverride": false, + "id": 393, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40189,51 +54541,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Min Safe TS Follower Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40241,15 +54597,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40259,10 +54617,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -40272,135 +54631,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when handle a check leader request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 9168, + "id": 394, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + 
"span": null, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Check leader duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The backoff duration before starting initial scan", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 70 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 23763573950, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Initial scan backoff duration", - "tooltip": { + "mode": "time", + "name": null, "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -40409,37 +54727,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The gap between resolved ts of leaders and current time", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 
0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572077, + "height": null, + "hideTimeOverride": false, + "id": 395, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40447,45 +54778,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of resolved-ts in region leaders", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40493,6 +54834,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -40501,6 +54843,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40511,43 +54854,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 71 + "y": 28 }, - "hiddenSeries": false, - "id": 12308, + "height": null, + "hideTimeOverride": false, + "id": 396, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, 
"hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40555,45 +54911,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% CheckLeader request region count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40601,7 +54967,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -40609,6 +54976,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40619,44 +54987,160 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The backoff duration before starting initial scan", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 397, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Initial scan backoff duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 71 + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 23763572079, + "height": null, + "hideTimeOverride": false, + "id": 398, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40664,51 +55148,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - 
"refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "Lock heap size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40716,15 +55204,17 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40734,44 +55224,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "description": "The region that its leader has minimal resolved ts.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 79 + "x": 12, + "y": 35 }, - "hiddenSeries": false, - "id": 8379, + "height": null, + "hideTimeOverride": false, + "id": 399, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40779,43 +55281,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Lock heap size", + "title": "Min Leader Resolved TS Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40823,14 +55337,16 @@ }, "yaxes": [ { - "format": "bytes", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40841,44 +55357,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 79 + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 8377, + "height": null, + "hideTimeOverride": false, + "id": 400, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40886,49 +55414,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Observe region status", "tooltip": 
{ - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40936,14 +55470,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40954,45 +55490,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 87 + "x": 12, + "y": 42 }, - "hiddenSeries": false, - "id": 9166, + "height": null, + "hideTimeOverride": false, + "id": 401, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41000,58 +55547,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_raftstore_check_stale_peer{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-stale-peer", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Fail advance ts count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41059,14 +55618,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41077,43 +55638,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 87 + "x": 0, + "y": 49 }, - "hiddenSeries": false, - "id": 8383, + "height": null, + "hideTimeOverride": false, + "id": 402, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41121,53 +55695,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-check-num", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% CheckLeader request size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41175,6 +55766,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -41183,6 +55775,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41193,44 +55786,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Total bytes of pending commands in the channel", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 95 + "x": 12, + "y": 49 }, - "hiddenSeries": false, - "id": 8381, + "height": null, + "hideTimeOverride": false, + "id": 403, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41238,43 +55843,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, 
"renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Pending command size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41282,14 +55899,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41300,53 +55919,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], - "title": "Resolved-TS", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Resolved TS", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 39 + "y": 0 }, - "id": 2763, + "height": null, + "hideTimeOverride": false, + "id": 404, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 0 }, - "hiddenSeries": false, - "id": 23763573729, + "height": null, + "hideTimeOverride": false, + "id": 405, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41354,42 +56018,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": 
"null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Allocator Stats", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41397,7 +56074,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41405,6 +56083,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41415,85 +56094,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "binBps" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 0 }, - "hiddenSeries": false, - "id": 23763573730, + "height": null, + "hideTimeOverride": false, + "id": 406, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": 
"sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name) - sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"dealloc\"}[$__rate_interval])) by (thread_name) != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Send Allocated(+) / Release Received(-) Bytes Rate", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41501,6 +56207,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -41509,6 +56216,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41519,83 +56227,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 7 }, - "hiddenSeries": false, - "id": 2696, + "height": null, + "hideTimeOverride": false, + "id": 407, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, - "show": false, - "sort": "current", + "show": true, + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": 
"7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Newly Allocated Bytes by Thread", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41603,7 +56340,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41611,6 +56349,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41621,83 +56360,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573731, + "height": null, + "hideTimeOverride": false, + "id": 408, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, - "show": false, - "sort": "current", + "show": true, + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": 
"sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Recently Released Bytes by Thread", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41705,7 +56473,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41713,6 +56482,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41723,60 +56493,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Memory", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 40 + "y": 0 }, - "id": 3922, + "height": null, + "hideTimeOverride": false, + "id": 409, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 65 + "y": 0 }, - "hiddenSeries": false, - "id": 3924, + "height": null, + "hideTimeOverride": false, + "id": 410, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41784,71 +56592,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", 
"options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-auto-throttle/", - "fill": 5, - "fillGradient": 2, - "linewidth": 0 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "backup-io-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "backup-auto-throttle-{{instance}}", - "refId": "C" + "metric": "", + "query": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup CPU Utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41856,6 +56678,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ 
-41864,6 +56687,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41874,144 +56698,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 65 - }, - "id": 3926, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Backup Thread Count", - "transform": "timeseries_aggregations", - "type": "table" - }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 + "h": 7, + "w": 8, + "x": 8, + "y": 0 }, - "hiddenSeries": false, - "id": 23763571993, + "height": null, + "hideTimeOverride": false, + "id": 411, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_cloud_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cloud, req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": 
false, "interval": "", - "legendFormat": "{{cloud}}-{{req}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "cloud request", + "title": "Backup Thread Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42019,7 +56811,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -42027,6 +56820,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -42037,81 +56831,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 65 + "y": 0 }, - "hiddenSeries": false, - "id": 5264, + "height": null, + "hideTimeOverride": false, + "id": 412, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_backup_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{error}}", - "refId": "D" + "metric": "", + "query": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, 
"value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42119,184 +56944,232 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 72 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3927, + "id": 413, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"write\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Write CF SST Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", 
+ "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 72 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5266, + "id": 414, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"default\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Default CF SST Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42305,34 +57178,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 72 + "y": 7 }, - "hiddenSeries": false, - "id": 3929, + "height": null, + "hideTimeOverride": false, + "id": 415, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + 
"sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -42341,60 +57229,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} {{cf}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "refId": "B", - "step": 4 + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup SST Generation Throughput", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42402,7 +57300,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -42410,7 +57309,8 @@ "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -42420,247 +57320,316 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + 
"editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 4, + "w": 6, "x": 0, - "y": 79 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5597, + "id": 416, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Scan SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 4, - "y": 79 + "x": 6, + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3931, + "id": 417, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + 
"minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Scan SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 10, - "y": 79 + "x": 12, + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 6905, + "id": 418, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"save.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - 
"refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Save SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42669,34 +57638,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 79 + "w": 6, + "x": 18, + "y": 14 }, - "hiddenSeries": false, - "id": 3928, + "height": null, + "hideTimeOverride": false, + "id": 419, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -42705,60 +57689,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - 99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99.9%", "metric": "", - "refId": "A", - "step": 4 + "query": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"histogram_quantile(0.95, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - 95%", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_backup_range_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_backup_range_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - avg", - "refId": "C", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-avg", + "metric": "", + "query": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup SST Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42766,6 +57775,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -42774,6 +57784,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -42784,85 +57795,108 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { 
+ "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 86 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3930, + "id": 420, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "External Storage Create Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42871,37 +57905,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 86 + "y": 21 }, - "id": 4936, + "height": null, + "hideTimeOverride": false, + "id": 421, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -42909,47 +57956,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, 
"points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}-100%", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}-99%", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "External Storage Create Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42966,53 +58089,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 93 + "y": 28 }, - "id": 5267, + "height": null, + "hideTimeOverride": false, + "id": 422, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43020,47 +58157,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n 
[$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Checksum Request Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43077,94 +58290,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 93 + "y": 28 }, - "id": 5269, + "height": null, + "hideTimeOverride": false, + "id": 423, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{device}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{device}}", + "metric": "", + "query": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO Utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", 
"name": null, "show": true, @@ -43172,6 +58414,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -43180,6 +58423,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43190,42 +58434,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 100 + "y": 35 }, - "id": 5925, + "height": null, + "hideTimeOverride": false, + "id": 424, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43233,67 +58491,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/import-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "import-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", "format": "time_series", "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "import-{{instance}}-{{tid}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "import-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "metric": "", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import CPU Utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43301,6 +58577,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -43309,6 +58586,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43319,145 +58597,245 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fontSize": "100%", "gridPos": { "h": 7, - "w": 4, - "x": 12, - "y": 100 + "w": 8, + "x": 8, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 425, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, - "id": 5926, + "lines": true, + "linewidth": 1, "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - 
"rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Import Thread Count", - "transform": "timeseries_aggregations", - "type": "table" + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 100 + "y": 35 }, - "id": 5932, + "height": null, + "hideTimeOverride": false, + "id": 426, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_error_counter{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{error}} {{instance}}", - "refId": "D" + "legendFormat": "{{type}}-{{error}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43465,100 +58843,200 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 107 + "y": 42 }, - "id": 5931, + "height": null, + "hideTimeOverride": false, + "id": 427, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request))", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{request}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{request}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{request}}-99%", - "refId": "A" + "legendFormat": "avg-{{request}}", + "metric": "", + "query": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.5, sum(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", "hide": true, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{request}}-50%", - "refId": "B" + "legendFormat": "count-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - 
"buckets": null, "mode": "time", "name": null, "show": true, @@ -43566,6 +59044,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -43574,6 +59053,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43584,39 +59064,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 107 + "y": 42 }, - "id": 6267, + "height": null, + "hideTimeOverride": false, + "id": 428, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43624,51 +59121,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (instance, request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "total - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", - "step": 10 + "legendFormat": "{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Ops", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": 
null, "mode": "time", "name": null, "show": true, @@ -43676,6 +59177,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -43684,6 +59186,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43694,571 +59197,732 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5930, + "id": 429, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"download|write\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Write/Download RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": 
"spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5929, + "id": 430, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queue\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Wait Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 6906, + "id": 431, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": 
false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"read\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Read SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5928, + "id": 432, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"rewrite\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Rewrite SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5939, + "id": 433, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"ingest\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + 
"show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5938, + "id": 434, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_ingest_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", 
+ "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5937, + "id": 435, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_ingest_byte{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest SST Bytes", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -44267,79 +59931,121 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 121 + "y": 56 }, - "id": 5927, + "height": null, + "hideTimeOverride": false, + "id": 436, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - 
"pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import Download SST Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44347,7 +60053,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -44355,7 +60062,8 @@ "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44365,77 +60073,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 128 + "y": 63 }, - "id": 12309, + "height": null, + "hideTimeOverride": false, + "id": 437, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, 
"values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import Local Write keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44443,95 +60186,280 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "height": null, + "hideTimeOverride": false, + "id": 438, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Import Local Write bytes", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 128 + "x": 0, + "y": 70 }, - "id": 12310, + "height": null, + "hideTimeOverride": false, + "id": 439, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_raw_expired_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "legendFormat": "sum", + "metric": "", + "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Import Local Write bytes", + "title": "TTL Expired", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44539,104 +60467,132 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The accumulated TTL expired KV count during backup", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 135 + "x": 12, + "y": 70 }, - "hiddenSeries": false, - "id": 23763572861, + "height": null, + "hideTimeOverride": false, + "id": 440, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": true, - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "sum", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{cloud}}-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL Expired", + "title": "cloud request", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44644,6 +60600,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44652,6 +60609,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44662,84 +60620,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Backup & Import", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 41 + "y": 0 }, - "id": 4466, + "height": null, + "hideTimeOverride": false, + "id": 441, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Total number of encryption data keys in use", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 58 + "y": 0 }, - "id": 4464, + "height": null, + "hideTimeOverride": false, + "id": 442, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encryption data keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44747,8 +60775,8 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44756,7 +60784,7 @@ "show": true }, { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44767,69 +60795,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Number of files being encrypted", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 58 + "y": 0 }, - "id": 4554, + "height": null, + "hideTimeOverride": false, + "id": 443, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encrypted files", "tooltip": { 
+ "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44837,7 +60908,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44845,6 +60917,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44855,69 +60928,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Flag to indicate if encryption is initialized", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 66 + "y": 7 }, - "id": 4555, + "height": null, + "hideTimeOverride": false, + "id": 444, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encryption initialized", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44925,8 +61041,8 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44934,7 +61050,7 @@ "show": true }, { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44945,69 +61061,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", "description": "Total size of encryption meta files", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 66 + "y": 7 }, - "id": 4556, + "height": null, + "hideTimeOverride": false, + "id": 445, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}-{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encryption meta files size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45015,7 +61174,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -45023,6 +61183,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45033,76 +61194,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 74 + "y": 14 }, - "id": 4557, + "height": null, + "hideTimeOverride": false, + "id": 446, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": 
true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "encrypt-{{req}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "decrypt-{{req}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encrypt/decrypt data nanos", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45110,7 +61322,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -45118,6 +61331,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45128,87 +61342,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Writing or reading file duration (second)", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": 
{ - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 74 + "y": 14 }, - "id": 4559, + "height": null, + "hideTimeOverride": false, + "id": 447, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "max-{{type}}-{{operation}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%-{{type}}-{{operation}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg-{{type}}-{{operation}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read/write encryption meta duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45225,6 +61532,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45235,139 +61543,197 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Encryption", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 42 + "y": 0 }, - "id": 13016, + "height": null, + "hideTimeOverride": false, + "id": 448, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, + "custom": {}, + "decimals": null, "mappings": [ { - "from": "", - "id": 1, - "text": "Disabled", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Enabled", - "to": "", - "type": 1, - "value": "1" + 
"options": { + "0": { + "color": "red", + "index": null, + "text": "Disabled" + }, + "1": { + "color": "green", + "index": null, + "text": "Enabled" + } + }, + "type": "value" } ], - "noValue": "Disabled", + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "dark-green", - "value": 1 - } - ] - } + "steps": "" + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 5, + "h": 7, + "w": 6, "x": 0, - "y": 55 + "y": 0 }, - "id": 14361, + "height": null, + "hideTimeOverride": false, + "id": 449, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Endpoint Status", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The average flush size of last 30mins.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "index": null, + "text": "Running" + }, + "1": { + "color": "yellow", + "index": null, + "text": "Paused" + }, + "2": { + "color": "red", + "index": null, + "text": "Error" + } + }, + "type": "value" + } + ], + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 5, - "y": 55 + "h": 7, + "w": 6, + "x": 6, + "y": 0 }, - "id": 14507, + "height": null, + "hideTimeOverride": false, + "id": 450, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45380,55 +61746,69 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "min((\n tikv_log_backup_task_status\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Average Flush Size ", + "title": "Task Status", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The current total flushed file number of this run.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 13, - "y": 55 + "h": 7, + "w": 6, + "x": 12, + "y": 0 }, - "id": 14363, + "height": null, + "hideTimeOverride": false, + "id": 451, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45441,55 +61821,69 @@ "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "name" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "tidb_log_backup_advancer_owner > 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "tidb_log_backup_advancer_owner > 0", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Flushed Files (Last 30m) Per Host", + "title": "Advancer Owner", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. 
\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The average flush size of last 30mins.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 55 + "h": 7, + "w": 6, + "x": 18, + "y": 0 }, - "id": 14508, + "height": null, + "hideTimeOverride": false, + "id": 452, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45502,55 +61896,69 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "refId": "B" + "metric": "", + "query": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Flush Times (Last 30m)", + "title": "Average Flush Size", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. 
\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The current total flushed file number of this run.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 57 + "h": 7, + "w": 6, + "x": 0, + "y": 7 }, - "id": 14362, + "height": null, + "hideTimeOverride": false, + "id": 453, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45563,213 +61971,219 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total Flushed Size (Last 30m)", + "title": "Flushed Files (Last 30m) Per Host", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Running", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Paused", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 3, - "text": "Error", - "to": "", - "type": 1, - "value": "2" - } - ], - "noValue": "Disabled", + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1 - }, - { - "color": "dark-red", - "value": 2 - } - ] - } + "steps": "" + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 59 + "h": 7, + "w": 6, + "x": 6, + "y": 7 }, - "id": 14907, + "height": null, + "hideTimeOverride": false, + "id": 454, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" 
], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Task Status", + "title": "Flush Times (Last 30m)", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 1, + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "dark-blue", - "value": null - } - ] + "steps": "" }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 3, - "x": 2, - "y": 59 + "h": 7, + "w": 6, + "x": 12, + "y": 7 }, - "id": 15361, + "height": null, + "hideTimeOverride": false, + "id": 455, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", - "justifyMode": "center", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, - "textMode": "name" + "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "tidb_log_backup_advancer_owner > 0", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Advancer Owner", + "title": "Total Flushed Size (Last 30m)", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been 
flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 60 + "h": 7, + "w": 6, + "x": 18, + "y": 7 }, - "id": 14911, + "height": null, + "hideTimeOverride": false, + "id": 456, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45782,62 +62196,82 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Flush Files (Last 30m)", + "transformations": [], + "transparent": false, "type": "stat" }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + "description": "The CPU utilization of log backup threads. 
\n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 10, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 63 + "y": 14 }, - "hiddenSeries": false, - "id": 13262, + "height": null, + "hideTimeOverride": false, + "id": 457, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -45845,47 +62279,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU Usage", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45893,14 +62335,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45911,81 +62355,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - 
"overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 63 + "h": 7, + "w": 12, + "x": 12, + "y": 14 }, - "hiddenSeries": false, - "id": 12843, + "height": null, + "hideTimeOverride": false, + "id": 458, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Handle Event Rate", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45993,14 +62468,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46011,79 +62488,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The data rate of initial scanning emitting events.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 63 + "h": 7, + "w": 12, + "x": 0, + "y": 21 }, - "hiddenSeries": 
false, - "id": 14135, + "height": null, + "hideTimeOverride": false, + "id": 459, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, - "current": false, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Initial Scan Generate Event Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46091,6 +62601,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -46099,140 +62610,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 600000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "name": "Checkpoint Lag Too Huge", - "noDataState": "no_data", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "ms" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, 
- "x": 18, - "y": 63 + "h": 7, + "w": 12, + "x": 12, + "y": 21 }, - "hiddenSeries": false, - "id": 14774, + "height": null, + "hideTimeOverride": false, + "id": 460, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", - "hide": true, - "interval": "", - "legendFormat": "Current Time", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 600000, - "visible": true + "metric": "", + "query": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Abnormal Checkpoint TS Lag", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46240,14 +62734,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, - "max": "3000000", - "min": "0", + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46258,81 +62754,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The estimated memory usage by the streaming backup module.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 73 + "y": 28 }, - "hiddenSeries": false, - "id": 13100, + "height": null, + 
"hideTimeOverride": false, + "id": 461, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory Of Events", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46340,14 +62867,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46358,91 +62887,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 73 + "h": 7, + "w": 12, + "x": 12, + "y": 28 }, - "hiddenSeries": false, - "id": 14630, + "height": null, + "hideTimeOverride": false, + "id": 462, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, 
"percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "total", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}-total", + "metric": "", + "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Observed Region Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46450,6 +63015,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -46458,7 +63024,8 @@ "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46468,91 +63035,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The errors met when backing up.\n**They are retryable, don't worry.**", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 73 + "h": 7, + "w": 8, + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 13101, + "height": null, + "hideTimeOverride": false, + "id": 463, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "show": false, + "rightSide": true, + 
"show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46560,14 +63148,16 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46578,90 +63168,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The errors met when backing up.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 73 + "h": 7, + "w": 8, + "x": 8, + "y": 35 }, - "hiddenSeries": false, - "id": 14910, + "height": null, + "hideTimeOverride": false, + "id": 464, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - 
"nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Current Time", - "dashes": true, - "fill": 0 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "Current Time", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Checkpoint TS of Tasks", + "title": "Fatal Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46669,7 +63281,8 @@ }, "yaxes": [ { - "format": "dateTimeAsIsoNoDateIfToday", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -46677,6 +63290,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46687,100 +63301,138 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 78 + "h": 7, + "w": 8, + "x": 16, + "y": 35 }, - "hiddenSeries": false, - "id": 14908, + "height": null, + "hideTimeOverride": false, + "id": 465, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": 
null, + "minSpan": null, "nullPointMode": "null", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "Current Time", + "bars": false, + "dashes": true, + "fill": 0, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", "format": "time_series", "hide": false, "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{task}}", + "metric": "", + "query": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "time() * 1000", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Current Time", + "metric": "", + "query": "time() * 1000", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Fatal Errors", + "title": "Checkpoint TS of Tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46788,14 +63440,16 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "dateTimeAsIsoNoDateIfToday", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46806,668 +63460,836 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateBlues", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The duration of flushing a batch of file.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": 
false, "hideZeroBuckets": true, "highlightCards": true, - "id": 14078, + "id": 466, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Flush Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateReds", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of scanning the initial data from local DB and transform them into apply events. 
\n", + "description": "The duration of scanning the initial data from local DB and transform them into apply events.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 14136, + "id": 467, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Initial scanning duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of converting a raft request into a apply event. 
\n*This duration is for consuming a batch of events.*", + "description": "The duration of converting a raft request into a apply event.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13934, + "id": 468, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Convert Raft Event duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of waiting the mutex of the controller. 
\n*This duration is for consuming a batch of events.*", + "description": "The duration of waiting the mutex of the controller.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12840, + "id": 469, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Wait for Lock Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateCividis", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The number of KV-modify of each raft command observed.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15059, + "id": 470, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - 
"max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Command Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of saving an event into temporary file. 
\n*This duration is for consuming a batch of events.*", + "description": "The total cost of saving an event into temporary file.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12841, + "id": 471, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Save to Temp File Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. 
\n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13552, + "id": 472, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Write to Temp File Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. 
\n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The duration of collecting metadata and call the UNIX system call *write* for each event.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 90 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13551, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "y": 49 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 473, + "interval": null, + "legend": { + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "System Write Call Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -47476,71 +64298,106 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal message type count.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 97 + "y": 56 }, - "hiddenSeries": false, - "id": 14914, + "height": null, + "hideTimeOverride": false, + "id": 474, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, 
"avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Type", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47548,14 +64405,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 2, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47566,83 +64425,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 97 + "h": 7, + "w": 8, + "x": 8, + "y": 56 }, - "hiddenSeries": false, - "id": 14912, + "height": null, + "hideTimeOverride": false, + "id": 475, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": 
null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Handling Duration (P99)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47650,95 +64538,132 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 97 + "h": 7, + "w": 8, + "x": 16, + "y": 56 }, - "hiddenSeries": false, - "id": 14913, + "height": null, + "hideTimeOverride": false, + "id": 476, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": 
"null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Handling Duration (P90)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47746,14 +64671,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47764,78 +64691,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal read throughput of RocksDB during initial scanning. 
This panel can roughly present the read through to the hard disk of initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 103 + "y": 63 }, - "hiddenSeries": false, - "id": 14271, + "height": null, + "hideTimeOverride": false, + "id": 477, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ cf }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{cf}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Throughput ", + "title": "Initial Scan RocksDB Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47843,14 +64804,16 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47861,78 +64824,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Misc statistics of RocksDB during initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + 
"steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 103 + "h": 7, + "w": 12, + "x": 12, + "y": 63 }, - "hiddenSeries": false, - "id": 14270, + "height": null, + "hideTimeOverride": false, + "id": 478, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ cf }}/{{ op }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{op}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Operation ", + "title": "Initial Scan RocksDB Operation", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47940,14 +64937,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47958,80 +64957,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "leader-changed": "blue", - "region-changed": "purple" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The reason of triggering initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": 
null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 103 + "h": 7, + "w": 12, + "x": 0, + "y": 70 }, - "hiddenSeries": false, - "id": 14915, + "height": null, + "hideTimeOverride": false, + "id": 479, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": false + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{reason}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Initial Scanning Trigger Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48039,6 +65070,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -48047,6 +65079,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -48057,80 +65090,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "del": "dark-red", - "put": "green" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 103 + "h": 7, + "w": 12, + "x": 12, + "y": 70 }, - "hiddenSeries": false, - "id": 15176, + "height": null, + 
"hideTimeOverride": false, + "id": 480, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ type }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region Checkpoint Key Putting", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48138,6 +65203,7 @@ }, "yaxes": [ { + "decimals": null, "format": "cps", "label": null, "logBase": 1, @@ -48146,6 +65212,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -48156,261 +65223,320 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 109 + "y": 77 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15544, + "id": 481, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + 
"maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Request Checkpoint Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 109 + "w": 12, + "x": 12, + "y": 77 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15716, + "id": 482, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, 
"legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Tick Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "aliasColors": { - "epoch-not-match": "purple", - "not-leader": "blue", - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The reason of advancer failed to be advanced.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 109 + "w": 12, + "x": 0, + "y": 84 }, - "hiddenSeries": false, - "id": 23763572666, + "height": null, + "hideTimeOverride": false, + "id": 483, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ reason }}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "format": "time_series", "hide": false, + "instant": false, 
"interval": "", - "legendFormat": "", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{reason}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region Checkpoint Failure Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48418,15 +65544,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48436,101 +65564,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "fail": "red", - "success": "green", - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The result of getting region checkpoints.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 109 + "w": 12, + "x": 12, + "y": 84 }, - "hiddenSeries": false, - "id": 23763572665, + "height": null, + "hideTimeOverride": false, + "id": 484, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "fail", - "transform": "negative-Y", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ result }}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (result) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{result}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Request Result", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48538,15 +65677,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48556,96 +65697,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 116 + "y": 91 }, - "hiddenSeries": false, - "id": 15359, + "height": null, + "hideTimeOverride": false, + "id": 485, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "consistency-check", - "yaxis": 1 - }, - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ step }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Tick Duration (P99)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48653,15 +65810,17 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48671,96 +65830,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 116 + "w": 12, + "x": 12, + "y": 91 }, - "hiddenSeries": false, - "id": 15360, + "height": null, + "hideTimeOverride": false, + "id": 486, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 1 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n 
\n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ step }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Tick Duration (P90)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48768,15 +65943,17 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48786,96 +65963,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The frequent of getting region level checkpoint.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 116 + "w": 12, + "x": 0, + "y": 98 }, - "hiddenSeries": false, - "id": 23763572733, + "height": null, + "hideTimeOverride": false, + "id": 487, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n 
[$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ step }} {{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{step}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get Region Operation Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48883,15 +66076,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48901,97 +66096,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The variant of checkpoint group.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 116 + "w": 12, + "x": 12, + "y": 98 }, - "hiddenSeries": false, - "id": 23763572734, + "height": null, + "hideTimeOverride": false, + "id": 488, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ step }} {{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{step}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Try Advance Trigger Time", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48999,15 +66209,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -49017,50 +66229,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Backup Log", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 52 + "y": 0 }, - "id": 24763573238, + "height": null, + "hideTimeOverride": false, + "id": 489, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The changing trend of the slowness on I/O operations. 'value > 0' means the related store might has a slow trend.", + "description": "The changing trend of the slowness on I/O operations. 
'value > 0' means the related store might have a slow trend.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 0 }, - "hiddenSeries": false, - "id": 24763574116, + "height": null, + "hideTimeOverride": false, + "id": 490, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49068,88 +66328,132 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "Slow Trend", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "none", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The changing trend of QPS on each store. 
'value < 0' means the QPS has a dropping trend.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 0 }, - "hiddenSeries": false, - "id": 24763574117, + "height": null, + "hideTimeOverride": false, + "id": 491, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49157,88 +66461,132 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "QPS Changing Trend", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "none", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The sampling latency of recent queries. 
A larger value indicates that the store is more likely to be the slowest store.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 7 }, - "hiddenSeries": false, - "id": 24763574115, + "height": null, + "hideTimeOverride": false, + "id": 492, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49246,59 +66594,64 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [ - { - "value": 275000, - "colorMode": "critical", - "op": "gt", - "fill": false, - "line": true, - "visible": true, - "yaxis": "left" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], - "timeRegions": [], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "AVG Sampling Latency", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -49306,41 +66659,67 @@ "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The QPS of each store.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 7 }, - "hiddenSeries": false, - "id": 24763573970, + "height": null, + "hideTimeOverride": false, + "id": 493, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49348,235 +66727,265 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_result_value{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "QPS of each store", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Slow Trend Statistics", + "transformations": [], + "transparent": false, "type": "row" } ], "refresh": "1m", - "schemaVersion": 27, + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 2, "includeAll": false, - "label": "K8s-cluster", + "label": "k8s_cluster", "multi": false, "name": "k8s_cluster", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", - "refId": "quota-k8s_cluster-Variable-Query" - }, + "query": 
"label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", "refresh": 2, - "regex": "", - "skipUrlSync": false, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 2, "includeAll": false, "label": "tidb_cluster", "multi": false, "name": "tidb_cluster", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\"}, tidb_cluster)", - "refId": "quota-tidb_cluster-Variable-Query" - }, + "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster =\"$k8s_cluster\"}, tidb_cluster)", "refresh": 2, - "regex": "", - "skipUrlSync": false, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "db", "multi": true, "name": "db", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "refId": "quota-db-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "label_values(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, type)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "command", "multi": true, "name": "command", "options": [], - "query": { - "query": "query_result(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"} != 0)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, + "query": "query_result(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"} != 0)", + "refresh": 2, "regex": "/\\btype=\"([^\"]+)\"/", - "skipUrlSync": false, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": ".*", - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "instance", "multi": false, "name": "instance", "options": [], - "query": { - 
"query": "label_values(tikv_engine_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, instance)", - "refId": "quota-instance-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": "label_values(tikv_engine_size_bytes{k8s_cluster =\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, instance)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "description": null, - "error": null, "hide": 2, "includeAll": true, "label": "titan_db", "multi": true, "name": "titan_db", "options": [], - "query": { - "query": "label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "refId": "quota-titan_db-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": "label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } @@ -49587,6 +66996,7 @@ "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -49614,5 +67024,5 @@ "timezone": "browser", "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", - "version": 1 + "version": 0 } diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 new file mode 100644 index 000000000000..abb8baa67702 --- /dev/null +++ b/metrics/grafana/tikv_details.json.sha256 @@ -0,0 +1 @@ +1b98912ed3e87960a2ce063f6063b4f4f6fe6bbba98518ceabd768036287763e ./metrics/grafana/tikv_details.json diff --git a/scripts/check-dashboards b/scripts/check-dashboards new file mode 100755 index 000000000000..fdb73c281688 --- /dev/null +++ b/scripts/check-dashboards @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -euo pipefail + +for sha256 in ./metrics/grafana/*.sha256; do + if ! sha256sum -c "$sha256"; then + dashboard=$(basename "$sha256" .sha256) + echo "Please avoid manually modifying $dashboard" + echo "Try ./scripts/gen-tikv-details-dashboard" + exit 1 + fi +done + +echo "Dashboards check passed." diff --git a/scripts/gen-tikv-details-dashboard b/scripts/gen-tikv-details-dashboard new file mode 100755 index 000000000000..2c91cf3dbb98 --- /dev/null +++ b/scripts/gen-tikv-details-dashboard @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -euo pipefail + +docker build -t tikv-dashboard-gen -f - . 
< /metrics/grafana/\$name.json.sha256 + done + " From 7b1ee1181bb15ccf8b04d1fbbe40c9e3c3df4602 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Wed, 29 Nov 2023 16:53:48 +0800 Subject: [PATCH 168/203] tidb_query_expr: fix incorrect intdiv for decimal type (#16025) close tikv/tikv#16024 Signed-off-by: wjhuang2016 Co-authored-by: iosmanthus --- .../tidb_query_expr/src/impl_arithmetic.rs | 73 ++++++++++++++++--- components/tidb_query_expr/src/lib.rs | 9 ++- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/components/tidb_query_expr/src/impl_arithmetic.rs b/components/tidb_query_expr/src/impl_arithmetic.rs index 2f48fec46939..5960e69c2cd7 100644 --- a/components/tidb_query_expr/src/impl_arithmetic.rs +++ b/components/tidb_query_expr/src/impl_arithmetic.rs @@ -4,7 +4,7 @@ use num_traits::identities::Zero; use tidb_query_codegen::rpn_fn; use tidb_query_common::Result; use tidb_query_datatype::{ - codec::{self, data_type::*, div_i64, div_i64_with_u64, div_u64_with_i64, Error}, + codec::{self, data_type::*, div_i64, div_i64_with_u64, div_u64_with_i64, mysql::Res, Error}, expr::EvalContext, }; @@ -452,21 +452,39 @@ fn int_divide_decimal(ctx: &mut EvalContext, lhs: &Decimal, rhs: &Decimal) -> Re let result = arithmetic_with_ctx::(ctx, lhs, rhs)?; if let Some(result) = result { let result = result.as_i64(); - Ok(if result.is_truncated() { - Some(result.unwrap()) - } else { - result - .into_result_with_overflow_err( - ctx, - Error::overflow("BIGINT", format!("({} / {})", lhs, rhs)), - ) - .map(Some)? - }) + match result { + Res::Ok(i) => Ok(Some(i)), + Res::Truncated(i) => Ok(Some(i)), + _ => Err(Error::overflow("BIGINT", format!("({} / {})", lhs, rhs)).into()), + } } else { Ok(None) } } +#[rpn_fn(capture = [ctx])] +#[inline] +fn int_divide_decimal_unsigned( + ctx: &mut EvalContext, + lhs: &Decimal, + rhs: &Decimal, +) -> Result> { + let result = arithmetic_with_ctx::(ctx, lhs, rhs)?; + if let Some(result) = result { + let unsigned_result = result.as_u64(); + if unsigned_result.is_overflow() { + let signed_result = result.as_i64(); + return if signed_result.unwrap() == 0 && signed_result.is_truncated() { + Ok(Some(0)) + } else { + Err(Error::overflow("BIGINT UNSIGNED", format!("({} / {})", lhs, rhs)).into()) + }; + } + return Ok(Some(unsigned_result.unwrap() as i64)); + } + Ok(None) +} + pub struct DecimalDivide; impl ArithmeticOpWithCtx for DecimalDivide { @@ -962,6 +980,7 @@ mod tests { // divide by zero (Some("0.0"), Some("0.0"), None), (None, None, None), + (Some("0"), Some("45584"), Some(0)), ]; for (lhs, rhs, expected) in test_cases { @@ -995,6 +1014,38 @@ mod tests { } } + #[test] + fn test_int_divide_decimal_unsigned_overflow() { + let lft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let rft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let output: Option = RpnFnScalarEvaluator::new() + .push_param_with_field_type(Decimal::from(1), lft) + .push_param_with_field_type(Decimal::from_f64(-2_f64).unwrap(), rft) + .evaluate(ScalarFuncSig::IntDivideDecimal) + .unwrap(); + assert_eq!(output, Some(0)); + + let lft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let rft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let output: Result> = RpnFnScalarEvaluator::new() + .push_param_with_field_type(Decimal::from(1), lft) + 
.push_param_with_field_type(Decimal::from_f64(-1_f64).unwrap(), rft) + .evaluate(ScalarFuncSig::IntDivideDecimal); + assert!(output.is_err(), "should be error"); + } + #[test] fn test_real_multiply() { let should_pass = vec![(1.01001, -0.01, Real::new(-0.0101001).ok())]; diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index f1aae1de746f..50e106815877 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -284,6 +284,13 @@ fn divide_mapper(lhs_is_unsigned: bool, rhs_is_unsigned: bool) -> RpnFnMeta { } } +fn divide_decimal_mapper(lhs_is_unsigned: bool, rhs_is_unsigned: bool) -> RpnFnMeta { + match (lhs_is_unsigned, rhs_is_unsigned) { + (false, false) => int_divide_decimal_fn_meta(), + _ => int_divide_decimal_unsigned_fn_meta(), + } +} + fn map_rhs_int_sig(value: ScalarFuncSig, children: &[Expr], mapper: F) -> Result where F: Fn(bool) -> RpnFnMeta, @@ -421,7 +428,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::DivideDecimal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::DivideReal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::IntDivideInt => map_int_sig(value, children, divide_mapper)?, - ScalarFuncSig::IntDivideDecimal => int_divide_decimal_fn_meta(), + ScalarFuncSig::IntDivideDecimal => map_int_sig(value, children, divide_decimal_mapper)?, ScalarFuncSig::ModReal => arithmetic_fn_meta::(), ScalarFuncSig::ModDecimal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::ModInt => map_int_sig(value, children, mod_mapper)?, From 1afb327c6d2a650ddac0e506e2d4727ebad78eb0 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Wed, 29 Nov 2023 18:34:18 +0800 Subject: [PATCH 169/203] cloud: update cloud sdk that supports fips 140 for cloud sdk (#16098) close tikv/tikv#16097 Signed-off-by: Leavrth Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 134 ++++-------------------------- Cargo.toml | 1 + components/cloud/aws/src/s3.rs | 2 + components/cloud/azure/Cargo.toml | 12 +-- 4 files changed, 27 insertions(+), 122 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 146e9aa04ab6..13bd3c05781c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -310,7 +310,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-trait", "base64 0.21.0", @@ -336,7 +336,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-lock", "async-trait", @@ -356,7 +356,7 @@ dependencies = [ [[package]] name = "azure_security_keyvault" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-trait", "azure_core", @@ -371,20 +371,19 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.12.0" -source = 
"git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "RustyXML", "async-trait", "azure_core", "bytes", "futures 0.3.15", - "hmac 0.12.1", "log", "once_cell", + "openssl", "serde", "serde_derive", "serde_json", - "sha2 0.10.6", "time 0.3.20", "url", "uuid 1.2.1", @@ -393,7 +392,7 @@ dependencies = [ [[package]] name = "azure_storage_blobs" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "RustyXML", "azure_core", @@ -665,15 +664,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "boolinator" version = "2.4.0" @@ -1098,15 +1088,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "cpufeatures" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" -dependencies = [ - "libc 0.2.146", -] - [[package]] name = "cpuid-bool" version = "0.1.2" @@ -1265,26 +1246,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "crypto-mac" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4857fd85a0c34b3c3297875b747c1e02e06b6a0ea32dd892d8192b9ce0813ea6" -dependencies = [ - "generic-array", - "subtle", -] - [[package]] name = "csv" version = "1.1.6" @@ -1393,17 +1354,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "digest" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" -dependencies = [ - "block-buffer 0.10.4", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -2413,25 +2363,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" -[[package]] -name = "hmac" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" -dependencies = [ - "crypto-mac", - "digest 0.9.0", -] - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest 0.10.6", -] - [[package]] name = "home" version = "0.5.5" @@ -2975,17 +2906,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -[[package]] -name = "md-5" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" -dependencies = [ - "block-buffer 0.9.0", - "digest 0.9.0", - "opaque-debug", -] - [[package]] name = "md5" version = "0.7.0" @@ -3443,7 +3363,7 @@ dependencies = [ "serde", "serde_json", "serde_path_to_error", - "sha2 0.9.1", + "sha2", "thiserror", "url", ] @@ -4717,7 +4637,7 @@ dependencies = [ [[package]] name = "rusoto_core" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "base64 0.13.0", @@ -4741,7 +4661,7 @@ dependencies = [ [[package]] name = "rusoto_credential" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "chrono", @@ -4758,7 +4678,7 @@ dependencies = [ [[package]] name = "rusoto_kms" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -4771,7 +4691,7 @@ dependencies = [ [[package]] name = "rusoto_mock" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "chrono", @@ -4785,7 +4705,7 @@ dependencies = [ [[package]] name = "rusoto_s3" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -4799,32 +4719,29 @@ dependencies = [ [[package]] name = "rusoto_signature" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "base64 0.13.0", "bytes", "chrono", - "digest 0.9.0", "futures 0.3.15", "hex 0.4.2", - "hmac 0.10.1", "http", "hyper", "log", - "md-5", + "openssl", "percent-encoding", "pin-project-lite", "rusoto_credential", "rustc_version 0.3.3", "serde", - "sha2 0.9.1", "tokio", ] [[package]] name = "rusoto_sts" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -5226,24 +5143,13 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" dependencies = [ - "block-buffer 0.9.0", + "block-buffer", "cfg-if 0.1.10", "cpuid-bool", - "digest 0.9.0", + "digest", "opaque-debug", ] -[[package]] -name = "sha2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" -dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest 0.10.6", -] - [[package]] name = "shlex" version = "0.1.1" @@ -5593,12 +5499,6 @@ dependencies = [ "syn 2.0.18", ] -[[package]] -name = "subtle" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" - [[package]] name = "symbolic-common" version = "10.1.1" diff --git a/Cargo.toml b/Cargo.toml index 2d26bf6afe43..fdc86fb5f15d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -189,6 +189,7 @@ protobuf = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } protobuf-codegen = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } # TODO: remove this replacement after rusoto_s3 truly supports virtual-host style (https://github.com/rusoto/rusoto/pull/1823). +# UPDATE: use openssl for signature to support fips 140 rusoto_core = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_credential = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_kms = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 73ddf479fd8e..f06d86b37cb9 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -330,6 +330,8 @@ async fn try_read_exact( } } +// NOTICE: the openssl fips doesn't support md5, therefore use md5 pakcage to +// hash fn get_content_md5(object_lock_enabled: bool, content: &[u8]) -> Option { object_lock_enabled.then(|| { let digest = md5::compute(content); diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 7dd98224a73d..07a4752451ef 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -9,11 +9,13 @@ failpoints = ["fail/failpoints"] [dependencies] async-trait = "0.1" -azure_core = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_identity = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_security_keyvault = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } -azure_storage = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } -azure_storage_blobs = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +# TODO: The azure sdk with the newest version needs the rustc v1.70, but current version of rustc in TiKV is v1.67. +# Therefore use the patch to update sdk to support fips 140. 
+azure_core = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } +azure_identity = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } +azure_security_keyvault = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips", default-features = false } +azure_storage = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips", default-features = false } +azure_storage_blobs = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } base64 = "0.13" cloud = { workspace = true } fail = "0.5" From bab43d0e374b00dbf291c2870a22f8906c7a8431 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 30 Nov 2023 13:32:48 +0800 Subject: [PATCH 170/203] metrics: add extra filters to filter out redundant data. (#16103) close tikv/tikv#16102 Add some necessary filters to filter out unnecessary data in metrics. Signed-off-by: lucasliang --- metrics/grafana/tikv_details.dashboard.py | 14 ++++++++++++-- metrics/grafana/tikv_details.json | 14 +++++++------- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index ade81f717fd0..59cc8e5f596d 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -247,6 +247,7 @@ def Cluster() -> RowPanel: target( expr=expr_sum_rate( "process_cpu_seconds_total", + label_selectors=['job=~".*tikv"'], ), ), ], @@ -257,7 +258,10 @@ def Cluster() -> RowPanel: yaxes=yaxes(left_format=UNITS.BYTES_IEC), targets=[ target( - expr=expr_sum("process_resident_memory_bytes"), + expr=expr_sum( + "process_resident_memory_bytes", + label_selectors=['job=~".*tikv"'], + ), ), ], ), @@ -376,6 +380,7 @@ def Cluster() -> RowPanel: label_selectors=['type="buckets"'], ), legend_format=r"{{instance}}-buckets", + hide=True, ), ], ), @@ -390,7 +395,12 @@ def Cluster() -> RowPanel: targets=[ target( expr=expr_operator( - "time()", "-", expr_simple("process_start_time_seconds") + "time()", + "-", + expr_simple( + "process_start_time_seconds", + label_selectors=['job=~".*tikv"'], + ), ), legend_format=r"{{instance}}", ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c36a81d522aa..467ac073332f 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -922,7 +922,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -930,7 +930,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -1055,7 +1055,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -1063,7 +1063,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -1915,7 +1915,7 @@ "datasource": "${DS_TEST-CLUSTER}", "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, @@ -2046,7 +2046,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) )", "format": "time_series", "hide": false, "instant": false, @@ -2054,7 +2054,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) )", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index abb8baa67702..31188a36bed2 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -1b98912ed3e87960a2ce063f6063b4f4f6fe6bbba98518ceabd768036287763e ./metrics/grafana/tikv_details.json +6ada9b4ad4a0bcbc847dbead88ff56bea77179e0a410bdd6148670a65ac94ed5 ./metrics/grafana/tikv_details.json From cd5c18c3bba403e586fe7da93ae1df8ebdf01c04 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 30 Nov 2023 14:43:18 +0800 Subject: [PATCH 171/203] chore: Use checkbox for check list in pull request template (#16104) close tikv/tikv#15990 Use checkbox for check list in pull request template Signed-off-by: Connor Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .github/pull_request_template.md | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 69bd19374c1f..35c561124f52 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,56 +12,56 @@ PR Title Format: ### What is changed and how it works? 
-Issue Number: Close #xxx -What's Changed: +Issue Number: Close #xxx +What's Changed: + ```commit-message + ``` ### Related changes -- PR to update `pingcap/docs`/`pingcap/docs-cn`: -- Need to cherry-pick to the release branch +- [ ] PR to update `pingcap/docs`/`pingcap/docs-cn`: +- [ ] Need to cherry-pick to the release branch -### Check List +### Check List Tests -- Unit test -- Integration test -- Manual test (add detailed scripts or steps below) -- No code +- [ ] Unit test +- [ ] Integration test +- [ ] Manual test (add detailed scripts or steps below) +- [ ] No code Side effects -- Performance regression - - Consumes more CPU - - Consumes more MEM -- Breaking backward compatibility +- [ ] Performance regression: Consumes more CPU +- [ ] Performance regression: Consumes more Memory +- [ ] Breaking backward compatibility -### Release note - -```release-note -Please add a release note. +### Release note + + +```release-note + ``` From 2de7cad9fb5e1011a8853733d4a1b776bc16bce2 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 30 Nov 2023 17:31:19 +0800 Subject: [PATCH 172/203] tikv-ctl: enhance the easy of use on EncyptionMeta cmd. (#16095) close tikv/tikv#16094 In previous version, users who wanna use tikv-ctl to dump the encryption meta might be confused with the ambiguous errors on `data-dir` configuration. To make this tool easier to use, the hints on setting `data-dir` will be present to users if they miss the configuration `storage.data-dir` in tikv.toml and `--data-dir`, when using `encryption-meta` command. Signed-off-by: lucasliang --- cmd/tikv-ctl/src/main.rs | 64 +++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 5ed1bcbd9cc9..ec0c8bfc915f 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -120,6 +120,9 @@ fn main() { } } Cmd::RaftEngineCtl { args } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .expect("data_key_manager_from_config should success"); @@ -141,6 +144,9 @@ fn main() { dump_snap_meta_file(path); } Cmd::DecryptFile { file, out_file } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let message = "This action will expose sensitive data as plaintext on persistent storage"; if !warning_prompt(message) { @@ -189,28 +195,36 @@ fn main() { io::copy(&mut reader, &mut outf).unwrap(); println!("crc32: {}", calc_crc32(outfile).unwrap()); } - Cmd::EncryptionMeta { cmd: subcmd } => match subcmd { - EncryptionMetaCmd::DumpKey { ids } => { - let message = "This action will expose encryption key(s) as plaintext. Do not output the \ + Cmd::EncryptionMeta { cmd: subcmd } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } + match subcmd { + EncryptionMetaCmd::DumpKey { ids } => { + let message = "This action will expose encryption key(s) as plaintext. 
Do not output the \ result in file on disk."; - if !warning_prompt(message) { - return; + if !warning_prompt(message) { + return; + } + DataKeyManager::dump_key_dict( + create_backend(&cfg.security.encryption.master_key) + .expect("encryption-meta master key creation"), + &cfg.storage.data_dir, + ids, + ) + .unwrap(); + } + EncryptionMetaCmd::DumpFile { path } => { + let path = path + .map(|path| fs::canonicalize(path).unwrap().to_str().unwrap().to_owned()); + DataKeyManager::dump_file_dict(&cfg.storage.data_dir, path.as_deref()).unwrap(); } - DataKeyManager::dump_key_dict( - create_backend(&cfg.security.encryption.master_key) - .expect("encryption-meta master key creation"), - &cfg.storage.data_dir, - ids, - ) - .unwrap(); - } - EncryptionMetaCmd::DumpFile { path } => { - let path = - path.map(|path| fs::canonicalize(path).unwrap().to_str().unwrap().to_owned()); - DataKeyManager::dump_file_dict(&cfg.storage.data_dir, path.as_deref()).unwrap(); } - }, + } Cmd::CleanupEncryptionMeta {} => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let key_manager = match data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .expect("data_key_manager_from_config should success") @@ -1329,3 +1343,17 @@ fn read_cluster_id(config: &TikvConfig) -> Result { .unwrap(); Ok(ident.cluster_id) } + +fn validate_storage_data_dir(config: &mut TikvConfig, data_dir: Option) -> bool { + if let Some(data_dir) = data_dir { + if !Path::new(&data_dir).exists() { + eprintln!("--data-dir {:?} not exists", data_dir); + return false; + } + config.storage.data_dir = data_dir; + } else if config.storage.data_dir.is_empty() { + eprintln!("--data-dir or data-dir in the config file should not be empty"); + return false; + } + true +} From 98d83f4c811454d480e54c73de14cc3385815e1e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 30 Nov 2023 18:22:19 +0800 Subject: [PATCH 173/203] metrics/grafana: set shared crosshair tooltip (#16106) ref tikv/tikv#15990 Set shared crosshair tooltip for tikv_detail dashboard Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 5 ++++- metrics/grafana/tikv_details.json | 2 +- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 59cc8e5f596d..6ab065f5433c 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -8566,7 +8566,6 @@ def SlowTrendStatistics() -> RowPanel: refresh="1m", inputs=[DATASOURCE_INPUT], editable=True, - graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, templating=Templates(), panels=[ Duration(), @@ -8609,4 +8608,8 @@ def SlowTrendStatistics() -> RowPanel: BackupLog(), SlowTrendStatistics(), ], + # Set 14 or larger to support shared crosshair or shared tooltip. 
+ # See https://github.com/grafana/grafana/blob/v10.2.2/public/app/features/dashboard/state/DashboardMigrator.ts#L443-L445 + schemaVersion=14, + graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, ).auto_panel_ids() diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 467ac073332f..6f8dffa27cd9 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -66821,7 +66821,7 @@ ], "refresh": "1m", "rows": [], - "schemaVersion": 12, + "schemaVersion": 14, "sharedCrosshair": false, "style": "dark", "tags": [], diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 31188a36bed2..81ae429e3612 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -6ada9b4ad4a0bcbc847dbead88ff56bea77179e0a410bdd6148670a65ac94ed5 ./metrics/grafana/tikv_details.json +d496158baafb3f61d8f4dca2a8434031ad6092b93f3aeecb4fd2947df09a8caf ./metrics/grafana/tikv_details.json From fd989444fc9fb0dd2fdfafaece6130254b7b9f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 30 Nov 2023 20:31:18 +0800 Subject: [PATCH 174/203] log-backup: make initialize failure fatal error, release memory while task stopped. (#16071) close tikv/tikv#16056, close tikv/tikv#16070 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 100 ++++++++++-------- .../backup-stream/src/metadata/client.rs | 14 ++- components/backup-stream/src/router.rs | 34 ++++++ components/backup-stream/src/tempfiles.rs | 5 + .../backup-stream/tests/failpoints/mod.rs | 29 ++++- .../backup-stream/tests/integration/mod.rs | 2 +- components/backup-stream/tests/suite.rs | 5 + 7 files changed, 139 insertions(+), 50 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 6c19edc9f93e..f453469768c0 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -211,6 +211,53 @@ where self.meta_client.clone() } + fn on_fatal_error_of_task(&self, task: &str, err: &Error) -> future![()] { + metrics::update_task_status(TaskStatus::Error, task); + let meta_cli = self.get_meta_client(); + let pdc = self.pd_client.clone(); + let store_id = self.store_id; + let sched = self.scheduler.clone(); + let safepoint_name = self.pause_guard_id_for_task(task); + let safepoint_ttl = self.pause_guard_duration(); + let code = err.error_code().code.to_owned(); + let msg = err.to_string(); + let task = task.to_owned(); + async move { + let err_fut = async { + let safepoint = meta_cli.global_progress_of_task(&task).await?; + pdc.update_service_safe_point( + safepoint_name, + TimeStamp::new(safepoint.saturating_sub(1)), + safepoint_ttl, + ) + .await?; + meta_cli.pause(&task).await?; + let mut last_error = StreamBackupError::new(); + last_error.set_error_code(code); + last_error.set_error_message(msg.clone()); + last_error.set_store_id(store_id); + last_error.set_happen_at(TimeStamp::physical_now()); + meta_cli.report_last_error(&task, last_error).await?; + Result::Ok(()) + }; + if let Err(err_report) = err_fut.await { + err_report.report(format_args!("failed to upload error {}", err_report)); + let name = task.to_owned(); + // Let's retry reporting after 5s. 
+ tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_secs(5)).await; + try_send!( + sched, + Task::FatalError( + TaskSelector::ByName(name), + Box::new(annotate!(err_report, "origin error: {}", msg)) + ) + ); + }); + } + } + } + fn on_fatal_error(&self, select: TaskSelector, err: Box) { err.report_fatal(); let tasks = self @@ -220,49 +267,7 @@ where for task in tasks { // Let's pause the task first. self.unload_task(&task); - metrics::update_task_status(TaskStatus::Error, &task); - - let meta_cli = self.get_meta_client(); - let pdc = self.pd_client.clone(); - let store_id = self.store_id; - let sched = self.scheduler.clone(); - let safepoint_name = self.pause_guard_id_for_task(&task); - let safepoint_ttl = self.pause_guard_duration(); - let code = err.error_code().code.to_owned(); - let msg = err.to_string(); - self.pool.block_on(async move { - let err_fut = async { - let safepoint = meta_cli.global_progress_of_task(&task).await?; - pdc.update_service_safe_point( - safepoint_name, - TimeStamp::new(safepoint.saturating_sub(1)), - safepoint_ttl, - ) - .await?; - meta_cli.pause(&task).await?; - let mut last_error = StreamBackupError::new(); - last_error.set_error_code(code); - last_error.set_error_message(msg.clone()); - last_error.set_store_id(store_id); - last_error.set_happen_at(TimeStamp::physical_now()); - meta_cli.report_last_error(&task, last_error).await?; - Result::Ok(()) - }; - if let Err(err_report) = err_fut.await { - err_report.report(format_args!("failed to upload error {}", err_report)); - // Let's retry reporting after 5s. - tokio::task::spawn(async move { - tokio::time::sleep(Duration::from_secs(5)).await; - try_send!( - sched, - Task::FatalError( - TaskSelector::ByName(task.to_owned()), - Box::new(annotate!(err_report, "origin error: {}", msg)) - ) - ); - }); - } - }); + self.pool.block_on(self.on_fatal_error_of_task(&task, &err)); } } @@ -637,6 +642,9 @@ where let run = async move { let task_name = task.info.get_name(); let ranges = cli.ranges_of_task(task_name).await?; + fail::fail_point!("load_task::error_when_fetching_ranges", |_| { + Err(Error::Other("what range? 
no such thing, go away.".into())) + }); info!( "register backup stream ranges"; "task" => ?task, @@ -664,10 +672,8 @@ where Result::Ok(()) }; if let Err(e) = run.await { - e.report(format!( - "failed to register backup stream task {} to router: ranges not found", - task_clone.info.get_name() - )); + self.on_fatal_error_of_task(&task_clone.info.name, &Box::new(e)) + .await; } }); metrics::update_task_status(TaskStatus::Running, &task_name); diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 1fdc1b3b1e8d..2232770915fc 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -286,7 +286,19 @@ impl MetadataClient { Ok(()) } - pub async fn get_last_error( + pub async fn get_last_error(&self, name: &str) -> Result> { + let key = MetaKey::last_errors_of(name); + + let r = self.meta_store.get_latest(Keys::Prefix(key)).await?.inner; + if r.is_empty() { + return Ok(None); + } + let r = &r[0]; + let err = protobuf::parse_from_bytes(r.value())?; + Ok(Some(err)) + } + + pub async fn get_last_error_of( &self, name: &str, store_id: u64, diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 849a503e21b0..00ce93635e83 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -830,6 +830,28 @@ pub struct StreamTaskInfo { temp_file_pool: Arc, } +impl Drop for StreamTaskInfo { + fn drop(&mut self) { + let (success, failed): (Vec<_>, Vec<_>) = self + .flushing_files + .get_mut() + .drain(..) + .chain(self.flushing_meta_files.get_mut().drain(..)) + .map(|(_, f, _)| f.inner.path().to_owned()) + .map(|p| self.temp_file_pool.remove(&p)) + .partition(|r| *r); + info!("stream task info dropped[1/2], removing flushing_temp files"; "success" => %success.len(), "failure" => %failed.len()); + let (success, failed): (Vec<_>, Vec<_>) = self + .files + .get_mut() + .drain() + .map(|(_, f)| f.into_inner().inner.path().to_owned()) + .map(|p| self.temp_file_pool.remove(&p)) + .partition(|r| *r); + info!("stream task info dropped[2/2], removing temp files"; "success" => %success.len(), "failure" => %failed.len()); + } +} + impl std::fmt::Debug for StreamTaskInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StreamTaskInfo") @@ -2089,6 +2111,12 @@ mod tests { let (task, _path) = task("cleanup_test".to_owned()).await?; must_register_table(&router, task, 1).await; write_simple_data(&router).await; + let tempfiles = router + .get_task_info("cleanup_test") + .await + .unwrap() + .temp_file_pool + .clone(); router .get_task_info("cleanup_test") .await? @@ -2097,6 +2125,7 @@ mod tests { write_simple_data(&router).await; let mut w = walkdir::WalkDir::new(&tmp).into_iter(); assert!(w.next().is_some(), "the temp files doesn't created"); + assert!(tempfiles.mem_used() > 0, "the temp files doesn't created."); drop(router); let w = walkdir::WalkDir::new(&tmp) .into_iter() @@ -2114,6 +2143,11 @@ mod tests { "the temp files should be removed, but it is {:?}", w ); + assert_eq!( + tempfiles.mem_used(), + 0, + "the temp files hasn't been cleared." 
+ ); Ok(()) } diff --git a/components/backup-stream/src/tempfiles.rs b/components/backup-stream/src/tempfiles.rs index add1ee67c128..b8f9c9e1120b 100644 --- a/components/backup-stream/src/tempfiles.rs +++ b/components/backup-stream/src/tempfiles.rs @@ -259,6 +259,11 @@ impl TempFilePool { &self.cfg } + #[cfg(test)] + pub fn mem_used(&self) -> usize { + self.current.load(Ordering::Acquire) + } + /// Create a file for writting. /// This function is synchronous so we can call it easier in the polling /// context. (Anyway, it is really hard to call an async function in the diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index 8dfc21529e47..ea09e9c7a1f1 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -30,6 +30,32 @@ mod all { use super::{ make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, }; + use crate::make_table_key; + + #[test] + fn failed_register_task() { + let suite = SuiteBuilder::new_named("failed_register_task").build(); + fail::cfg("load_task::error_when_fetching_ranges", "return").unwrap(); + let cli = suite.get_meta_cli(); + block_on(cli.insert_task_with_range( + &suite.simple_task("failed_register_task"), + &[(&make_table_key(1, b""), &make_table_key(2, b""))], + )) + .unwrap(); + + for _ in 0..10 { + if block_on(cli.get_last_error_of("failed_register_task", 1)) + .unwrap() + .is_some() + { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + + suite.dump_slash_etc(); + panic!("No error uploaded when failed to comminate to PD."); + } #[test] fn basic() { @@ -192,7 +218,8 @@ mod all { suite.must_split(&make_split_key_at_record(1, 42)); std::thread::sleep(Duration::from_secs(2)); - let error = run_async_test(suite.get_meta_cli().get_last_error("retry_abort", 1)).unwrap(); + let error = + run_async_test(suite.get_meta_cli().get_last_error_of("retry_abort", 1)).unwrap(); let error = error.expect("no error uploaded"); error .get_error_message() diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index 395159060c14..04fee6b2c091 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -160,7 +160,7 @@ mod all { let err = run_async_test( suite .get_meta_cli() - .get_last_error("test_fatal_error", *victim), + .get_last_error_of("test_fatal_error", *victim), ) .unwrap() .unwrap(); diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 0e4038d07a0d..2886bb4f5d7d 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -395,6 +395,11 @@ impl Suite { MetadataClient::new(self.meta_store.clone(), 0) } + #[allow(dead_code)] + pub fn dump_slash_etc(&self) { + self.meta_store.inner.blocking_lock().dump(); + } + pub fn must_split(&mut self, key: &[u8]) { let region = self.cluster.get_region(key); self.cluster.must_split(®ion, key); From 3544ed36b088d3cf706fcc037d62944e48b57028 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 4 Dec 2023 13:52:21 +0800 Subject: [PATCH 175/203] raftstore: fix a panic cause by peer destroy racing (#16112) close tikv/tikv#16111, close pingcap/tidb#49012 In case a node is isolated during the merge and the target peer is replaced by a peer with a larger ID, an "atomic_snapshot" is created which covers both the source peer and the snapshot of the target peer. 
In such cases, the snapshot needs to destroy the source peer too. However, if the source peer is already being destroyed triggered by gc message, it may result in a panic with a "no entry found for key" message. This commit resolves the issue by cleaning up atomic_snap_regions after the destroy, so the target peer is no longer expected to find the source peer. This cleanup is safe because the source region has already cleaned up its data and metadata from disk. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/apply.rs | 1 + components/raftstore/src/store/fsm/peer.rs | 15 ++- tests/failpoints/cases/test_merge.rs | 105 ++++++++++++++++++++ 3 files changed, 116 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 252249b74b29..f70e0a31181e 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -4074,6 +4074,7 @@ where /// Handles peer destroy. When a peer is destroyed, the corresponding apply /// delegate should be removed too. fn handle_destroy(&mut self, ctx: &mut ApplyContext, d: Destroy) { + fail_point!("on_apply_handle_destroy"); assert_eq!(d.region_id, self.delegate.region_id()); if d.merge_from_snapshot { assert_eq!(self.delegate.stopped, false); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ee2daf1c3c8f..fbcfe498bb87 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3842,14 +3842,18 @@ where self.fsm.peer.tag ); } else { + // Remove itself from atomic_snap_regions as it has cleaned both + // data and metadata. let target_region_id = *meta.targets_map.get(®ion_id).unwrap(); - let is_ready = meta - .atomic_snap_regions + meta.atomic_snap_regions .get_mut(&target_region_id) .unwrap() - .get_mut(®ion_id) - .unwrap(); - *is_ready = true; + .remove(®ion_id); + info!("peer has destroyed, clean up for incoming overlapped snapshot"; + "region_id" => region_id, + "peer_id" => self.fsm.peer_id(), + "target_region_id" => target_region_id, + ); } } @@ -4984,6 +4988,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), "region" => ?region, + "destroy_regions" => ?persist_res.destroy_regions, ); let mut state = self.ctx.global_replication_state.lock().unwrap(); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index eb15c7e16fad..929afeb70f48 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -2068,3 +2068,108 @@ fn test_restart_may_lose_merging_state() { cluster.must_put(b"k400", b"v400"); } + +// If a node is isolated during merge, and the target peer is replaced by a peer +// with a larger ID, then the snapshot of the target peer covers the source +// regions as well. +// In such cases, the snapshot becomes an "atomic_snapshot" which needs to +// destroy the source peer too. +// This test case checks the race between destroying the source peer by atomic +// snapshot and the gc message. The source peer must be successfully destroyed +// in this case. 
+#[test_case(test_raftstore::new_node_cluster)] +fn test_destroy_race_during_atomic_snapshot_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.run(); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + // Allow raft messages to source peer on store 3 before PrepareMerge. + let left_filter_block = Arc::new(atomic::AtomicBool::new(false)); + let left_filter_block_ = left_filter_block.clone(); + let left_blocked_messages = Arc::new(Mutex::new(vec![])); + let left_filter = RegionPacketFilter::new(left.get_id(), 3) + .direction(Direction::Recv) + .when(left_filter_block.clone()) + .reserve_dropped(left_blocked_messages.clone()) + .set_msg_callback(Arc::new(move |msg: &RaftMessage| { + debug!("dbg left msg_callback"; "msg" => ?msg); + if left_filter_block.load(atomic::Ordering::SeqCst) { + return; + } + for e in msg.get_message().get_entries() { + let ctx = raftstore::store::ProposalContext::from_bytes(&e.context); + if ctx.contains(raftstore::store::ProposalContext::PREPARE_MERGE) { + // Block further messages. + left_filter_block.store(true, atomic::Ordering::SeqCst); + } + } + })); + cluster.sim.wl().add_recv_filter(3, Box::new(left_filter)); + // Block messages to target peer on store 3. + let right_filter_block = Arc::new(atomic::AtomicBool::new(true)); + let new_peer_id = 1004; + let (new_peer_id_tx, new_peer_id_rx) = std::sync::mpsc::channel(); + let new_peer_id_tx = Mutex::new(Some(new_peer_id_tx)); + let (new_peer_snap_tx, new_peer_snap_rx) = std::sync::mpsc::channel(); + let new_peer_snap_tx = Mutex::new(new_peer_snap_tx); + let right_filter = RegionPacketFilter::new(right.get_id(), 3) + .direction(Direction::Recv) + .when(right_filter_block.clone()) + .set_msg_callback(Arc::new(move |msg: &RaftMessage| { + debug!("dbg right msg_callback"; "msg" => ?msg); + if msg.get_to_peer().get_id() == new_peer_id { + let _ = new_peer_id_tx.lock().unwrap().take().map(|tx| tx.send(())); + if msg.get_message().get_msg_type() == MessageType::MsgSnapshot { + let _ = new_peer_snap_tx.lock().unwrap().send(()); + } + } + })); + cluster.sim.wl().add_recv_filter(3, Box::new(right_filter)); + pd_client.must_merge(left.get_id(), right.get_id()); + + // Make target peer on store 3 a stale peer. + pd_client.must_remove_peer(right.get_id(), find_peer(&right, 3).unwrap().to_owned()); + pd_client.must_add_peer(right.get_id(), new_peer(3, new_peer_id)); + // Unblock messages to target peer on store 3. + right_filter_block.store(false, atomic::Ordering::SeqCst); + // Wait for receiving new peer id message to destroy stale target peer. + new_peer_id_rx.recv_timeout(Duration::from_secs(5)).unwrap(); + cluster.must_region_not_exist(right.get_id(), 3); + // Let source peer continue prepare merge. It will fails to schedule merge, + // because the target peer is destroyed. + left_filter_block_.store(false, atomic::Ordering::SeqCst); + // Before sending blocked messages, make sure source peer is paused at + // destroy apply delegate, so that the new right peer snapshot can will + // try to destroy source peer before applying snapshot. + fail::cfg("on_apply_handle_destroy", "pause").unwrap(); + // Send blocked messages to source peer. 
Prepare merge must fail to schedule + // CommitMerge because now target peer stale peer is destroyed. + let router = cluster.sim.wl().get_router(3).unwrap(); + for raft_msg in std::mem::take(&mut *left_blocked_messages.lock().unwrap()) { + router.send_raft_message(raft_msg).unwrap(); + } + // Wait the new right peer snapshot. + new_peer_snap_rx + .recv_timeout(Duration::from_secs(5)) + .unwrap(); + // Give it some time to step snapshot message. + sleep_ms(500); + // Let source peer destroy continue, so it races with atomic snapshot destroy. + fail::remove("on_apply_handle_destroy"); + + // New peer applies snapshot eventually. + cluster.must_transfer_leader(right.get_id(), new_peer(3, new_peer_id)); + cluster.must_put(b"k4", b"v4"); +} From ca8c70d9a0ca499007a8457c9d77cdfb958823e2 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 4 Dec 2023 15:04:22 +0800 Subject: [PATCH 176/203] raftstore: Verify checksum right after SST files are generated (#16107) close tikv/tikv#15986 Verify checksum right after SST files are generated to avoid corrupted SST being transferred to other TiKVs Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_test/src/lib.rs | 8 +++- components/raftstore/src/store/snap.rs | 20 ++++++-- components/raftstore/src/store/snap/io.rs | 57 ++++++++++++++++++++--- metrics/alertmanager/tikv.rules.yml | 12 +++++ tests/failpoints/cases/test_snap.rs | 17 +++++++ tests/integrations/storage/test_titan.rs | 2 + 6 files changed, 103 insertions(+), 13 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index dd56d9a5db40..85d9d4c1b788 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -127,7 +127,7 @@ pub mod kv { } fn destroy_tablet(&self, _ctx: TabletContext, path: &Path) -> Result<()> { - encryption::trash_dir_all(path, self.db_opt.key_manager.as_deref())?; + encryption::trash_dir_all(path, self.db_opt.get_key_manager().as_deref())?; Ok(()) } @@ -202,13 +202,17 @@ pub mod ctor { #[derive(Clone, Default)] pub struct DbOptions { - pub(crate) key_manager: Option>, + key_manager: Option>, rate_limiter: Option>, state_storage: Option>, enable_multi_batch_write: bool, } impl DbOptions { + pub fn get_key_manager(&self) -> Option> { + self.key_manager.clone() + } + pub fn set_key_manager(&mut self, key_manager: Option>) { self.key_manager = key_manager; } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index a857cbffdfda..6976f4614df7 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -92,6 +92,12 @@ impl From for Error { } } +impl From for Error { + fn from(e: engine_traits::Error) -> Self { + Error::Other(Box::new(e)) + } +} + pub type Result = result::Result; impl ErrorCodeExt for Error { @@ -873,8 +879,13 @@ impl Snapshot { self.switch_to_cf_file(cf)?; let cf_file = &mut self.cf_files[self.cf_index]; let cf_stat = if plain_file_used(cf_file.cf) { - let key_mgr = self.mgr.encryption_key_manager.as_ref(); - snap_io::build_plain_cf_file::(cf_file, key_mgr, kv_snap, &begin_key, &end_key)? + snap_io::build_plain_cf_file::( + cf_file, + self.mgr.encryption_key_manager.as_ref(), + kv_snap, + &begin_key, + &end_key, + )? } else { snap_io::build_sst_cf_file_list::( cf_file, @@ -885,6 +896,7 @@ impl Snapshot { self.mgr .get_actual_max_per_file_size(allow_multi_files_snapshot), &self.mgr.limiter, + self.mgr.encryption_key_manager.clone(), )? 
}; SNAPSHOT_LIMIT_GENERATE_BYTES.inc_by(cf_stat.total_size as u64); @@ -1212,7 +1224,7 @@ impl Snapshot { if file_for_recving.written_size != cf_file.size[i] { return Err(io::Error::new( - ErrorKind::Other, + ErrorKind::InvalidData, format!( "snapshot file {} for cf {} size mismatches, \ real size {}, expected size {}", @@ -1227,7 +1239,7 @@ impl Snapshot { let checksum = file_for_recving.write_digest.finalize(); if checksum != cf_file.checksum[i] { return Err(io::Error::new( - ErrorKind::Other, + ErrorKind::InvalidData, format!( "snapshot file {} for cf {} checksum \ mismatches, real checksum {}, expected \ diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 952f49baf446..c897aaa25979 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -10,14 +10,15 @@ use std::{ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter, Iv}; use engine_traits::{ - CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstWriter, - SstWriterBuilder, WriteBatch, + CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstReader, + SstWriter, SstWriterBuilder, WriteBatch, }; +use fail::fail_point; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ box_try, codec::bytes::{BytesEncoder, CompactBytesFromFileDecoder}, - debug, info, + debug, error, info, time::{Instant, Limiter}, }; @@ -114,6 +115,7 @@ pub fn build_sst_cf_file_list( end_key: &[u8], raw_size_per_file: u64, io_limiter: &Limiter, + key_mgr: Option>, ) -> Result where E: KvEngine, @@ -131,6 +133,48 @@ where let sst_writer = RefCell::new(create_sst_file_writer::(engine, cf, &path)?); let mut file_length: usize = 0; + let finish_sst_writer = |sst_writer: E::SstWriter, + path: String, + key_mgr: Option>| + -> Result<(), Error> { + sst_writer.finish()?; + (|| { + fail_point!("inject_sst_file_corruption", |_| { + static CALLED: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); + if CALLED + .compare_exchange( + false, + true, + std::sync::atomic::Ordering::SeqCst, + std::sync::atomic::Ordering::SeqCst, + ) + .is_err() + { + return; + } + // overwrite the file to break checksum + let mut f = OpenOptions::new().write(true).open(&path).unwrap(); + f.write_all(b"x").unwrap(); + }); + })(); + + let sst_reader = E::SstReader::open(&path, key_mgr)?; + if let Err(e) = sst_reader.verify_checksum() { + // use sst reader to verify block checksum, it would detect corrupted SST due to + // memory bit-flip + fs::remove_file(&path)?; + error!( + "failed to pass block checksum verification"; + "file" => path, + "err" => ?e, + ); + return Err(io::Error::new(io::ErrorKind::InvalidData, e).into()); + } + File::open(&path).and_then(|f| f.sync_all())?; + Ok(()) + }; + let instant = Instant::now(); box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { let entry_len = key.len() + value.len(); @@ -149,8 +193,7 @@ where match result { Ok(new_sst_writer) => { let old_writer = sst_writer.replace(new_sst_writer); - box_try!(old_writer.finish()); - box_try!(File::open(prev_path).and_then(|f| f.sync_all())); + box_try!(finish_sst_writer(old_writer, prev_path, key_mgr.clone())); } Err(e) => { let io_error = io::Error::new(io::ErrorKind::Other, e); @@ -176,9 +219,8 @@ where Ok(true) })); if stats.key_count > 0 { + box_try!(finish_sst_writer(sst_writer.into_inner(), path, key_mgr)); cf_file.add_file(file_id); - box_try!(sst_writer.into_inner().finish()); - 
box_try!(File::open(path).and_then(|f| f.sync_all())); info!( "build_sst_cf_file_list builds {} files in cf {}. Total keys {}, total size {}. raw_size_per_file {}, total takes {:?}", file_id + 1, @@ -425,6 +467,7 @@ mod tests { &keys::data_key(b"z"), *max_file_size, &limiter, + db_opt.as_ref().and_then(|opt| opt.get_key_manager()), ) .unwrap(); if stats.key_count == 0 { diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index aa8530df45fe..1b460311e60e 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -1,6 +1,18 @@ groups: - name: alert.rules rules: + - alert: TiKV_critical_error + expr: sum(rate(tikv_critical_error_total[1m])) BY (type, instance) > 0 + # without the for clause will become active on the first evaluation. + labels: + env: ENV_LABELS_ENV + level: critical + expr: sum(rate(tikv_critical_error_total[1m])) BY (type, instance) > 0 + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' + value: '{{ $value }}' + summary: TiKV encounters critical error + - alert: TiKV_memory_used_too_fast expr: process_resident_memory_bytes{job=~"tikv",instance=~".*"} - (process_resident_memory_bytes{job=~"tikv",instance=~".*"} offset 5m) > 5*1024*1024*1024 for: 5m diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 7748b1d29856..ca23b4c5a179 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -992,3 +992,20 @@ fn test_snapshot_send_failed() { sleep_ms(100); assert!(mgr.list_snapshot().unwrap().is_empty()); } + +#[test] +/// Test a corrupted snapshot can be detected and retry to generate a new one. +fn test_retry_corrupted_snapshot() { + let mut cluster = new_node_cluster(0, 3); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + let r = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + must_get_none(&cluster.get_engine(3), b"k1"); + pd_client.must_add_peer(r, new_peer(2, 2)); + fail::cfg("inject_sst_file_corruption", "return").unwrap(); + pd_client.must_add_peer(r, new_peer(3, 3)); + + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); +} diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 4bb8fee40878..752c6aaee1ac 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -376,6 +376,7 @@ fn test_delete_files_in_range_for_titan() { b"{", u64::MAX, &limiter, + None, ) .unwrap(); let mut cf_file_write = CfFile::new( @@ -392,6 +393,7 @@ fn test_delete_files_in_range_for_titan() { b"{", u64::MAX, &limiter, + None, ) .unwrap(); From e14a803902d9870266a260b3e30de2d5a3bd00a6 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Dec 2023 11:20:50 +0800 Subject: [PATCH 177/203] raftstore: clean up destroyed_region_for_snap when a peer is destroyed (#16133) ref tikv/tikv#16111 Signed-off-by: Neil Shen --- components/raftstore/src/store/fsm/peer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index fbcfe498bb87..7c33bf66b876 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3849,6 +3849,7 @@ where .get_mut(&target_region_id) .unwrap() .remove(®ion_id); + meta.destroyed_region_for_snap.remove(®ion_id); info!("peer has destroyed, clean up for incoming overlapped snapshot"; "region_id" => region_id, 
"peer_id" => self.fsm.peer_id(), From d76ab8f7325a4aa1c88fa94fc4122b5c415a96c5 Mon Sep 17 00:00:00 2001 From: qupeng Date: Tue, 5 Dec 2023 13:08:48 +0800 Subject: [PATCH 178/203] cdc: return server_is_busy to cdc clients if necessary (#16127) ref tikv/tikv#16035 return server_is_busy to cdc clients if necessary Signed-off-by: qupeng --- Cargo.lock | 2 +- components/cdc/src/delegate.rs | 6 ++++++ components/cdc/src/endpoint.rs | 8 +++----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13bd3c05781c..147b42405bf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2715,7 +2715,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#87bebcc0d071a18cbbd94a4fc02de9c4988af815" +source = "git+https://github.com/pingcap/kvproto.git#96c40585233f176393213dbd4c04d76259bad8f9" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 780cfe8dea66..637ecab0440d 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -206,6 +206,12 @@ impl Downstream { self.sink_error_event(region_id, err_event) } + pub fn sink_server_is_busy(&self, region_id: u64, reason: String) -> Result<()> { + let mut err_event = EventError::default(); + err_event.mut_server_is_busy().reason = reason; + self.sink_error_event(region_id, err_event) + } + pub fn set_sink(&mut self, sink: Sink) { self.sink = Some(sink); } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e1a985d4e981..9f840ab49d57 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -742,10 +742,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { - assert!(err.has_region_not_found()); + assert!(err.has_server_is_busy()); } other => panic!("unknown event {:?}", other), } From 44301d2066ba7675067f1ac5c5d94eaf629ebd65 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 5 Dec 2023 16:34:19 +0800 Subject: [PATCH 179/203] raftstore: fine-tunes slow score (#16087) ref tikv/tikv#15909 In the previous implementation, SlowScore identified a node as slow if it had hotspot regions. That is, previous SlowScore has fairly high false-positive rate. Moreover, this approach needs adjustment in sensitivity to promptly detect I/O jitters. To address this, this pr refines the algorithm by incorporating CPU usage as an additional condition to determine whether a node is slow. And based on our testing records, this modification significantly reduces the false-positive rate. Additionally, this pr has updated the default value of `inspect-interval` to `100ms` to enhance sensitivity and improve overall performance. 
Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- components/raftstore-v2/src/operation/life.rs | 7 +- .../raftstore-v2/src/operation/ready/mod.rs | 16 ++-- components/raftstore/src/store/config.rs | 10 ++- components/raftstore/src/store/fsm/store.rs | 13 ++- .../raftstore/src/store/local_metrics.rs | 80 +++++++++++++++---- components/raftstore/src/store/peer.rs | 18 +++-- components/raftstore/src/store/worker/pd.rs | 56 ++++++++++--- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 65 +++------------ 9 files changed, 162 insertions(+), 104 deletions(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 00df317f73a8..e9fc84643da3 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -45,6 +45,7 @@ use raftstore::{ life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, Proposal, }, + local_metrics::IoType as InspectIoType, metrics::RAFT_PEER_PENDING_DURATION, util, DiskFullPeers, Transport, WriteTask, }, @@ -579,9 +580,9 @@ impl Store { { // Record the last statistics of commit-log-duration and store-write-duration. inspector.record_store_wait(start_ts.saturating_elapsed()); - inspector.record_store_commit(ctx.raft_metrics.stat_commit_log.avg()); - // Reset the stat_commit_log and wait it to be refreshed in the next tick. - ctx.raft_metrics.stat_commit_log.reset(); + inspector.record_store_commit(ctx.raft_metrics.health_stats.avg(InspectIoType::Network)); + // Reset the health_stats and wait it to be refreshed in the next tick. + ctx.raft_metrics.health_stats.reset(); ctx.pending_latency_inspect.push(inspector); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index a2697f29f027..39ce97073599 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -43,6 +43,7 @@ use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ fsm::store::StoreRegionMeta, + local_metrics::IoType, needs_evict_entry_cache, util::{self, is_first_append_entry, is_initial_msg}, worker_metrics::SNAP_COUNTER, @@ -989,7 +990,7 @@ impl Peer { return; } let now = Instant::now(); - let stat_raft_commit_log = &mut ctx.raft_metrics.stat_commit_log; + let health_stats = &mut ctx.raft_metrics.health_stats; for i in old_index + 1..=new_index { if let Some((term, trackers)) = self.proposals().find_trackers(i) { if self.entry_storage().term(i).map_or(false, |t| t == term) { @@ -1002,14 +1003,11 @@ impl Peer { for tracker in trackers { // Collect the metrics related to commit_log // durations. - stat_raft_commit_log.record(Duration::from_nanos(tracker.observe( - now, - hist, - |t| { - t.metrics.commit_not_persisted = !commit_persisted; - &mut t.metrics.wf_commit_log_nanos - }, - ))); + let duration = tracker.observe(now, hist, |t| { + t.metrics.commit_not_persisted = !commit_persisted; + &mut t.metrics.wf_commit_log_nanos + }); + health_stats.observe(Duration::from_nanos(duration), IoType::Network); } } } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index b09afb3c6e1c..c7c65e80d6c9 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -353,6 +353,9 @@ pub struct Config { // Interval to inspect the latency of raftstore for slow store detection. 
pub inspect_interval: ReadableDuration, + /// Threshold of CPU utilization to inspect for slow store detection. + #[doc(hidden)] + pub inspect_cpu_util_thd: f64, // The unsensitive(increase it to reduce sensitiveness) of the cause-trend detection pub slow_trend_unsensitive_cause: f64, @@ -517,7 +520,12 @@ impl Default for Config { region_max_size: ReadableSize(0), region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), - inspect_interval: ReadableDuration::millis(500), + inspect_interval: ReadableDuration::millis(100), + // The default value of `inspect_cpu_util_thd` is 0.4, which means + // when the cpu utilization is greater than 40%, the store might be + // regarded as a slow node if there exists delayed inspected messages. + // It's good enough for most cases to reduce the false positive rate. + inspect_cpu_util_thd: 0.4, // The param `slow_trend_unsensitive_cause == 2.0` can yield good results, // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled slow_trend_unsensitive_cause: 10.0, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 8c8919df67e5..9c3274d7945e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -93,7 +93,7 @@ use crate::{ ApplyBatchSystem, ApplyNotifier, ApplyPollerBuilder, ApplyRes, ApplyRouter, ApplyTaskRes, }, - local_metrics::RaftMetrics, + local_metrics::{IoType as InspectIoType, RaftMetrics}, memory::*, metrics::*, peer_storage, @@ -847,9 +847,14 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> mut inspector, } => { inspector.record_store_wait(send_time.saturating_elapsed()); - inspector.record_store_commit(self.ctx.raft_metrics.stat_commit_log.avg()); - // Reset the stat_commit_log and wait it to be refreshed in the next tick. - self.ctx.raft_metrics.stat_commit_log.reset(); + inspector.record_store_commit( + self.ctx + .raft_metrics + .health_stats + .avg(InspectIoType::Network), + ); + // Reset the health_stats and wait it to be refreshed in the next tick. + self.ctx.raft_metrics.health_stats.reset(); self.ctx.pending_latency_inspect.push(inspector); } StoreMsg::UnsafeRecoveryReport(report) => self.store_heartbeat_pd(Some(report)), diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 7207ac7869d6..dc94a3afbe79 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -68,35 +68,81 @@ impl RaftSendMessageMetrics { } } +/// Buffered statistics for recording local raftstore message duration. +/// +/// As it's only used for recording local raftstore message duration, +/// and it will be manually reset preiodically, so it's not necessary +/// to use `LocalHistogram`. 
#[derive(Default)] -pub struct RaftCommitLogStatistics { - pub last_commit_log_duration_sum: Duration, - pub last_commit_log_count_sum: u64, +struct LocalHealthStatistics { + duration_sum: Duration, + count: u64, } -impl RaftCommitLogStatistics { +impl LocalHealthStatistics { #[inline] - pub fn record(&mut self, dur: Duration) { - self.last_commit_log_count_sum += 1; - self.last_commit_log_duration_sum += dur; + fn observe(&mut self, dur: Duration) { + self.count += 1; + self.duration_sum += dur; } #[inline] - pub fn avg(&self) -> Duration { - if self.last_commit_log_count_sum > 0 { - Duration::from_micros( - self.last_commit_log_duration_sum.as_micros() as u64 - / self.last_commit_log_count_sum, - ) + fn avg(&self) -> Duration { + if self.count > 0 { + Duration::from_micros(self.duration_sum.as_micros() as u64 / self.count) } else { Duration::default() } } #[inline] + fn reset(&mut self) { + self.count = 0; + self.duration_sum = Duration::default(); + } +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IoType { + Disk = 0, + Network = 1, +} + +/// Buffered statistics for recording the health of raftstore. +#[derive(Default)] +pub struct HealthStatistics { + // represents periodic latency on the disk io. + disk_io_dur: LocalHealthStatistics, + // represents the latency of the network io. + network_io_dur: LocalHealthStatistics, +} + +impl HealthStatistics { + #[inline] + pub fn observe(&mut self, dur: Duration, io_type: IoType) { + match io_type { + IoType::Disk => self.disk_io_dur.observe(dur), + IoType::Network => self.network_io_dur.observe(dur), + } + } + + #[inline] + pub fn avg(&self, io_type: IoType) -> Duration { + match io_type { + IoType::Disk => self.disk_io_dur.avg(), + IoType::Network => self.network_io_dur.avg(), + } + } + + #[inline] + /// Reset HealthStatistics. + /// + /// Should be manually reset when the metrics are + /// accepted by slowness inspector. 
pub fn reset(&mut self) { - self.last_commit_log_count_sum = 0; - self.last_commit_log_duration_sum = Duration::default(); + self.disk_io_dur.reset(); + self.network_io_dur.reset(); } } @@ -133,7 +179,7 @@ pub struct RaftMetrics { pub wf_commit_not_persist_log: LocalHistogram, // local statistics for slowness - pub stat_commit_log: RaftCommitLogStatistics, + pub health_stats: HealthStatistics, pub check_stale_peer: LocalIntCounter, pub leader_missing: Arc>>, @@ -172,7 +218,7 @@ impl RaftMetrics { wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), - stat_commit_log: RaftCommitLogStatistics::default(), + health_stats: HealthStatistics::default(), check_stale_peer: CHECK_STALE_PEER_COUNTER.local(), leader_missing: Arc::default(), last_flush_time: Instant::now_coarse(), diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 185ab9d2a925..2d304490bb7a 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -71,7 +71,7 @@ use uuid::Uuid; use super::{ cmd_resp, - local_metrics::RaftMetrics, + local_metrics::{IoType, RaftMetrics}, metrics::*, peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, read_queue::{ReadIndexQueue, ReadIndexRequest}, @@ -1860,7 +1860,7 @@ where Ok(()) } - fn report_persist_log_duration(&self, pre_persist_index: u64, metrics: &RaftMetrics) { + fn report_persist_log_duration(&self, pre_persist_index: u64, metrics: &mut RaftMetrics) { if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } @@ -1909,9 +1909,15 @@ where t.metrics.commit_not_persisted = !commit_persisted; &mut t.metrics.wf_commit_log_nanos }); + // Normally, commit_log_duration both contains the duraiton on persisting + // raft logs and transferring raft logs to other nodes. Therefore, it can + // reflects slowness of the node on I/Os, whatever the reason is. + // Here, health_stats uses the recorded commit_log_duration as the + // latency to perspect whether there exists jitters on network. It's not + // accurate, but it's proved that it's a good approximation. 
metrics - .stat_commit_log - .record(Duration::from_nanos(duration)); + .health_stats + .observe(Duration::from_nanos(duration), IoType::Network); } } } @@ -3129,7 +3135,7 @@ where let pre_persist_index = self.raft_group.raft.raft_log.persisted; let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.on_persist_ready(self.persisted_number); - self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); + self.report_persist_log_duration(pre_persist_index, &mut ctx.raft_metrics); self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; @@ -3174,7 +3180,7 @@ where let pre_persist_index = self.raft_group.raft.raft_log.persisted; let pre_commit_index = self.raft_group.raft.raft_log.committed; let mut light_rd = self.raft_group.advance_append(ready); - self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); + self.report_persist_log_duration(pre_persist_index, &mut ctx.raft_metrics); self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index b5bb189d84b6..71ab6a9e2a93 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -41,7 +41,7 @@ use tikv_util::{ box_err, debug, error, info, metrics::ThreadInfoStatistics, store::QueryStats, - sys::thread::StdThreadBuildWrapper, + sys::{thread::StdThreadBuildWrapper, SysQuota}, thd_name, time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, @@ -225,6 +225,9 @@ pub struct StoreStat { pub store_cpu_usages: RecordPairVec, pub store_read_io_rates: RecordPairVec, pub store_write_io_rates: RecordPairVec, + + store_cpu_quota: f64, // quota of cpu usage + store_cpu_busy_thd: f64, } impl Default for StoreStat { @@ -249,10 +252,33 @@ impl Default for StoreStat { store_cpu_usages: RecordPairVec::default(), store_read_io_rates: RecordPairVec::default(), store_write_io_rates: RecordPairVec::default(), + + store_cpu_quota: 0.0_f64, + store_cpu_busy_thd: 0.8_f64, } } } +impl StoreStat { + fn set_cpu_quota(&mut self, cpu_cores: f64, busy_thd: f64) { + self.store_cpu_quota = cpu_cores * 100.0; + self.store_cpu_busy_thd = busy_thd; + } + + fn maybe_busy(&self) -> bool { + if self.store_cpu_quota < 1.0 || self.store_cpu_busy_thd > 1.0 { + return false; + } + + let mut cpu_usage = 0_u64; + for record in self.store_cpu_usages.iter() { + cpu_usage += record.get_value(); + } + + (cpu_usage as f64 / self.store_cpu_quota) >= self.store_cpu_busy_thd + } +} + #[derive(Default)] pub struct PeerStat { pub read_bytes: u64, @@ -836,14 +862,14 @@ impl SlowScore { } } - fn record(&mut self, id: u64, duration: Duration) { + fn record(&mut self, id: u64, duration: Duration, not_busy: bool) { self.last_record_time = Instant::now(); if id != self.last_tick_id { return; } self.last_tick_finished = true; self.total_requests += 1; - if duration >= self.inspect_interval { + if not_busy && duration >= self.inspect_interval { self.timeout_requests += 1; } } @@ -1027,6 +1053,8 @@ where causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_manager: GrpcServiceManager, ) -> Runner { + let mut store_stat = StoreStat::default(); + store_stat.set_cpu_quota(SysQuota::cpu_cores_quota(), cfg.inspect_cpu_util_thd); let store_heartbeat_interval = cfg.pd_store_heartbeat_tick_interval.0; let interval = 
store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; let mut stats_monitor = StatsMonitor::new( @@ -1045,7 +1073,7 @@ where is_hb_receiver_scheduled: false, region_peers: HashMap::default(), region_buckets: HashMap::default(), - store_stat: StoreStat::default(), + store_stat, start_ts: UnixSecs::now(), scheduler, store_heartbeat_interval, @@ -2269,8 +2297,11 @@ where Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => { // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. - self.slow_score - .record(id, duration.delays_on_disk_io(false)); + self.slow_score.record( + id, + duration.delays_on_disk_io(false), + !self.store_stat.maybe_busy(), + ); self.slow_trend.record(duration); } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), @@ -2310,7 +2341,12 @@ where self.update_health_status(ServingStatus::Serving); } if !self.slow_score.last_tick_finished { - self.slow_score.record_timeout(); + // If the last tick is not finished, it means that the current store might + // be busy on handling requests or delayed on I/O operations. And only when + // the current store is not busy, it should record the last_tick as a timeout. + if !self.store_stat.maybe_busy() { + self.slow_score.record_timeout(); + } // If the last slow_score already reached abnormal state and was delayed for // reporting by `store-heartbeat` to PD, we should report it here manually as // a FAKE `store-heartbeat`. @@ -2342,17 +2378,17 @@ where STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_process"]) .observe(tikv_util::time::duration_to_sec( - duration.store_process_duration.unwrap(), + duration.store_process_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_wait"]) .observe(tikv_util::time::duration_to_sec( - duration.store_wait_duration.unwrap(), + duration.store_wait_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_commit"]) .observe(tikv_util::time::duration_to_sec( - duration.store_commit_duration.unwrap(), + duration.store_commit_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 5e7e4529c405..f1628cda50e1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -248,6 +248,7 @@ fn test_serde_custom_tikv_config() { io_reschedule_concurrent_max_count: 1234, io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), + inspect_cpu_util_thd: 0.666, check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index d1e83663c24f..61a2a24b43a8 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -3,7 +3,7 @@ slow-log-threshold = "1s" panic-when-unexpected-key-or-data = true abort-on-panic = true memory-usage-limit = "10GB" -memory-usage-high-water= 0.65 +memory-usage-high-water = 0.65 [log] level = "fatal" @@ -134,9 +134,7 @@ export-priority = "high" other-priority = "low" [pd] -endpoints = [ - "example.com:443", -] +endpoints = ["example.com:443"] [metric] job = "tikv_1" @@ -223,6 +221,7 @@ waterfall-metrics = true 
io-reschedule-concurrent-max-count = 1234 io-reschedule-hotpot-duration = "4321s" inspect-interval = "444ms" +inspect-cpu-util-thd = 0.666 check-leader-lease-interval = "123ms" renew-leader-lease-advance-duration = "456ms" reactive-memory-lock-tick-interval = "566ms" @@ -302,15 +301,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] bottommost-level-compression = "disable" bottommost-zstd-compression-dict-size = 1024 bottommost-zstd-compression-sample-size = 1024 @@ -374,15 +365,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -428,15 +411,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" write-buffer-limit = "16MB" max-write-buffer-number = 12 @@ -483,15 +458,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -569,15 +536,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -638,9 +597,7 @@ ca-path = "invalid path" cert-path = "invalid path" key-path = "invalid path" redact-info-log = true -cert-allowed-cn = [ - "example.tikv.com", -] +cert-allowed-cn = ["example.tikv.com"] [security.encryption] data-encryption-method = "aes128-ctr" @@ -688,9 +645,9 @@ enable-compaction-filter = false compaction-filter-skip-version-check = true [pessimistic-txn] -enabled = false # test backward compatibility +enabled = false # test backward compatibility wait-for-lock-timeout = "10ms" -wake-up-delay-duration = 100 # test backward compatibility +wake-up-delay-duration = 100 # test backward compatibility pipelined = false in-memory = false From 3c2fcd7ce1852c2412e3aa196c3fd0c91ecd477f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 6 Dec 2023 10:35:48 +0800 Subject: [PATCH 180/203] metrics: fix heatmap on grafana 9 (#16145) close tikv/tikv#16144 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/common.py | 4 + metrics/grafana/tikv_details.json | 216 +++++++++++------------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 113 insertions(+), 109 deletions(-) diff --git a/metrics/grafana/common.py 
b/metrics/grafana/common.py index cb6757bee937..2c2ed7570ed2 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -940,6 +940,9 @@ def heatmap_panel( # the resolution is too high. # See: https://grafana.com/blog/2020/06/23/how-to-visualize-prometheus-histograms-in-grafana/ maxDataPoints=512, + # Fix grafana heatmap migration panic if options is null. + # See: https://github.com/grafana/grafana/blob/v9.5.14/public/app/plugins/panel/heatmap/migrations.ts#L17 + options={}, ) @@ -1081,6 +1084,7 @@ def heatmap_panel_graph_panel_histogram_quantile_pairs( description=graph_description, metric=f"{metric}", yaxes=yaxes(left_format=yaxis_format), + label_selectors=label_selectors, by_labels=graph_by_labels, hide_count=True, ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 6f8dffa27cd9..11833e02ce05 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4191,7 +4191,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -4496,7 +4496,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -4733,7 +4733,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15259,7 +15259,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15413,7 +15413,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15421,14 +15421,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15436,14 +15436,14 @@ "intervalFactor": 1, 
"legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -15451,14 +15451,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -15466,7 +15466,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -15564,7 +15564,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15869,7 +15869,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": 
null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -16174,7 +16174,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -16479,7 +16479,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -17639,7 +17639,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -17944,7 +17944,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18249,7 +18249,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18554,7 +18554,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18658,7 +18658,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -19381,7 +19381,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -19485,7 +19485,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -23398,7 +23398,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24413,7 +24413,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24567,7 +24567,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24575,14 +24575,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24590,14 +24590,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -24605,14 +24605,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -24620,7 +24620,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -24718,7 +24718,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24872,7 +24872,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24880,14 +24880,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24895,14 +24895,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": 
"(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -24910,14 +24910,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -24925,7 +24925,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25023,7 +25023,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25177,7 +25177,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25185,14 +25185,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25200,14 +25200,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25215,14 +25215,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25230,7 +25230,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25328,7 +25328,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25482,7 +25482,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25490,14 +25490,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25505,14 +25505,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25520,14 +25520,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25535,7 +25535,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25633,7 +25633,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25787,7 +25787,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25795,14 +25795,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25810,14 +25810,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25825,14 +25825,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25840,7 +25840,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -26997,7 +26997,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -29849,7 +29849,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -30542,7 +30542,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -34316,7 +34316,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -37131,7 +37131,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -43838,7 +43838,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -51579,7 +51579,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52302,7 +52302,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52406,7 +52406,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, 
"repeatDirection": null, "reverseYBuckets": false, @@ -52510,7 +52510,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52747,7 +52747,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52851,7 +52851,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52955,7 +52955,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -53458,7 +53458,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -54668,7 +54668,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -55038,7 +55038,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57015,7 +57015,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57119,7 +57119,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57371,7 +57371,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57475,7 +57475,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57579,7 +57579,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57846,7 +57846,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59248,7 +59248,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59352,7 +59352,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59456,7 +59456,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59560,7 +59560,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59664,7 +59664,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59768,7 +59768,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, 
"reverseYBuckets": false, @@ -59872,7 +59872,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63511,7 +63511,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63615,7 +63615,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63719,7 +63719,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63823,7 +63823,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63927,7 +63927,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64031,7 +64031,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64135,7 +64135,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64239,7 +64239,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -65274,7 +65274,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -65378,7 +65378,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 81ae429e3612..978a1d4c9aa3 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -d496158baafb3f61d8f4dca2a8434031ad6092b93f3aeecb4fd2947df09a8caf ./metrics/grafana/tikv_details.json +ad780a5aca1d52f0f3a84780fc94af71195f3b8a59bc915ea5bf37a15ae5a357 ./metrics/grafana/tikv_details.json From c1f7c4aab03398517de4ebc589a24f2239b44463 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 6 Dec 2023 14:07:20 +0800 Subject: [PATCH 181/203] Fix the QPS metrics in tikv_details.json. (#16149) close tikv/tikv#16148 Fix the issue where the filter label of QPS, in the tikv grafana, lacks the label `type`. 
Signed-off-by: lucasliang --- metrics/grafana/tikv_details.dashboard.py | 1 + metrics/grafana/tikv_details.json | 4 ++-- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 6ab065f5433c..5c84152174e8 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -316,6 +316,7 @@ def Cluster() -> RowPanel: expr=expr_sum_rate( "tikv_grpc_msg_duration_seconds_count", label_selectors=['type!="kv_gc"'], + by_labels=["instance", "type"], ), legend_format=r"{{instance}}-{{type}}", ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 11833e02ce05..8c6bea179929 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1469,7 +1469,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, @@ -1477,7 +1477,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance, type) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 978a1d4c9aa3..330822d3c7bc 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -ad780a5aca1d52f0f3a84780fc94af71195f3b8a59bc915ea5bf37a15ae5a357 ./metrics/grafana/tikv_details.json +dbcc3ef2b588c133dbe4b56196abb366da5b25631f6d42bbc6ae1811b21bbec5 ./metrics/grafana/tikv_details.json From 54d7d425c9813a066308ddea85ee027e8faf5682 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 6 Dec 2023 17:02:50 +0800 Subject: [PATCH 182/203] resource_control: replace limited future's post delay with delay before first poll (#16100) ref tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/future.rs | 63 +++++++++++-------- .../resource_control/src/resource_limiter.rs | 37 ++++++----- components/resource_control/src/service.rs | 2 + components/resource_control/src/worker.rs | 31 +++++---- 4 files changed, 81 insertions(+), 52 deletions(-) diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 53bca48b3019..0750a21c5742 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -92,7 +92,9 @@ pub struct LimitedFuture { #[pin] post_delay: OptionalFuture>, resource_limiter: Arc, - res: Poll, + // if the future is first polled, we need to let it consume a 0 value + // to compensate the debt of previously 
finished tasks. + is_first_poll: bool, } impl LimitedFuture { @@ -102,7 +104,7 @@ impl LimitedFuture { pre_delay: None.into(), post_delay: None.into(), resource_limiter, - res: Poll::Pending, + is_first_poll: true, } } } @@ -112,19 +114,32 @@ impl Future for LimitedFuture { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut this = self.project(); - if !this.post_delay.is_done() { - assert!(this.pre_delay.is_done()); + if *this.is_first_poll { + debug_assert!(this.pre_delay.finished && this.post_delay.finished); + *this.is_first_poll = false; + let wait_dur = this + .resource_limiter + .consume(Duration::ZERO, IoBytes::default(), true) + .min(MAX_WAIT_DURATION); + if wait_dur > Duration::ZERO { + *this.pre_delay = Some( + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + wait_dur) + .compat(), + ) + .into(); + } + } + if !this.post_delay.finished { + assert!(this.pre_delay.finished); std::mem::swap(&mut *this.pre_delay, &mut *this.post_delay); } - if !this.pre_delay.is_done() { + if !this.pre_delay.finished { let res = this.pre_delay.poll(cx); if res.is_pending() { return Poll::Pending; } } - if this.res.is_ready() { - return std::mem::replace(this.res, Poll::Pending); - } // get io stats is very expensive, so we only do so if only io control is // enabled. let mut last_io_bytes = None; @@ -157,8 +172,10 @@ impl Future for LimitedFuture { } else { IoBytes::default() }; - let mut wait_dur = this.resource_limiter.consume(dur, io_bytes); - if wait_dur == Duration::ZERO { + let mut wait_dur = this + .resource_limiter + .consume(dur, io_bytes, res.is_pending()); + if wait_dur == Duration::ZERO || res.is_ready() { return res; } if wait_dur > MAX_WAIT_DURATION { @@ -171,31 +188,24 @@ impl Future for LimitedFuture { .compat(), ) .into(); - if this.post_delay.poll(cx).is_ready() { - return res; - } - *this.res = res; + _ = this.post_delay.poll(cx); Poll::Pending } } /// `OptionalFuture` is similar to futures::OptionFuture, but provide an extra -/// `is_done` method. +/// `finished` flag to determine if the future requires poll. #[pin_project] struct OptionalFuture { #[pin] f: Option, - done: bool, + finished: bool, } impl OptionalFuture { fn new(f: Option) -> Self { - let done = f.is_none(); - Self { f, done } - } - - fn is_done(&self) -> bool { - self.done + let finished = f.is_none(); + Self { f, finished } } } @@ -212,7 +222,7 @@ impl Future for OptionalFuture { let this = self.project(); match this.f.as_pin_mut() { Some(x) => x.poll(cx).map(|r| { - *this.done = true; + *this.finished = true; Some(r) }), None => Poll::Ready(None), @@ -312,7 +322,7 @@ mod tests { let delta = new_stats - stats; let dur = start.saturating_elapsed(); assert_eq!(delta.total_consumed, 150); - assert_eq!(delta.total_wait_dur_us, 150_000); + assert!(delta.total_wait_dur_us >= 140_000 && delta.total_wait_dur_us <= 160_000); assert!(dur >= Duration::from_millis(150) && dur <= Duration::from_millis(160)); // fetch io bytes failed, consumed value is 0. 
@@ -320,7 +330,10 @@ mod tests { { fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - assert_eq!(resource_limiter.get_limit_statistics(Io), new_stats); + assert_eq!( + resource_limiter.get_limit_statistics(Io).total_consumed, + new_stats.total_consumed + ); fail::remove("failed_to_get_thread_io_bytes_stats"); } } diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index bce6867ac2ea..ab2144f18ccf 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -71,19 +71,22 @@ impl ResourceLimiter { self.is_background } - pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { + pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes, wait: bool) -> Duration { let cpu_dur = - self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); - let io_dur = self.limiters[ResourceType::Io as usize].consume_io(io_bytes); + self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64, wait); + let io_dur = self.limiters[ResourceType::Io as usize].consume_io(io_bytes, wait); let wait_dur = cpu_dur.max(io_dur); - BACKGROUND_TASKS_WAIT_DURATION - .with_label_values(&[&self.name]) - .inc_by(wait_dur.as_micros() as u64); + if wait_dur > Duration::ZERO { + BACKGROUND_TASKS_WAIT_DURATION + .with_label_values(&[&self.name]) + .inc_by(wait_dur.as_micros() as u64); + } + wait_dur } pub async fn async_consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { - let dur = self.consume(cpu_time, io_bytes); + let dur = self.consume(cpu_time, io_bytes, true); if !dur.is_zero() { _ = GLOBAL_TIMER_HANDLE .delay(Instant::now() + dur) @@ -154,12 +157,14 @@ impl QuotaLimiter { ) } - fn consume(&self, value: u64) -> Duration { - if value == 0 { + fn consume(&self, value: u64, wait: bool) -> Duration { + if value == 0 && self.limiter.speed_limit().is_infinite() { return Duration::ZERO; } - let dur = self.limiter.consume_duration(value as usize); - if dur != Duration::ZERO { + let mut dur = self.limiter.consume_duration(value as usize); + if !wait { + dur = Duration::ZERO; + } else if dur != Duration::ZERO { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } @@ -167,16 +172,18 @@ impl QuotaLimiter { dur } - fn consume_io(&self, value: IoBytes) -> Duration { + fn consume_io(&self, value: IoBytes, wait: bool) -> Duration { self.read_bytes.fetch_add(value.read, Ordering::Relaxed); self.write_bytes.fetch_add(value.write, Ordering::Relaxed); let value = value.read + value.write; - if value == 0 { + if value == 0 && self.limiter.speed_limit().is_infinite() { return Duration::ZERO; } - let dur = self.limiter.consume_duration(value as usize); - if dur != Duration::ZERO { + let mut dur = self.limiter.consume_duration(value as usize); + if !wait { + dur = Duration::ZERO; + } else if dur != Duration::ZERO { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 2c2bbdc55490..26652cda00e0 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -575,6 +575,7 @@ pub mod tests { read: 1000, write: 1000, }, + true, ); // Wait for report ru metrics. 
std::thread::sleep(Duration::from_millis(100)); @@ -595,6 +596,7 @@ pub mod tests { read: 2000, write: 2000, }, + true, ); // Wait for report ru metrics. std::thread::sleep(Duration::from_millis(100)); diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 2ea72f132eed..b90787914d6d 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -700,6 +700,7 @@ mod tests { read: 1000, write: 1000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -729,6 +730,7 @@ mod tests { read: 1000, write: 1000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -747,6 +749,7 @@ mod tests { read: 5000, write: 5000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -798,6 +801,7 @@ mod tests { read: 600, write: 600, }, + true, ); bg_limiter.consume( Duration::from_millis(1800), @@ -805,6 +809,7 @@ mod tests { read: 900, write: 900, }, + true, ); worker.adjust_quota(); check_limiter( @@ -873,6 +878,7 @@ mod tests { read: 600, write: 600, }, + true, ); new_bg_limiter.consume( Duration::from_millis(1800), @@ -880,6 +886,7 @@ mod tests { read: 900, write: 900, }, + true, ); worker.adjust_quota(); @@ -954,7 +961,7 @@ mod tests { // only default group, always return infinity. reset_quota(&mut worker, 6.4); - priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default()); + priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default(), true); worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); @@ -964,46 +971,46 @@ mod tests { resource_ctl.add_resource_group(rg2); reset_quota(&mut worker, 6.4); - priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default()); + priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default(), true); worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 1.2); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 2.6, 0.6); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); reset_quota(&mut worker, 8.0); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default(), true); + 
priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 2.8); reset_quota(&mut worker, 6.0); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 5.2); From 8218b14797c8bdb0cb54ca0830448cafc56edd29 Mon Sep 17 00:00:00 2001 From: Shirly Date: Wed, 6 Dec 2023 17:19:21 +0800 Subject: [PATCH 183/203] raftstore/util: fix errors in comments of get_min_resolved_ts (#16140) ref tikv/tikv#13437 Signed-off-by: Shirly Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/util.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d1ef3fde75ad..6eef4c61686f 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1193,14 +1193,15 @@ impl RegionReadProgressRegistry { } // Get the minimum `resolved_ts` which could ensure that there will be no more - // locks whose `start_ts` is greater than it. + // locks whose `commit_ts` is smaller than it. pub fn get_min_resolved_ts(&self) -> u64 { self.registry .lock() .unwrap() .iter() .map(|(_, rrp)| rrp.resolved_ts()) - .filter(|ts| *ts != 0) // ts == 0 means the peer is uninitialized + //TODO: the uninitialized peer should be taken into consideration instead of skipping it(https://github.com/tikv/tikv/issues/15506). 
+ .filter(|ts| *ts != 0) // ts == 0 means the peer is uninitialized, .min() .unwrap_or(0) } From 213299221806959c95d05d0f2d7368e597fa9281 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 6 Dec 2023 19:37:20 +0800 Subject: [PATCH 184/203] server: Remove periodic heap profiling (#16151) ref tikv/tikv#15927 Remove periodic heap profiling Signed-off-by: Connor1996 --- components/server/src/server.rs | 1 - components/server/src/server2.rs | 1 - doc/http.md | 58 +---- src/config/mod.rs | 12 +- src/server/status_server/mod.rs | 160 ++---------- src/server/status_server/profile.rs | 269 +-------------------- tests/integrations/server/status_server.rs | 1 - 7 files changed, 37 insertions(+), 465 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 059cda0bb91e..00ab39a0e6af 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1450,7 +1450,6 @@ where self.cfg_controller.take().unwrap(), Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.core.store_path.clone(), self.resource_manager.clone(), self.grpc_service_mgr.clone(), ) { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 636a4bc9282b..38f5e94038f3 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1296,7 +1296,6 @@ where self.cfg_controller.clone().unwrap(), Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.core.store_path.clone(), self.resource_manager.clone(), self.grpc_service_mgr.clone(), ) { diff --git a/doc/http.md b/doc/http.md index 5aff02e75eae..625af0340911 100644 --- a/doc/http.md +++ b/doc/http.md @@ -36,76 +36,25 @@ The server will return CPU profiling data. The response format is determined by The raw profile data can be handled by `pprof` tool. For example, use `go tool pprof --http=0.0.0.0:1234 xxx.proto` to open a interactive web browser. -## Activate Heap Profiling - -Activate heap profiling of jemalloc. When activated, jemalloc would collect memory usage at malloc, demalloc, etc., walking the call stack to capture a backtrace. So it would affect performance in some extent. - -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_activate?interval=' -``` - -#### Parameters - -- **interval** (optional): Specifies the interval (in seconds) for dumping heap profiles in a temporary directory under TiKV data directory. If set to 0, period dumping is disable. You can dump heap profiles manually by the other API. - - Default: 0 - - Example: `?interval=60` - -#### Response - -A confirmation message indicating whether heap profiling activation was successful. If it has been already activated, it would return a error message without any side effect. - -## Deactivate Heap Profiling - -Deactivate the currently running heap profiling. - -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_deactivate' -``` - -#### Response - -If heap profiling is active, it will be stopped. The server will return a message indicating whether the deactivation was successful. -If heap profiling is not currently active, the server will return a message indicating that no heap profiling is running. - -## List Heap Profiles - -List available heap profiling profiles which are periodically dumped when activated by `heap_activate` API with `interval` specified. - -Note that, once deactivation is performed, all existing profiles will be deleted. 
- -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_list' -``` - -#### Response - -It will return a list of profiles, each represented as a file name and last modification timestamp, in plain text format. The profiles are sorted in reverse order based on their modification timestamps. - -If there are no available heap profiles or heap profiling is inactive, the server will return an empty list. - -## Retrieve Heap Profile +## Heap Profiling Collect and export heap profiling data. Note that, heap profile is not like CPU profile which is collected within the specified time range right after the request. Instead, heap profile is just a snapshot of the accumulated memory usage at the time of request, as the memory usage is always being collected once activated. ```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?name=&jeprof=' +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?jeprof=' ``` #### Parameters -- **name** (optional): Specifies the name of the heap profile to retrieve. If not specified, a heap profile will be retrieved. - - Default: `` - - Example: `?name=000001.heap` - - **jeprof** (optional): Indicates whether to use Jeprof to process the heap profile to generate call graph. It needs `perl` being installed. - Default: false - Example: `?jeprof=true` #### Response -The server will return heap profiling data. The response format is determined by the `jeprof` parameter. If true, the response will be a call graph in SVG format. Otherwise, the response will be raw profile data in jemalloc dedicated format. +The server will return heap profiling data. The response format is determined by the `jeprof` parameter. If true, the response will be a call graph in SVG format generated by `jeprof` needing `perl` installed in the TiKV environment. Otherwise, the response will be raw profile data in jemalloc dedicated format. ## Heap Profile Symbolization @@ -139,4 +88,3 @@ curl -X POST -d '' 'http://$TIKV_ADDRESS/debug/pprof/symbol' #### Response A list of resolved symbols in plain text. Each line represented as a hexadecimal address followed by the corresponding function name. If a memory address cannot be resolved, it will be marked with "??". 
- diff --git a/src/config/mod.rs b/src/config/mod.rs index e5df8c3e153e..aac91aaaedf0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -82,7 +82,6 @@ use crate::{ server::{ gc_worker::{GcConfig, RawCompactionFilterFactory, WriteCompactionFilterFactory}, lock_manager::Config as PessimisticTxnConfig, - status_server::HEAP_PROFILE_ACTIVE, ttl::TtlCompactionFilterFactory, Config as ServerConfig, CONFIG_ROCKSDB_GAUGE, }, @@ -3272,12 +3271,10 @@ impl Default for MemoryConfig { impl MemoryConfig { pub fn init(&self) { if self.enable_heap_profiling { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); if let Err(e) = tikv_alloc::activate_prof() { error!("failed to enable heap profiling"; "err" => ?e); return; } - *activate = Some(None); tikv_alloc::set_prof_sample(self.profiling_sample_per_bytes.0).unwrap(); } } @@ -3289,16 +3286,9 @@ impl ConfigManager for MemoryConfigManager { fn dispatch(&mut self, changes: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Bool(enable)) = changes.get("enable_heap_profiling") { if *enable { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - // already enabled by HTTP API, do nothing - if activate.is_none() { - tikv_alloc::activate_prof()?; - *activate = Some(None); - } + tikv_alloc::activate_prof()?; } else { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); tikv_alloc::deactivate_prof()?; - *activate = None; } } diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 60b267a6d944..9a2bb5743ae9 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -6,7 +6,6 @@ use std::{ env::args, error::Error as StdError, net::SocketAddr, - path::PathBuf, pin::Pin, str::{self, FromStr}, sync::Arc, @@ -18,7 +17,7 @@ use async_stream::stream; use collections::HashMap; use flate2::{write::GzEncoder, Compression}; use futures::{ - compat::{Compat01As03, Stream01CompatExt}, + compat::Compat01As03, future::{ok, poll_fn}, prelude::*, }; @@ -40,7 +39,6 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; -pub use profile::HEAP_PROFILE_ACTIVE; use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; @@ -57,7 +55,7 @@ use tikv_util::{ }; use tokio::{ io::{AsyncRead, AsyncWrite}, - runtime::{Builder, Handle, Runtime}, + runtime::{Builder, Runtime}, sync::oneshot::{self, Receiver, Sender}, }; use tokio_openssl::SslStream; @@ -91,7 +89,6 @@ pub struct StatusServer { cfg_controller: ConfigController, router: R, security_config: Arc, - store_path: PathBuf, resource_manager: Option>, grpc_service_mgr: GrpcServiceManager, } @@ -105,7 +102,6 @@ where cfg_controller: ConfigController, security_config: Arc, router: R, - store_path: PathBuf, resource_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result { @@ -128,120 +124,27 @@ where cfg_controller, router, security_config, - store_path, resource_manager, grpc_service_mgr, }) } - fn list_heap_prof(_req: Request) -> hyper::Result> { - let profiles = match list_heap_profiles() { - Ok(s) => s, - Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), - }; - - let text = profiles - .into_iter() - .map(|(f, ct)| format!("{}\t\t{}", f, ct)) - .collect::>() - .join("\n") - .into_bytes(); - - let response = Response::builder() - .header("Content-Type", mime::TEXT_PLAIN.to_string()) - .header("Content-Length", text.len()) - .body(text.into()) - .unwrap(); - Ok(response) - } - - async fn activate_heap_prof( - req: Request, - store_path: PathBuf, - ) -> hyper::Result> { - let query = 
req.uri().query().unwrap_or(""); - let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); - - let interval: u64 = match query_pairs.get("interval") { - Some(val) => match val.parse() { - Ok(val) => val, - Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), - }, - None => 0, - }; - - let period = if interval == 0 { - None - } else { - let interval = Duration::from_secs(interval); - Some( - GLOBAL_TIMER_HANDLE - .interval(Instant::now() + interval, interval) - .compat() - .map_ok(|_| ()) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_stream(), - ) - }; - let (tx, rx) = oneshot::channel(); - let callback = move || tx.send(()).unwrap_or_default(); - let res = Handle::current().spawn(activate_heap_profile(period, store_path, callback)); - if rx.await.is_ok() { - let msg = "activate heap profile success"; - Ok(make_response(StatusCode::OK, msg)) - } else { - let errmsg = format!("{:?}", res.await); - Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, errmsg)) - } - } - - fn deactivate_heap_prof(_req: Request) -> hyper::Result> { - let body = if deactivate_heap_profile() { - "deactivate heap profile success" - } else { - "no heap profile is running" - }; - Ok(make_response(StatusCode::OK, body)) - } - async fn dump_heap_prof_to_resp(req: Request) -> hyper::Result> { let query = req.uri().query().unwrap_or(""); let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == Some("true"); - let result = if let Some(name) = query_pairs.get("name") { - let re = Regex::new(HEAP_PROFILE_REGEX).unwrap(); - if !re.is_match(name) { - let errmsg = format!("heap profile name {} is invalid", name); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); - } - let profiles = match list_heap_profiles() { - Ok(s) => s, + let result = { + let path = match dump_one_heap_profile() { + Ok(path) => path, Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), }; - if profiles.iter().any(|(f, _)| f == name) { - let dir = match heap_profiles_dir() { - Some(path) => path, - None => { - return Ok(make_response( - StatusCode::INTERNAL_SERVER_ERROR, - "heap profile is not active", - )); - } - }; - let path = dir.join(name.as_ref()); - if use_jeprof { - jeprof_heap_profile(path.to_str().unwrap()) - } else { - read_file(path.to_str().unwrap()) - } + if use_jeprof { + jeprof_heap_profile(path.to_str().unwrap()) } else { - let errmsg = format!("heap profile {} not found", name); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); + read_file(path.to_str().unwrap()) } - } else { - dump_one_heap_profile() }; match result { @@ -692,7 +595,6 @@ where let security_config = self.security_config.clone(); let cfg_controller = self.cfg_controller.clone(); let router = self.router.clone(); - let store_path = self.store_path.clone(); let resource_manager = self.resource_manager.clone(); let grpc_service_mgr = self.grpc_service_mgr.clone(); // Start to serve. 
@@ -701,7 +603,6 @@ where let security_config = security_config.clone(); let cfg_controller = cfg_controller.clone(); let router = router.clone(); - let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); let grpc_service_mgr = grpc_service_mgr.clone(); async move { @@ -711,7 +612,6 @@ where let security_config = security_config.clone(); let cfg_controller = cfg_controller.clone(); let router = router.clone(); - let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); let grpc_service_mgr = grpc_service_mgr.clone(); async move { @@ -749,12 +649,23 @@ where Self::handle_get_metrics(req, &cfg_controller) } (Method::GET, "/status") => Ok(Response::default()), - (Method::GET, "/debug/pprof/heap_list") => Self::list_heap_prof(req), + (Method::GET, "/debug/pprof/heap_list") => { + Ok(make_response( + StatusCode::GONE, + "Deprecated, heap profiling is always enabled by default, just use /debug/pprof/heap to get the heap profile when needed", + )) + } (Method::GET, "/debug/pprof/heap_activate") => { - Self::activate_heap_prof(req, store_path).await + Ok(make_response( + StatusCode::GONE, + "Deprecated, use config `memory.enable_heap_profiling` to toggle", + )) } (Method::GET, "/debug/pprof/heap_deactivate") => { - Self::deactivate_heap_prof(req) + Ok(make_response( + StatusCode::GONE, + "Deprecated, use config `memory.enable_heap_profiling` to toggle", + )) } (Method::GET, "/debug/pprof/heap") => { Self::dump_heap_prof_to_resp(req).await @@ -1221,13 +1132,11 @@ mod tests { #[test] fn test_status_service() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1271,13 +1180,11 @@ mod tests { #[test] fn test_config_endpoint() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1330,7 +1237,6 @@ mod tests { ConfigController::new(config), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1388,13 +1294,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1506,13 +1410,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints_can_trigger_fails() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1552,13 +1454,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints_should_give_404_when_failpoints_are_disable() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1590,13 +1490,11 @@ mod tests { } 
fn do_test_security_status_service(allowed_cn: HashSet, expected: bool) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(new_security_cfg(Some(allowed_cn))), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1665,13 +1563,11 @@ mod tests { #[test] #[ignore] fn test_pprof_heap_service() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1697,13 +1593,11 @@ mod tests { #[test] fn test_pprof_profile_service() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1732,13 +1626,11 @@ mod tests { #[test] fn test_pprof_symbol_service() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1785,13 +1677,11 @@ mod tests { #[test] fn test_metrics() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1842,13 +1732,11 @@ mod tests { #[test] fn test_change_log_level() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1898,13 +1786,11 @@ mod tests { let cfgs = [TikvConfig::default(), multi_rocks_cfg]; let resp_strs = ["raft-kv", "partitioned-raft-kv"]; for (cfg, resp_str) in IntoIterator::into_iter(cfgs).zip(resp_strs) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::new(cfg), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1937,13 +1823,11 @@ mod tests { multi_rocks_cfg.storage.engine = EngineType::RaftKv2; let cfgs = [TikvConfig::default(), multi_rocks_cfg]; for cfg in IntoIterator::into_iter(cfgs) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::new(cfg), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index dbf819b35fe0..7d7e90741e44 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -1,43 +1,33 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - fs::{File, Metadata}, + fs::File, io::{Read, Write}, path::PathBuf, pin::Pin, process::{Command, Stdio}, sync::Mutex, - time::{Duration, UNIX_EPOCH}, }; -use chrono::{offset::Local, DateTime}; use futures::{ - channel::oneshot::{self, Sender}, future::BoxFuture, - select, task::{Context, Poll}, - Future, FutureExt, Stream, StreamExt, + Future, FutureExt, }; use lazy_static::lazy_static; use pprof::protos::Message; use regex::Regex; -use tempfile::{NamedTempFile, TempDir}; +use tempfile::NamedTempFile; #[cfg(not(test))] -use tikv_alloc::{activate_prof, deactivate_prof, dump_prof}; +use tikv_alloc::dump_prof; #[cfg(test)] -pub use self::test_utils::TEST_PROFILE_MUTEX; +use self::test_utils::dump_prof; #[cfg(test)] -use self::test_utils::{activate_prof, deactivate_prof, dump_prof}; - -// File name suffix for periodically dumped heap profiles. -pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; -pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; +pub use self::test_utils::TEST_PROFILE_MUTEX; lazy_static! { // If it's some it means there are already a CPU profiling. static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); - // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. - pub static ref HEAP_PROFILE_ACTIVE: Mutex, TempDir)>>> = Mutex::new(None); // To normalize thread names. static ref THREAD_NAME_RE: Regex = @@ -93,91 +83,11 @@ impl Future for ProfileRunner { } /// Trigger a heap profile and return the content. -pub fn dump_one_heap_profile() -> Result, String> { - if HEAP_PROFILE_ACTIVE.lock().unwrap().is_none() { - return Err("heap profiling is not activated".to_owned()); - } +pub fn dump_one_heap_profile() -> Result { let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; - let path = f.path().to_str().unwrap(); - dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; - read_file(path) -} - -/// Activate heap profile and call `callback` if successfully. -/// `deactivate_heap_profile` can only be called after it's notified from -/// `callback`. -pub async fn activate_heap_profile( - dump_period: Option, - store_path: PathBuf, - callback: F, -) -> Result<(), String> -where - S: Stream> + Send + Unpin + 'static, - F: FnOnce() + Send + 'static, -{ - if HEAP_PROFILE_ACTIVE.lock().unwrap().is_some() { - return Err("Already in Heap Profiling".to_owned()); - } - - let (tx, rx) = oneshot::channel(); - let dir = tempfile::Builder::new() - .prefix("heap-") - .tempdir_in(store_path) - .map_err(|e| format!("create temp directory: {}", e))?; - let dir_path = dir.path().to_str().unwrap().to_owned(); - - let on_start = move || { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - assert!(activate.is_none()); - *activate = Some(Some((tx, dir))); - activate_prof().map_err(|e| format!("activate_prof: {}", e))?; - callback(); - info!("periodical heap profiling is started"); - Ok(()) - }; - - let on_end = |_| { - let res = deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)); - *HEAP_PROFILE_ACTIVE.lock().unwrap() = None; - res - }; - - let end = async move { - if let Some(dump_period) = dump_period { - select! 
{ - _ = rx.fuse() => { - info!("periodical heap profiling is canceled"); - Ok(()) - }, - res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { - warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); - res - } - } - } else { - let _ = rx.await; - info!("periodical heap profiling is canceled"); - Ok(()) - } - }; - - ProfileRunner::new(on_start, on_end, end.boxed())?.await -} - -/// Deactivate heap profile. Return `false` if it hasn't been activated. -pub fn deactivate_heap_profile() -> bool { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - match activate.as_mut() { - Some(tx) => { - if let Some((tx, _)) = tx.take() { - let _ = tx.send(()); - } else { - *activate = None; - } - true - } - None => false, - } + let path = f.path(); + dump_prof(path.to_str().unwrap()).map_err(|e| format!("dump_prof: {}", e))?; + Ok(path.to_owned()) } /// Trigger one cpu profile. @@ -274,57 +184,6 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { Ok(output.stdout) } -pub fn heap_profiles_dir() -> Option { - HEAP_PROFILE_ACTIVE - .lock() - .unwrap() - .as_ref() - .and_then(|v| v.as_ref().map(|(_, dir)| dir.path().to_owned())) -} - -pub fn list_heap_profiles() -> Result, String> { - let path = match heap_profiles_dir() { - Some(path) => path.into_os_string().into_string().unwrap(), - None => return Ok(vec![]), - }; - - let dir = std::fs::read_dir(path).map_err(|e| format!("read dir fail: {}", e))?; - let mut profiles = Vec::new(); - for item in dir { - let item = match item { - Ok(x) => x, - _ => continue, - }; - let f = item.file_name().to_str().unwrap().to_owned(); - if !f.ends_with(HEAP_PROFILE_SUFFIX) { - continue; - } - let ct = item.metadata().map(|x| last_change_epoch(&x)).unwrap(); - let dt = DateTime::::from(UNIX_EPOCH + Duration::from_secs(ct)); - profiles.push((f, dt.format("%Y-%m-%d %H:%M:%S").to_string())); - } - - // Reverse sort them. 
- profiles.sort_by(|x, y| y.1.cmp(&x.1)); - info!("list_heap_profiles gets {} items", profiles.len()); - Ok(profiles) -} - -async fn dump_heap_profile_periodically(mut period: S, dir: String) -> Result<(), String> -where - S: Stream> + Send + Unpin + 'static, -{ - let mut id = 0; - while let Some(res) = period.next().await { - res?; - id += 1; - let path = format!("{}/{:0>6}{}", dir, id, HEAP_PROFILE_SUFFIX); - dump_prof(&path).map_err(|e| format!("dump_prof: {}", e))?; - info!("a heap profile is dumped to {}", path); - } - Ok(()) -} - fn extract_thread_name(thread_name: &str) -> String { THREAD_NAME_RE .captures(thread_name) @@ -350,43 +209,18 @@ mod test_utils { pub static ref TEST_PROFILE_MUTEX: Mutex<()> = Mutex::new(()); } - pub fn activate_prof() -> ProfResult<()> { - Ok(()) - } - pub fn deactivate_prof() -> ProfResult<()> { - Ok(()) - } pub fn dump_prof(_: &str) -> ProfResult<()> { Ok(()) } } -#[cfg(unix)] -fn last_change_epoch(metadata: &Metadata) -> u64 { - use std::os::unix::fs::MetadataExt; - metadata.ctime() as u64 -} - -#[cfg(not(unix))] -fn last_change_epoch(metadata: &Metadata) -> u64 { - 0 -} - #[cfg(test)] mod tests { - use std::sync::mpsc::sync_channel; - - use futures::{channel::mpsc, executor::block_on, SinkExt}; + use futures::executor::block_on; use tokio::runtime; use super::*; - #[test] - fn test_last_change_epoch() { - let f = tempfile::tempfile().unwrap(); - assert!(last_change_epoch(&f.metadata().unwrap()) > 0); - } - #[test] fn test_extract_thread_name() { assert_eq!(&extract_thread_name("test-name-1"), "test-name"); @@ -424,86 +258,5 @@ mod tests { drop(tx1); block_on(res1).unwrap().unwrap_err(); - - let expected = "Already in Heap Profiling"; - - let (tx1, rx1) = mpsc::channel(1); - let res1 = rt.spawn(activate_heap_profile( - Some(rx1), - std::env::temp_dir(), - || {}, - )); - thread::sleep(Duration::from_millis(100)); - - let (_tx2, rx2) = mpsc::channel(1); - let res2 = rt.spawn(activate_heap_profile( - Some(rx2), - std::env::temp_dir(), - || {}, - )); - assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); - - drop(tx1); - block_on(res1).unwrap().unwrap(); - } - - #[test] - fn test_profile_guard_toggle() { - let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let rt = runtime::Builder::new_multi_thread() - .worker_threads(4) - .build() - .unwrap(); - - // Test activated profiling can be stopped by canceling the period stream. - let (tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); - drop(tx); - block_on(res).unwrap().unwrap(); - - // Test activated profiling can be stopped by the handle. - let (tx, rx) = sync_channel::(1); - let on_activated = move || drop(tx); - let check_activated = move || rx.recv().is_err(); - - let (_tx, _rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile( - Some(_rx), - std::env::temp_dir(), - on_activated, - )); - assert!(check_activated()); - assert!(deactivate_heap_profile()); - block_on(res).unwrap().unwrap(); - } - - #[test] - fn test_heap_profile_exit() { - let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let rt = runtime::Builder::new_multi_thread() - .worker_threads(4) - .build() - .unwrap(); - - // Test heap profiling can be stopped by sending an error. - let (mut tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); - block_on(tx.send(Err("test".to_string()))).unwrap(); - block_on(res).unwrap().unwrap_err(); - - // Test heap profiling can be activated again. 
- let (tx, rx) = sync_channel::(1); - let on_activated = move || drop(tx); - let check_activated = move || rx.recv().is_err(); - - let (_tx, _rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile( - Some(_rx), - std::env::temp_dir(), - on_activated, - )); - assert!(check_activated()); - assert!(deactivate_heap_profile()); - block_on(res).unwrap().unwrap(); } } diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 3e1fbd4a9e84..a2921f77b951 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -45,7 +45,6 @@ fn test_region_meta_endpoint() { ConfigController::default(), Arc::new(SecurityConfig::default()), router, - std::env::temp_dir(), None, GrpcServiceManager::dummy(), ) From eefbdcba61bd3847a302fafcade6bf5f05627c35 Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 11 Dec 2023 16:51:18 +0800 Subject: [PATCH 185/203] *: uniform deadline exceeded error in cop response (#16155) close tikv/tikv#16154 Signed-off-by: crazycs520 --- components/raftstore/src/errors.rs | 25 ++++++++++++++- components/tikv_util/src/deadline.rs | 9 ++++++ src/coprocessor/endpoint.rs | 35 ++++++++++++++++++--- src/storage/errors.rs | 8 ++--- tests/failpoints/cases/test_coprocessor.rs | 36 ++++++++++++++++------ 5 files changed, 93 insertions(+), 20 deletions(-) diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index d1597a77121e..49a52de26e17 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -7,7 +7,10 @@ use error_code::{self, ErrorCode, ErrorCodeExt}; use kvproto::{errorpb, metapb, raft_serverpb}; use protobuf::ProtobufError; use thiserror::Error; -use tikv_util::{codec, deadline::DeadlineError}; +use tikv_util::{ + codec, + deadline::{set_deadline_exceeded_busy_error, DeadlineError}, +}; use super::{coprocessor::Error as CopError, store::SnapError}; @@ -287,6 +290,9 @@ impl From for errorpb::Error { e.set_store_peer_id(store_peer_id); errorpb.set_mismatch_peer_id(e); } + Error::DeadlineExceeded => { + set_deadline_exceeded_busy_error(&mut errorpb); + } _ => {} }; @@ -350,3 +356,20 @@ impl ErrorCodeExt for Error { } } } + +#[cfg(test)] +mod tests { + use kvproto::errorpb; + + use crate::Error; + + #[test] + fn test_deadline_exceeded_error() { + let err: errorpb::Error = Error::DeadlineExceeded.into(); + assert_eq!( + err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!(err.get_message(), "Deadline is exceeded"); + } +} diff --git a/components/tikv_util/src/deadline.rs b/components/tikv_util/src/deadline.rs index 84463f507b91..64416999fe32 100644 --- a/components/tikv_util/src/deadline.rs +++ b/components/tikv_util/src/deadline.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
use fail::fail_point; +use kvproto::errorpb; use super::time::{Duration, Instant}; @@ -58,3 +59,11 @@ impl Deadline { std::time::Instant::now() + self.deadline.duration_since(Instant::now_coarse()) } } + +const DEADLINE_EXCEEDED: &str = "deadline is exceeded"; + +pub fn set_deadline_exceeded_busy_error(e: &mut errorpb::Error) { + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason(DEADLINE_EXCEEDED.to_owned()); + e.set_server_is_busy(server_is_busy_err); +} diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 005a18938de1..001d1e94ca00 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -19,7 +19,9 @@ use resource_metering::{FutureExt, ResourceTagFactory, StreamExt}; use tidb_query_common::execute_stats::ExecSummary; use tikv_alloc::trace::MemoryTraceGuard; use tikv_kv::SnapshotExt; -use tikv_util::{quota_limiter::QuotaLimiter, time::Instant}; +use tikv_util::{ + deadline::set_deadline_exceeded_busy_error, quota_limiter::QuotaLimiter, time::Instant, +}; use tipb::{AnalyzeReq, AnalyzeType, ChecksumRequest, ChecksumScanOn, DagRequest, ExecType}; use tokio::sync::Semaphore; use txn_types::Lock; @@ -832,7 +834,10 @@ macro_rules! make_error_response_common { } Error::DeadlineExceeded => { $tag = "deadline_exceeded"; - $resp.set_other_error($e.to_string()); + let mut err = errorpb::Error::default(); + set_deadline_exceeded_busy_error(&mut err); + err.set_message($e.to_string()); + $resp.set_region_error(err); } Error::MaxPendingTasksExceeded => { $tag = "max_pending_tasks_exceeded"; @@ -1936,7 +1941,11 @@ mod tests { let resp = block_on(copr.handle_unary_request(config, handler_builder)).unwrap(); assert_eq!(resp.get_data().len(), 0); - assert!(!resp.get_other_error().is_empty()); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); } { @@ -1953,7 +1962,11 @@ mod tests { let resp = block_on(copr.handle_unary_request(config, handler_builder)).unwrap(); assert_eq!(resp.get_data().len(), 0); - assert!(!resp.get_other_error().is_empty()); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); } } @@ -2005,4 +2018,18 @@ mod tests { let resp = block_on(copr.parse_and_handle_unary_request(req, None)); assert_eq!(resp.get_locked().get_key(), b"key"); } + + #[test] + fn test_make_error_response() { + let resp = make_error_response(Error::DeadlineExceeded); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); + } } diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 0e7db9ffc96e..b603b9047085 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -12,7 +12,7 @@ use std::{ use error_code::{self, ErrorCode, ErrorCodeExt}; use kvproto::{errorpb, kvrpcpb, kvrpcpb::ApiVersion}; use thiserror::Error; -use tikv_util::deadline::DeadlineError; +use tikv_util::deadline::{set_deadline_exceeded_busy_error, DeadlineError}; use txn_types::{KvPair, TimeStamp}; use crate::storage::{ @@ -222,7 +222,6 @@ impl Display for ErrorHeaderKind { const SCHEDULER_IS_BUSY: &str = "scheduler is busy"; const GC_WORKER_IS_BUSY: &str = "gc worker is busy"; -const DEADLINE_EXCEEDED: &str = "deadline is exceeded"; /// Get the `ErrorHeaderKind` enum that 
corresponds to the error in the protobuf /// message. Returns `ErrorHeaderKind::Other` if no match found. @@ -319,9 +318,8 @@ pub fn extract_region_error_from_error(e: &Error) -> Option { } Error(box ErrorInner::DeadlineExceeded) => { let mut err = errorpb::Error::default(); - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(DEADLINE_EXCEEDED.to_owned()); - err.set_server_is_busy(server_is_busy_err); + err.set_message(e.to_string()); + set_deadline_exceeded_busy_error(&mut err); Some(err) } _ => None, diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 0710f778aa77..be9d978b23ae 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -31,8 +31,15 @@ fn test_deadline() { fail::cfg("deadline_check_fail", "return()").unwrap(); let resp = handle_request(&endpoint, req); - - assert!(resp.get_other_error().contains("exceeding the deadline")); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); } #[test] @@ -46,8 +53,15 @@ fn test_deadline_2() { fail::cfg("rockskv_async_snapshot", "panic").unwrap(); fail::cfg("deadline_check_fail", "return()").unwrap(); let resp = handle_request(&endpoint, req); - - assert!(resp.get_other_error().contains("exceeding the deadline")); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); } /// Test deadline exceeded when request is handling @@ -80,12 +94,14 @@ fn test_deadline_3() { let mut resp = SelectResponse::default(); resp.merge_from_bytes(cop_resp.get_data()).unwrap(); - assert!( - cop_resp.other_error.contains("exceeding the deadline") - || resp - .get_error() - .get_msg() - .contains("exceeding the deadline") + let region_err = cop_resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" ); } From 99916c0b9e70583597024aab001d9a3c7825ed10 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Mon, 11 Dec 2023 18:39:18 +0800 Subject: [PATCH 186/203] coprocessor: rewrite `Upper` and `Lower` functions by porting the implementation from Go library (#16160) close tikv/tikv#16159 rewrite `Upper` and `Lower` functions by porting the implementation from Go library Signed-off-by: gengliqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/collation/encoding/gbk.rs | 60 +- .../src/codec/collation/encoding/mod.rs | 2 + .../collation/encoding/unicode_letter.rs | 550 ++++++++++++++++++ .../src/codec/collation/mod.rs | 4 +- components/tidb_query_expr/src/impl_string.rs | 46 +- 5 files changed, 619 insertions(+), 43 deletions(-) create mode 100644 components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs index 6f27475ff2c2..137d9dd22c36 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs +++ 
b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs @@ -28,45 +28,39 @@ impl Encoding for EncodingGbk { #[inline] // GBK lower and upper follows https://dev.mysql.com/worklog/task/?id=4583. fn lower(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(|ch| { - let c = ch as u32; - match c { - 0x216A..=0x216B => char::from_u32(c), - _ => char::from_u32(c).unwrap().to_lowercase().next(), - } + let res = s.chars().flat_map(|ch| match ch as u32 { + 0x216A..=0x216B => Some(ch), + _ => unicode_to_lower(ch), }); writer.write_from_char_iter(res) } #[inline] fn upper(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(|ch| { - let c = ch as u32; - match c { - 0x00E0..=0x00E1 - | 0x00E8..=0x00EA - | 0x00EC..=0x00ED - | 0x00F2..=0x00F3 - | 0x00F9..=0x00FA - | 0x00FC - | 0x0101 - | 0x0113 - | 0x011B - | 0x012B - | 0x0144 - | 0x0148 - | 0x014D - | 0x016B - | 0x01CE - | 0x01D0 - | 0x01D2 - | 0x01D4 - | 0x01D6 - | 0x01D8 - | 0x01DA - | 0x01DC => char::from_u32(c), - _ => char::from_u32(c).unwrap().to_uppercase().next(), - } + let res = s.chars().flat_map(|ch| match ch as u32 { + 0x00E0..=0x00E1 + | 0x00E8..=0x00EA + | 0x00EC..=0x00ED + | 0x00F2..=0x00F3 + | 0x00F9..=0x00FA + | 0x00FC + | 0x0101 + | 0x0113 + | 0x011B + | 0x012B + | 0x0144 + | 0x0148 + | 0x014D + | 0x016B + | 0x01CE + | 0x01D0 + | 0x01D2 + | 0x01D4 + | 0x01D6 + | 0x01D8 + | 0x01DA + | 0x01DC => Some(ch), + _ => unicode_to_upper(ch), }); writer.write_from_char_iter(res) } diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs index b2434105ce5b..268b11aad41d 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs @@ -2,12 +2,14 @@ mod ascii; mod gbk; +mod unicode_letter; mod utf8; use std::str; pub use ascii::*; pub use gbk::*; +pub use unicode_letter::*; pub use utf8::*; use super::Encoding; diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs b/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs new file mode 100644 index 000000000000..e83af2723c53 --- /dev/null +++ b/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs @@ -0,0 +1,550 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +/// In order to keep the same behavoir as TiDB that uses go standard library to +/// implement lower and upper functions. Below code is ported from https://github.com/golang/go/blob/go1.21.3/src/unicode/letter.go. 
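// A few concrete mappings produced by the delta table below (they agree with
// Go's unicode.ToUpper/ToLower simple case mapping and with the tests in this
// patch):
//
//     unicode_to_upper('å') == Some('Å')   // 0x00E5 -> 0x00C5, range delta -32
//     unicode_to_upper('ı') == Some('I')   // U+0131 dotless i -> ASCII 'I'
//     unicode_to_upper('ß') == Some('ß')   // U+00DF has no simple upper-case form
//     unicode_to_lower('İ') == Some('i')   // U+0130 dotted capital I -> ASCII 'i'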
+const UPPER_CASE: usize = 0; +const LOWER_CASE: usize = 1; +const TITLE_CASE: usize = 2; +const MAX_CASE: usize = 3; + +const MAX_ASCII: i32 = 0x7F; +const MAX_RUNE: i32 = 0x10FFFF; +const REPLACEMENT_CHAR: i32 = 0xFFFD; + +const UPPER_LOWER: i32 = MAX_RUNE + 1; + +static CASE_TABLE: &[(i32, i32, [i32; MAX_CASE])] = &[ + (0x0041, 0x005A, [0, 32, 0]), + (0x0061, 0x007A, [-32, 0, -32]), + (0x00B5, 0x00B5, [743, 0, 743]), + (0x00C0, 0x00D6, [0, 32, 0]), + (0x00D8, 0x00DE, [0, 32, 0]), + (0x00E0, 0x00F6, [-32, 0, -32]), + (0x00F8, 0x00FE, [-32, 0, -32]), + (0x00FF, 0x00FF, [121, 0, 121]), + (0x0100, 0x012F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0130, 0x0130, [0, -199, 0]), + (0x0131, 0x0131, [-232, 0, -232]), + (0x0132, 0x0137, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0139, 0x0148, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x014A, 0x0177, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0178, 0x0178, [0, -121, 0]), + (0x0179, 0x017E, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x017F, 0x017F, [-300, 0, -300]), + (0x0180, 0x0180, [195, 0, 195]), + (0x0181, 0x0181, [0, 210, 0]), + (0x0182, 0x0185, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0186, 0x0186, [0, 206, 0]), + (0x0187, 0x0188, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0189, 0x018A, [0, 205, 0]), + (0x018B, 0x018C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x018E, 0x018E, [0, 79, 0]), + (0x018F, 0x018F, [0, 202, 0]), + (0x0190, 0x0190, [0, 203, 0]), + (0x0191, 0x0192, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0193, 0x0193, [0, 205, 0]), + (0x0194, 0x0194, [0, 207, 0]), + (0x0195, 0x0195, [97, 0, 97]), + (0x0196, 0x0196, [0, 211, 0]), + (0x0197, 0x0197, [0, 209, 0]), + (0x0198, 0x0199, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x019A, 0x019A, [163, 0, 163]), + (0x019C, 0x019C, [0, 211, 0]), + (0x019D, 0x019D, [0, 213, 0]), + (0x019E, 0x019E, [130, 0, 130]), + (0x019F, 0x019F, [0, 214, 0]), + (0x01A0, 0x01A5, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01A6, 0x01A6, [0, 218, 0]), + (0x01A7, 0x01A8, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01A9, 0x01A9, [0, 218, 0]), + (0x01AC, 0x01AD, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01AE, 0x01AE, [0, 218, 0]), + (0x01AF, 0x01B0, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01B1, 0x01B2, [0, 217, 0]), + (0x01B3, 0x01B6, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01B7, 0x01B7, [0, 219, 0]), + (0x01B8, 0x01B9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01BC, 0x01BD, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01BF, 0x01BF, [56, 0, 56]), + (0x01C4, 0x01C4, [0, 2, 1]), + (0x01C5, 0x01C5, [-1, 1, 0]), + (0x01C6, 0x01C6, [-2, 0, -1]), + (0x01C7, 0x01C7, [0, 2, 1]), + (0x01C8, 0x01C8, [-1, 1, 0]), + (0x01C9, 0x01C9, [-2, 0, -1]), + (0x01CA, 0x01CA, [0, 2, 1]), + (0x01CB, 0x01CB, [-1, 1, 0]), + (0x01CC, 0x01CC, [-2, 0, -1]), + (0x01CD, 0x01DC, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01DD, 0x01DD, [-79, 0, -79]), + (0x01DE, 0x01EF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01F1, 0x01F1, [0, 2, 1]), + (0x01F2, 0x01F2, [-1, 1, 0]), + (0x01F3, 0x01F3, [-2, 0, -1]), + (0x01F4, 0x01F5, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01F6, 0x01F6, [0, -97, 0]), + (0x01F7, 0x01F7, [0, -56, 0]), + (0x01F8, 0x021F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0220, 0x0220, [0, -130, 0]), + (0x0222, 0x0233, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x023A, 0x023A, [0, 10795, 0]), + (0x023B, 0x023C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x023D, 0x023D, [0, -163, 0]), + (0x023E, 0x023E, [0, 10792, 0]), + (0x023F, 0x0240, [10815, 0, 
10815]), + (0x0241, 0x0242, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0243, 0x0243, [0, -195, 0]), + (0x0244, 0x0244, [0, 69, 0]), + (0x0245, 0x0245, [0, 71, 0]), + (0x0246, 0x024F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0250, 0x0250, [10783, 0, 10783]), + (0x0251, 0x0251, [10780, 0, 10780]), + (0x0252, 0x0252, [10782, 0, 10782]), + (0x0253, 0x0253, [-210, 0, -210]), + (0x0254, 0x0254, [-206, 0, -206]), + (0x0256, 0x0257, [-205, 0, -205]), + (0x0259, 0x0259, [-202, 0, -202]), + (0x025B, 0x025B, [-203, 0, -203]), + (0x025C, 0x025C, [42319, 0, 42319]), + (0x0260, 0x0260, [-205, 0, -205]), + (0x0261, 0x0261, [42315, 0, 42315]), + (0x0263, 0x0263, [-207, 0, -207]), + (0x0265, 0x0265, [42280, 0, 42280]), + (0x0266, 0x0266, [42308, 0, 42308]), + (0x0268, 0x0268, [-209, 0, -209]), + (0x0269, 0x0269, [-211, 0, -211]), + (0x026A, 0x026A, [42308, 0, 42308]), + (0x026B, 0x026B, [10743, 0, 10743]), + (0x026C, 0x026C, [42305, 0, 42305]), + (0x026F, 0x026F, [-211, 0, -211]), + (0x0271, 0x0271, [10749, 0, 10749]), + (0x0272, 0x0272, [-213, 0, -213]), + (0x0275, 0x0275, [-214, 0, -214]), + (0x027D, 0x027D, [10727, 0, 10727]), + (0x0280, 0x0280, [-218, 0, -218]), + (0x0282, 0x0282, [42307, 0, 42307]), + (0x0283, 0x0283, [-218, 0, -218]), + (0x0287, 0x0287, [42282, 0, 42282]), + (0x0288, 0x0288, [-218, 0, -218]), + (0x0289, 0x0289, [-69, 0, -69]), + (0x028A, 0x028B, [-217, 0, -217]), + (0x028C, 0x028C, [-71, 0, -71]), + (0x0292, 0x0292, [-219, 0, -219]), + (0x029D, 0x029D, [42261, 0, 42261]), + (0x029E, 0x029E, [42258, 0, 42258]), + (0x0345, 0x0345, [84, 0, 84]), + (0x0370, 0x0373, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0376, 0x0377, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x037B, 0x037D, [130, 0, 130]), + (0x037F, 0x037F, [0, 116, 0]), + (0x0386, 0x0386, [0, 38, 0]), + (0x0388, 0x038A, [0, 37, 0]), + (0x038C, 0x038C, [0, 64, 0]), + (0x038E, 0x038F, [0, 63, 0]), + (0x0391, 0x03A1, [0, 32, 0]), + (0x03A3, 0x03AB, [0, 32, 0]), + (0x03AC, 0x03AC, [-38, 0, -38]), + (0x03AD, 0x03AF, [-37, 0, -37]), + (0x03B1, 0x03C1, [-32, 0, -32]), + (0x03C2, 0x03C2, [-31, 0, -31]), + (0x03C3, 0x03CB, [-32, 0, -32]), + (0x03CC, 0x03CC, [-64, 0, -64]), + (0x03CD, 0x03CE, [-63, 0, -63]), + (0x03CF, 0x03CF, [0, 8, 0]), + (0x03D0, 0x03D0, [-62, 0, -62]), + (0x03D1, 0x03D1, [-57, 0, -57]), + (0x03D5, 0x03D5, [-47, 0, -47]), + (0x03D6, 0x03D6, [-54, 0, -54]), + (0x03D7, 0x03D7, [-8, 0, -8]), + (0x03D8, 0x03EF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03F0, 0x03F0, [-86, 0, -86]), + (0x03F1, 0x03F1, [-80, 0, -80]), + (0x03F2, 0x03F2, [7, 0, 7]), + (0x03F3, 0x03F3, [-116, 0, -116]), + (0x03F4, 0x03F4, [0, -60, 0]), + (0x03F5, 0x03F5, [-96, 0, -96]), + (0x03F7, 0x03F8, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03F9, 0x03F9, [0, -7, 0]), + (0x03FA, 0x03FB, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03FD, 0x03FF, [0, -130, 0]), + (0x0400, 0x040F, [0, 80, 0]), + (0x0410, 0x042F, [0, 32, 0]), + (0x0430, 0x044F, [-32, 0, -32]), + (0x0450, 0x045F, [-80, 0, -80]), + (0x0460, 0x0481, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x048A, 0x04BF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x04C0, 0x04C0, [0, 15, 0]), + (0x04C1, 0x04CE, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x04CF, 0x04CF, [-15, 0, -15]), + (0x04D0, 0x052F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0531, 0x0556, [0, 48, 0]), + (0x0561, 0x0586, [-48, 0, -48]), + (0x10A0, 0x10C5, [0, 7264, 0]), + (0x10C7, 0x10C7, [0, 7264, 0]), + (0x10CD, 0x10CD, [0, 7264, 0]), + (0x10D0, 0x10FA, [3008, 0, 0]), + (0x10FD, 0x10FF, [3008, 0, 0]), + 
(0x13A0, 0x13EF, [0, 38864, 0]), + (0x13F0, 0x13F5, [0, 8, 0]), + (0x13F8, 0x13FD, [-8, 0, -8]), + (0x1C80, 0x1C80, [-6254, 0, -6254]), + (0x1C81, 0x1C81, [-6253, 0, -6253]), + (0x1C82, 0x1C82, [-6244, 0, -6244]), + (0x1C83, 0x1C84, [-6242, 0, -6242]), + (0x1C85, 0x1C85, [-6243, 0, -6243]), + (0x1C86, 0x1C86, [-6236, 0, -6236]), + (0x1C87, 0x1C87, [-6181, 0, -6181]), + (0x1C88, 0x1C88, [35266, 0, 35266]), + (0x1C90, 0x1CBA, [0, -3008, 0]), + (0x1CBD, 0x1CBF, [0, -3008, 0]), + (0x1D79, 0x1D79, [35332, 0, 35332]), + (0x1D7D, 0x1D7D, [3814, 0, 3814]), + (0x1D8E, 0x1D8E, [35384, 0, 35384]), + (0x1E00, 0x1E95, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x1E9B, 0x1E9B, [-59, 0, -59]), + (0x1E9E, 0x1E9E, [0, -7615, 0]), + (0x1EA0, 0x1EFF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x1F00, 0x1F07, [8, 0, 8]), + (0x1F08, 0x1F0F, [0, -8, 0]), + (0x1F10, 0x1F15, [8, 0, 8]), + (0x1F18, 0x1F1D, [0, -8, 0]), + (0x1F20, 0x1F27, [8, 0, 8]), + (0x1F28, 0x1F2F, [0, -8, 0]), + (0x1F30, 0x1F37, [8, 0, 8]), + (0x1F38, 0x1F3F, [0, -8, 0]), + (0x1F40, 0x1F45, [8, 0, 8]), + (0x1F48, 0x1F4D, [0, -8, 0]), + (0x1F51, 0x1F51, [8, 0, 8]), + (0x1F53, 0x1F53, [8, 0, 8]), + (0x1F55, 0x1F55, [8, 0, 8]), + (0x1F57, 0x1F57, [8, 0, 8]), + (0x1F59, 0x1F59, [0, -8, 0]), + (0x1F5B, 0x1F5B, [0, -8, 0]), + (0x1F5D, 0x1F5D, [0, -8, 0]), + (0x1F5F, 0x1F5F, [0, -8, 0]), + (0x1F60, 0x1F67, [8, 0, 8]), + (0x1F68, 0x1F6F, [0, -8, 0]), + (0x1F70, 0x1F71, [74, 0, 74]), + (0x1F72, 0x1F75, [86, 0, 86]), + (0x1F76, 0x1F77, [100, 0, 100]), + (0x1F78, 0x1F79, [128, 0, 128]), + (0x1F7A, 0x1F7B, [112, 0, 112]), + (0x1F7C, 0x1F7D, [126, 0, 126]), + (0x1F80, 0x1F87, [8, 0, 8]), + (0x1F88, 0x1F8F, [0, -8, 0]), + (0x1F90, 0x1F97, [8, 0, 8]), + (0x1F98, 0x1F9F, [0, -8, 0]), + (0x1FA0, 0x1FA7, [8, 0, 8]), + (0x1FA8, 0x1FAF, [0, -8, 0]), + (0x1FB0, 0x1FB1, [8, 0, 8]), + (0x1FB3, 0x1FB3, [9, 0, 9]), + (0x1FB8, 0x1FB9, [0, -8, 0]), + (0x1FBA, 0x1FBB, [0, -74, 0]), + (0x1FBC, 0x1FBC, [0, -9, 0]), + (0x1FBE, 0x1FBE, [-7205, 0, -7205]), + (0x1FC3, 0x1FC3, [9, 0, 9]), + (0x1FC8, 0x1FCB, [0, -86, 0]), + (0x1FCC, 0x1FCC, [0, -9, 0]), + (0x1FD0, 0x1FD1, [8, 0, 8]), + (0x1FD8, 0x1FD9, [0, -8, 0]), + (0x1FDA, 0x1FDB, [0, -100, 0]), + (0x1FE0, 0x1FE1, [8, 0, 8]), + (0x1FE5, 0x1FE5, [7, 0, 7]), + (0x1FE8, 0x1FE9, [0, -8, 0]), + (0x1FEA, 0x1FEB, [0, -112, 0]), + (0x1FEC, 0x1FEC, [0, -7, 0]), + (0x1FF3, 0x1FF3, [9, 0, 9]), + (0x1FF8, 0x1FF9, [0, -128, 0]), + (0x1FFA, 0x1FFB, [0, -126, 0]), + (0x1FFC, 0x1FFC, [0, -9, 0]), + (0x2126, 0x2126, [0, -7517, 0]), + (0x212A, 0x212A, [0, -8383, 0]), + (0x212B, 0x212B, [0, -8262, 0]), + (0x2132, 0x2132, [0, 28, 0]), + (0x214E, 0x214E, [-28, 0, -28]), + (0x2160, 0x216F, [0, 16, 0]), + (0x2170, 0x217F, [-16, 0, -16]), + (0x2183, 0x2184, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x24B6, 0x24CF, [0, 26, 0]), + (0x24D0, 0x24E9, [-26, 0, -26]), + (0x2C00, 0x2C2F, [0, 48, 0]), + (0x2C30, 0x2C5F, [-48, 0, -48]), + (0x2C60, 0x2C61, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C62, 0x2C62, [0, -10743, 0]), + (0x2C63, 0x2C63, [0, -3814, 0]), + (0x2C64, 0x2C64, [0, -10727, 0]), + (0x2C65, 0x2C65, [-10795, 0, -10795]), + (0x2C66, 0x2C66, [-10792, 0, -10792]), + (0x2C67, 0x2C6C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C6D, 0x2C6D, [0, -10780, 0]), + (0x2C6E, 0x2C6E, [0, -10749, 0]), + (0x2C6F, 0x2C6F, [0, -10783, 0]), + (0x2C70, 0x2C70, [0, -10782, 0]), + (0x2C72, 0x2C73, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C75, 0x2C76, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C7E, 0x2C7F, [0, -10815, 0]), + (0x2C80, 
0x2CE3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2CEB, 0x2CEE, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2CF2, 0x2CF3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2D00, 0x2D25, [-7264, 0, -7264]), + (0x2D27, 0x2D27, [-7264, 0, -7264]), + (0x2D2D, 0x2D2D, [-7264, 0, -7264]), + (0xA640, 0xA66D, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA680, 0xA69B, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA722, 0xA72F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA732, 0xA76F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA779, 0xA77C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA77D, 0xA77D, [0, -35332, 0]), + (0xA77E, 0xA787, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA78B, 0xA78C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA78D, 0xA78D, [0, -42280, 0]), + (0xA790, 0xA793, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA794, 0xA794, [48, 0, 48]), + (0xA796, 0xA7A9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7AA, 0xA7AA, [0, -42308, 0]), + (0xA7AB, 0xA7AB, [0, -42319, 0]), + (0xA7AC, 0xA7AC, [0, -42315, 0]), + (0xA7AD, 0xA7AD, [0, -42305, 0]), + (0xA7AE, 0xA7AE, [0, -42308, 0]), + (0xA7B0, 0xA7B0, [0, -42258, 0]), + (0xA7B1, 0xA7B1, [0, -42282, 0]), + (0xA7B2, 0xA7B2, [0, -42261, 0]), + (0xA7B3, 0xA7B3, [0, 928, 0]), + (0xA7B4, 0xA7C3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7C4, 0xA7C4, [0, -48, 0]), + (0xA7C5, 0xA7C5, [0, -42307, 0]), + (0xA7C6, 0xA7C6, [0, -35384, 0]), + (0xA7C7, 0xA7CA, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7D0, 0xA7D1, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7D6, 0xA7D9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7F5, 0xA7F6, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xAB53, 0xAB53, [-928, 0, -928]), + (0xAB70, 0xABBF, [-38864, 0, -38864]), + (0xFF21, 0xFF3A, [0, 32, 0]), + (0xFF41, 0xFF5A, [-32, 0, -32]), + (0x10400, 0x10427, [0, 40, 0]), + (0x10428, 0x1044F, [-40, 0, -40]), + (0x104B0, 0x104D3, [0, 40, 0]), + (0x104D8, 0x104FB, [-40, 0, -40]), + (0x10570, 0x1057A, [0, 39, 0]), + (0x1057C, 0x1058A, [0, 39, 0]), + (0x1058C, 0x10592, [0, 39, 0]), + (0x10594, 0x10595, [0, 39, 0]), + (0x10597, 0x105A1, [-39, 0, -39]), + (0x105A3, 0x105B1, [-39, 0, -39]), + (0x105B3, 0x105B9, [-39, 0, -39]), + (0x105BB, 0x105BC, [-39, 0, -39]), + (0x10C80, 0x10CB2, [0, 64, 0]), + (0x10CC0, 0x10CF2, [-64, 0, -64]), + (0x118A0, 0x118BF, [0, 32, 0]), + (0x118C0, 0x118DF, [-32, 0, -32]), + (0x16E40, 0x16E5F, [0, 32, 0]), + (0x16E60, 0x16E7F, [-32, 0, -32]), + (0x1E900, 0x1E921, [0, 34, 0]), + (0x1E922, 0x1E943, [-34, 0, -34]), +]; + +fn to_case(case: usize, ch: i32) -> i32 { + if case >= MAX_CASE { + return REPLACEMENT_CHAR; + } + // binary search over ranges + let mut lo = 0; + let mut hi = CASE_TABLE.len(); + while lo < hi { + let m = lo + (hi - lo) / 2; + let cr = CASE_TABLE[m]; + if cr.0 <= ch && ch <= cr.1 { + let delta = cr.2[case]; + if delta > MAX_RUNE { + // In an Upper-Lower sequence, which always starts with + // an UpperCase letter, the real deltas always look like: + // {0, 1, 0} UpperCase (Lower is next) + // {-1, 0, -1} LowerCase (Upper, Title are previous) + // The characters at even offsets from the beginning of the + // sequence are upper case; the ones at odd offsets are lower. + // The correct mapping can be done by clearing or setting the low + // bit in the sequence offset. + // The constants UpperCase and TitleCase are even while LowerCase + // is odd so we take the low bit from case. 
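            // Worked example with the Upper-Lower range (0x0100, 0x012F): 'ā'
            // (0x0101) sits at odd offset 1, so to_case(UPPER_CASE, 0x0101)
            // clears the low bit, 0x0100 + ((1 & !1) | 0) = 0x0100 ('Ā'),
            // while to_case(LOWER_CASE, 0x0100) sets it,
            // 0x0100 + ((0 & !1) | 1) = 0x0101 ('ā').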
+ return cr.0 + (((ch - cr.0) & !1) | (case as i32 & 1)); + } + return ch + delta; + } + if ch < cr.0 { + hi = m; + } else { + lo = m + 1; + } + } + ch +} + +pub fn unicode_to_upper(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'a' as i32 <= r && r <= 'z' as i32 { + r -= ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(UPPER_CASE, r) as u32) + } +} + +pub fn unicode_to_lower(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'A' as i32 <= r && r <= 'Z' as i32 { + r += ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(LOWER_CASE, r) as u32) + } +} + +pub fn unicode_to_title(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'a' as i32 <= r && r <= 'z' as i32 { + r -= ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(TITLE_CASE, r) as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + static CASE_TEST: &[(usize, u32, u32)] = &[ + // ASCII (special-cased so test carefully) + (UPPER_CASE, '\n' as u32, '\n' as u32), + (UPPER_CASE, 'a' as u32, 'A' as u32), + (UPPER_CASE, 'A' as u32, 'A' as u32), + (UPPER_CASE, '7' as u32, '7' as u32), + (LOWER_CASE, '\n' as u32, '\n' as u32), + (LOWER_CASE, 'a' as u32, 'a' as u32), + (LOWER_CASE, 'A' as u32, 'a' as u32), + (LOWER_CASE, '7' as u32, '7' as u32), + (TITLE_CASE, '\n' as u32, '\n' as u32), + (TITLE_CASE, 'a' as u32, 'A' as u32), + (TITLE_CASE, 'A' as u32, 'A' as u32), + (TITLE_CASE, '7' as u32, '7' as u32), + // Latin-1: easy to read the tests! + (UPPER_CASE, 0x80, 0x80), + (UPPER_CASE, 'Å' as u32, 'Å' as u32), + (UPPER_CASE, 'å' as u32, 'Å' as u32), + (LOWER_CASE, 0x80, 0x80), + (LOWER_CASE, 'Å' as u32, 'å' as u32), + (LOWER_CASE, 'å' as u32, 'å' as u32), + (TITLE_CASE, 0x80, 0x80), + (TITLE_CASE, 'Å' as u32, 'Å' as u32), + (TITLE_CASE, 'å' as u32, 'Å' as u32), + // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049 + (UPPER_CASE, 0x0130, 'İ' as u32), + (LOWER_CASE, 0x0130, 'i' as u32), + (UPPER_CASE, 0x0131, 'I' as u32), + (LOWER_CASE, 0x0131, 0x0131), + (TITLE_CASE, 0x0131, 'I' as u32), + // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L; 0069 006A;;;;N;LATIN SMALL LETTER I + // J;;0132;;0132 + (UPPER_CASE, 0x0133, 0x0132), + (LOWER_CASE, 0x0133, 0x0133), + (TITLE_CASE, 0x0133, 0x0132), + // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B; + (UPPER_CASE, 0x212A, 0x212A), + (LOWER_CASE, 0x212A, 'k' as u32), + (TITLE_CASE, 0x212A, 0x212A), + // From an UpperLower sequence + // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641; + (UPPER_CASE, 0xA640, 0xA640), + (LOWER_CASE, 0xA640, 0xA641), + (TITLE_CASE, 0xA640, 0xA640), + // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640 + (UPPER_CASE, 0xA641, 0xA640), + (LOWER_CASE, 0xA641, 0xA641), + (TITLE_CASE, 0xA641, 0xA640), + // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F; + (UPPER_CASE, 0xA64E, 0xA64E), + (LOWER_CASE, 0xA64E, 0xA64F), + (TITLE_CASE, 0xA64E, 0xA64E), + // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E + (UPPER_CASE, 0xA65F, 0xA65E), + (LOWER_CASE, 0xA65F, 0xA65F), + (TITLE_CASE, 0xA65F, 0xA65E), + // From another UpperLower sequence + // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L + // ACUTE;;;013A; + (UPPER_CASE, 0x0139, 0x0139), + (LOWER_CASE, 0x0139, 0x013A), + (TITLE_CASE, 0x0139, 0x0139), + // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L; 004C 00B7;;;;N;;;;0140; + 
(UPPER_CASE, 0x013f, 0x013f), + (LOWER_CASE, 0x013f, 0x0140), + (TITLE_CASE, 0x013f, 0x013f), + // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N + // HACEK;;0147;;0147 + (UPPER_CASE, 0x0148, 0x0147), + (LOWER_CASE, 0x0148, 0x0148), + (TITLE_CASE, 0x0148, 0x0147), + // Lowercase lower than uppercase. + // AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8 + (UPPER_CASE, 0xab78, 0x13a8), + (LOWER_CASE, 0xab78, 0xab78), + (TITLE_CASE, 0xab78, 0x13a8), + (UPPER_CASE, 0x13a8, 0x13a8), + (LOWER_CASE, 0x13a8, 0xab78), + (TITLE_CASE, 0x13a8, 0x13a8), + // Last block in the 5.1.0 table + // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428; + (UPPER_CASE, 0x10400, 0x10400), + (LOWER_CASE, 0x10400, 0x10428), + (TITLE_CASE, 0x10400, 0x10400), + // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F; + (UPPER_CASE, 0x10427, 0x10427), + (LOWER_CASE, 0x10427, 0x1044F), + (TITLE_CASE, 0x10427, 0x10427), + // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400 + (UPPER_CASE, 0x10428, 0x10400), + (LOWER_CASE, 0x10428, 0x10428), + (TITLE_CASE, 0x10428, 0x10400), + // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427 + (UPPER_CASE, 0x1044F, 0x10427), + (LOWER_CASE, 0x1044F, 0x1044F), + (TITLE_CASE, 0x1044F, 0x10427), + // First one not in the 5.1.0 table + // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;; + (UPPER_CASE, 0x10450, 0x10450), + (LOWER_CASE, 0x10450, 0x10450), + (TITLE_CASE, 0x10450, 0x10450), + // Non-letters with case. + (LOWER_CASE, 0x2161, 0x2171), + (UPPER_CASE, 0x0345, 0x0399), + ]; + + #[test] + fn test_case() { + for &(case, input, output) in CASE_TEST { + if case == UPPER_CASE { + assert_eq!( + unicode_to_upper(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } else if case == LOWER_CASE { + assert_eq!( + unicode_to_lower(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } else { + assert_eq!( + unicode_to_title(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } + } + } +} diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 22127e62f49f..93cf0c8ca556 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -141,13 +141,13 @@ pub trait Encoding { #[inline] fn lower(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(char::to_lowercase); + let res = s.chars().flat_map(|ch| encoding::unicode_to_lower(ch)); writer.write_from_char_iter(res) } #[inline] fn upper(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(char::to_uppercase); + let res = s.chars().flat_map(|ch| encoding::unicode_to_upper(ch)); writer.write_from_char_iter(res) } } diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index fb330f91e03b..c86e8d22ccb3 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -2860,6 +2860,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("ßßåı".as_bytes().to_vec()), + Some("ßßÅI".as_bytes().to_vec()), + ), (None, None), ]; @@ -2920,11 +2924,32 @@ mod tests { #[test] fn test_gbk_lower_upper() { // Test GBK string case - let sig = vec![ScalarFuncSig::Lower, ScalarFuncSig::Upper]; - for s in sig { - let output = RpnFnScalarEvaluator::new() + let cases = vec![ + ( + ScalarFuncSig::LowerUtf8, + 
"àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::UpperUtf8, + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::LowerUtf8, + "İİIIÅI".as_bytes().to_vec(), + "iiiiåi".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::UpperUtf8, + "ßßåı".as_bytes().to_vec(), + "ßßÅI".as_bytes().to_vec(), + ), + ]; + for (s, input, output) in cases { + let result = RpnFnScalarEvaluator::new() .push_param_with_field_type( - Some("àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec()).clone(), + Some(input).clone(), FieldTypeBuilder::new() .tp(FieldTypeTp::VarString) .charset(CHARSET_GBK) @@ -2932,10 +2957,7 @@ mod tests { ) .evaluate(s) .unwrap(); - assert_eq!( - output, - Some("àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec()) - ); + assert_eq!(result, Some(output),); } } @@ -2960,6 +2982,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("İİIIÅI".as_bytes().to_vec()), + Some("İİIIÅI".as_bytes().to_vec()), + ), (None, None), ]; @@ -3006,6 +3032,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("İİIIÅI".as_bytes().to_vec()), + Some("iiiiåi".as_bytes().to_vec()), + ), (None, None), ]; From 462a5972814433f005dff74f89a26e5307f2e3d4 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:13:47 +0800 Subject: [PATCH 187/203] In-memory Engine: defining hybrid engine (#16114) ref tikv/tikv#16141 Introduce hybrid engine which implements KvEngine but without detailed implementation. Signed-off-by: SpadeA-Tang --- Cargo.lock | 10 ++ Cargo.toml | 4 +- components/engine_traits/src/lib.rs | 2 + components/engine_traits/src/memory_engine.rs | 17 +++ components/hybrid_engine/Cargo.toml | 13 ++ components/hybrid_engine/src/cf_names.rs | 15 +++ components/hybrid_engine/src/cf_options.rs | 21 +++ components/hybrid_engine/src/checkpoint.rs | 22 +++ components/hybrid_engine/src/compact.rs | 71 ++++++++++ components/hybrid_engine/src/db_options.rs | 21 +++ components/hybrid_engine/src/engine.rs | 120 +++++++++++++++++ .../hybrid_engine/src/engine_iterator.rs | 54 ++++++++ .../hybrid_engine/src/flow_control_factors.rs | 23 ++++ .../hybrid_engine/src/hybrid_metrics.rs | 25 ++++ components/hybrid_engine/src/import.rs | 17 +++ components/hybrid_engine/src/iterable.rs | 17 +++ components/hybrid_engine/src/lib.rs | 24 ++++ components/hybrid_engine/src/misc.rs | 127 ++++++++++++++++++ .../hybrid_engine/src/mvcc_properties.rs | 23 ++++ components/hybrid_engine/src/perf_context.rs | 20 +++ .../hybrid_engine/src/range_properties.rs | 60 +++++++++ components/hybrid_engine/src/snapshot.rs | 103 ++++++++++++++ components/hybrid_engine/src/sst.rs | 53 ++++++++ .../hybrid_engine/src/table_properties.rs | 21 +++ .../hybrid_engine/src/ttl_properties.rs | 21 +++ components/hybrid_engine/src/write_batch.rs | 101 ++++++++++++++ 26 files changed, 1004 insertions(+), 1 deletion(-) create mode 100644 components/engine_traits/src/memory_engine.rs create mode 100644 components/hybrid_engine/Cargo.toml create mode 100644 components/hybrid_engine/src/cf_names.rs create mode 100644 components/hybrid_engine/src/cf_options.rs create mode 100644 components/hybrid_engine/src/checkpoint.rs create mode 100644 components/hybrid_engine/src/compact.rs create mode 100644 
components/hybrid_engine/src/db_options.rs create mode 100644 components/hybrid_engine/src/engine.rs create mode 100644 components/hybrid_engine/src/engine_iterator.rs create mode 100644 components/hybrid_engine/src/flow_control_factors.rs create mode 100644 components/hybrid_engine/src/hybrid_metrics.rs create mode 100644 components/hybrid_engine/src/import.rs create mode 100644 components/hybrid_engine/src/iterable.rs create mode 100644 components/hybrid_engine/src/lib.rs create mode 100644 components/hybrid_engine/src/misc.rs create mode 100644 components/hybrid_engine/src/mvcc_properties.rs create mode 100644 components/hybrid_engine/src/perf_context.rs create mode 100644 components/hybrid_engine/src/range_properties.rs create mode 100644 components/hybrid_engine/src/snapshot.rs create mode 100644 components/hybrid_engine/src/sst.rs create mode 100644 components/hybrid_engine/src/table_properties.rs create mode 100644 components/hybrid_engine/src/ttl_properties.rs create mode 100644 components/hybrid_engine/src/write_batch.rs diff --git a/Cargo.lock b/Cargo.lock index 147b42405bf2..ab5c5d1663ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2443,6 +2443,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hybrid_engine" +version = "0.0.1" +dependencies = [ + "engine_traits", + "tikv_util", + "txn_types", +] + [[package]] name = "hyper" version = "0.14.23" @@ -6240,6 +6249,7 @@ dependencies = [ "grpcio-health", "hex 0.4.2", "http", + "hybrid_engine", "hyper", "hyper-openssl", "hyper-tls", diff --git a/Cargo.toml b/Cargo.toml index fdc86fb5f15d..e66b7aee0fd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", cloud-aws = ["encryption_export/cloud-aws"] cloud-gcp = ["encryption_export/cloud-gcp"] cloud-azure = ["encryption_export/cloud-azure"] -testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] +testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport", "hybrid_engine/testexport"] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] @@ -90,6 +90,7 @@ grpcio = { workspace = true } grpcio-health = { workspace = true } hex = "0.4" http = "0" +hybrid_engine = { workspace = true } hyper = { version = "0.14", features = ["full"] } hyper-tls = "0.5" into_other = { workspace = true } @@ -319,6 +320,7 @@ encryption = { path = "components/encryption" } encryption_export = { path = "components/encryption/export" } engine_panic = { path = "components/engine_panic" } engine_rocks = { path = "components/engine_rocks" } +hybrid_engine = { path = "components/hybrid_engine" } engine_rocks_helper = { path = "components/engine_rocks_helper" } engine_test = { path = "components/engine_test", default-features = false } engine_traits = { path = "components/engine_traits" } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 9cf4c22dd829..537089945611 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -312,6 +312,8 @@ mod table_properties; 
pub use crate::table_properties::*; mod checkpoint; pub use crate::checkpoint::*; +mod memory_engine; +pub use memory_engine::RegionCacheEngine; // These modules contain more general traits, some of which may be implemented // by multiple types. diff --git a/components/engine_traits/src/memory_engine.rs b/components/engine_traits/src/memory_engine.rs new file mode 100644 index 000000000000..189c3bc0c28c --- /dev/null +++ b/components/engine_traits/src/memory_engine.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::fmt::Debug; + +use crate::{Iterable, Snapshot, WriteBatchExt}; + +/// RegionCacheEngine works as a region cache caching some regions (in Memory or +/// NVME for instance) to improve the read performance. +pub trait RegionCacheEngine: + WriteBatchExt + Iterable + Debug + Clone + Unpin + Send + Sync + 'static +{ + type Snapshot: Snapshot; + + // If None is returned, the RegionCacheEngine is currently not readable for this + // region or read_ts. + fn snapshot(&self, region_id: u64, read_ts: u64) -> Option; +} diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml new file mode 100644 index 000000000000..f38604a10c11 --- /dev/null +++ b/components/hybrid_engine/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "hybrid_engine" +version = "0.0.1" +edition = "2021" +publish = false + +[features] +testexport = [] + +[dependencies] +engine_traits = { workspace = true } +txn_types = { workspace = true } +tikv_util = { workspace = true } \ No newline at end of file diff --git a/components/hybrid_engine/src/cf_names.rs b/components/hybrid_engine/src/cf_names.rs new file mode 100644 index 000000000000..990fb4d0f763 --- /dev/null +++ b/components/hybrid_engine/src/cf_names.rs @@ -0,0 +1,15 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CfNamesExt, KvEngine, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl CfNamesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn cf_names(&self) -> Vec<&str> { + self.disk_engine().cf_names() + } +} diff --git a/components/hybrid_engine/src/cf_options.rs b/components/hybrid_engine/src/cf_options.rs new file mode 100644 index 000000000000..61fe08da536e --- /dev/null +++ b/components/hybrid_engine/src/cf_options.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CfOptionsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl CfOptionsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type CfOptions = EK::CfOptions; + + fn get_options_cf(&self, cf: &str) -> Result { + self.disk_engine().get_options_cf(cf) + } + + fn set_options_cf(&self, cf: &str, options: &[(&str, &str)]) -> Result<()> { + self.disk_engine().set_options_cf(cf, options) + } +} diff --git a/components/hybrid_engine/src/checkpoint.rs b/components/hybrid_engine/src/checkpoint.rs new file mode 100644 index 000000000000..7d9bdb022ea8 --- /dev/null +++ b/components/hybrid_engine/src/checkpoint.rs @@ -0,0 +1,22 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
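// Checkpointing is delegated entirely to the disk engine: it holds the
// complete data set, while the region cache engine only caches a subset of
// regions, so a checkpoint of the disk engine alone is sufficient. merge()
// likewise forwards only the underlying disk engines.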
+ +use engine_traits::{Checkpointable, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl Checkpointable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Checkpointer = EK::Checkpointer; + + fn new_checkpointer(&self) -> Result { + self.disk_engine().new_checkpointer() + } + + fn merge(&self, dbs: &[&Self]) -> Result<()> { + let disk_dbs: Vec<_> = dbs.iter().map(|&db| db.disk_engine()).collect(); + self.disk_engine().merge(&disk_dbs) + } +} diff --git a/components/hybrid_engine/src/compact.rs b/components/hybrid_engine/src/compact.rs new file mode 100644 index 000000000000..6afbba556b09 --- /dev/null +++ b/components/hybrid_engine/src/compact.rs @@ -0,0 +1,71 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CompactExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl CompactExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type CompactedEvent = EK::CompactedEvent; + + fn auto_compactions_is_disabled(&self) -> Result { + self.disk_engine().auto_compactions_is_disabled() + } + + fn compact_range_cf( + &self, + cf: &str, + start_key: Option<&[u8]>, + end_key: Option<&[u8]>, + exclusive_manual: bool, + max_subcompactions: u32, + ) -> Result<()> { + self.disk_engine().compact_range_cf( + cf, + start_key, + end_key, + exclusive_manual, + max_subcompactions, + ) + } + + fn compact_files_in_range_cf( + &self, + cf: &str, + start: Option<&[u8]>, + end: Option<&[u8]>, + output_level: Option, + ) -> Result<()> { + self.disk_engine() + .compact_files_in_range_cf(cf, start, end, output_level) + } + + fn compact_files_in_range( + &self, + start: Option<&[u8]>, + end: Option<&[u8]>, + output_level: Option, + ) -> Result<()> { + self.disk_engine() + .compact_files_in_range(start, end, output_level) + } + + fn compact_files_cf( + &self, + cf: &str, + files: Vec, + output_level: Option, + max_subcompactions: u32, + exclude_l0: bool, + ) -> Result<()> { + self.disk_engine() + .compact_files_cf(cf, files, output_level, max_subcompactions, exclude_l0) + } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + self.disk_engine().check_in_range(start, end) + } +} diff --git a/components/hybrid_engine/src/db_options.rs b/components/hybrid_engine/src/db_options.rs new file mode 100644 index 000000000000..6b4be90a43f0 --- /dev/null +++ b/components/hybrid_engine/src/db_options.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{DbOptionsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl DbOptionsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbOptions = EK::DbOptions; + + fn get_db_options(&self) -> Self::DbOptions { + self.disk_engine().get_db_options() + } + + fn set_db_options(&self, options: &[(&str, &str)]) -> Result<()> { + self.disk_engine().set_db_options(options) + } +} diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs new file mode 100644 index 000000000000..deb544b91c62 --- /dev/null +++ b/components/hybrid_engine/src/engine.rs @@ -0,0 +1,120 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SyncMutable}; + +use crate::snapshot::HybridEngineSnapshot; + +/// This engine is structured with both a disk engine and an region cache +/// engine. 
The disk engine houses the complete database data, whereas the +/// region cache engine functions as a region cache, selectively caching certain +/// regions (in a better performance storage device such as NVME or RAM) to +/// enhance read performance. For the regions that are cached, region cache +/// engine retains all data that has not been garbage collected. +#[derive(Clone, Debug)] +pub struct HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + disk_engine: EK, + region_cache_engine: EC, +} + +impl HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn disk_engine(&self) -> &EK { + &self.disk_engine + } + + pub fn mut_disk_engine(&mut self) -> &mut EK { + &mut self.disk_engine + } + + pub fn region_cache_engine(&self) -> &EC { + &self.region_cache_engine + } + + pub fn mut_region_cache_engine(&mut self) -> &mut EC { + &mut self.region_cache_engine + } +} + +// todo: implement KvEngine methods as well as it's super traits. +impl KvEngine for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Snapshot = HybridEngineSnapshot; + + fn snapshot(&self) -> Self::Snapshot { + unimplemented!() + } + + fn sync(&self) -> engine_traits::Result<()> { + unimplemented!() + } + + fn bad_downcast(&self) -> &T { + unimplemented!() + } + + #[cfg(feature = "testexport")] + fn inner_refcount(&self) -> usize { + unimplemented!() + } +} + +impl Peekable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbVector = EK::DbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl SyncMutable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&self, key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&self, cf: &str, key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/engine_iterator.rs b/components/hybrid_engine/src/engine_iterator.rs new file mode 100644 index 000000000000..642aac82b605 --- /dev/null +++ b/components/hybrid_engine/src/engine_iterator.rs @@ -0,0 +1,54 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
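// The hybrid iterator wraps either the disk engine's iterator or the region
// cache engine's iterator (the Either field below), depending on which engine
// served the read. All methods are still stubs in this commit; once
// implemented, each is expected to be a plain dispatch, roughly:
//
//     match &mut self.iter {
//         Either::Left(disk_iter) => disk_iter.next(),
//         Either::Right(cache_iter) => cache_iter.next(),
//     }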
+ +use engine_traits::{Iterator, KvEngine, RegionCacheEngine, Result}; +use tikv_util::Either; + +pub struct HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + iter: Either, +} + +impl Iterator for HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn seek(&mut self, _key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_for_prev(&mut self, _key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_to_first(&mut self) -> Result { + unimplemented!() + } + + fn seek_to_last(&mut self) -> Result { + unimplemented!() + } + + fn prev(&mut self) -> Result { + unimplemented!() + } + + fn next(&mut self) -> Result { + unimplemented!() + } + + fn key(&self) -> &[u8] { + unimplemented!() + } + + fn value(&self) -> &[u8] { + unimplemented!() + } + + fn valid(&self) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/flow_control_factors.rs b/components/hybrid_engine/src/flow_control_factors.rs new file mode 100644 index 000000000000..9649671d4182 --- /dev/null +++ b/components/hybrid_engine/src/flow_control_factors.rs @@ -0,0 +1,23 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{FlowControlFactorsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl FlowControlFactorsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_cf_num_files_at_level(&self, cf: &str, level: usize) -> Result> { + self.disk_engine().get_cf_num_files_at_level(cf, level) + } + + fn get_cf_num_immutable_mem_table(&self, cf: &str) -> Result> { + self.disk_engine().get_cf_num_immutable_mem_table(cf) + } + + fn get_cf_pending_compaction_bytes(&self, cf: &str) -> Result> { + self.disk_engine().get_cf_pending_compaction_bytes(cf) + } +} diff --git a/components/hybrid_engine/src/hybrid_metrics.rs b/components/hybrid_engine/src/hybrid_metrics.rs new file mode 100644 index 000000000000..2d49d9ad1d9f --- /dev/null +++ b/components/hybrid_engine/src/hybrid_metrics.rs @@ -0,0 +1,25 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RegionCacheEngine, StatisticsReporter}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineStatisticsReporter {} + +impl StatisticsReporter> for HybridEngineStatisticsReporter +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn new(name: &str) -> Self { + unimplemented!() + } + + fn collect(&mut self, engine: &HybridEngine) { + unimplemented!() + } + + fn flush(&mut self) { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/import.rs b/components/hybrid_engine/src/import.rs new file mode 100644 index 000000000000..de40c83d2144 --- /dev/null +++ b/components/hybrid_engine/src/import.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{ImportExt, KvEngine, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl ImportExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type IngestExternalFileOptions = EK::IngestExternalFileOptions; + + fn ingest_external_file_cf(&self, cf: &str, files: &[&str]) -> engine_traits::Result<()> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/iterable.rs b/components/hybrid_engine/src/iterable.rs new file mode 100644 index 000000000000..96933641b068 --- /dev/null +++ b/components/hybrid_engine/src/iterable.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{IterOptions, Iterable, KvEngine, RegionCacheEngine, Result}; + +use crate::{engine::HybridEngine, engine_iterator::HybridEngineIterator}; + +impl Iterable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Iterator = HybridEngineIterator; + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs new file mode 100644 index 000000000000..147fcc5a8a54 --- /dev/null +++ b/components/hybrid_engine/src/lib.rs @@ -0,0 +1,24 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(dead_code)] +#![allow(unused_variables)] + +mod cf_names; +mod cf_options; +mod checkpoint; +mod compact; +mod db_options; +mod engine; +mod engine_iterator; +mod flow_control_factors; +mod hybrid_metrics; +mod import; +mod iterable; +mod misc; +mod mvcc_properties; +mod perf_context; +mod range_properties; +mod snapshot; +mod sst; +mod table_properties; +mod ttl_properties; +mod write_batch; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs new file mode 100644 index 000000000000..9575344e65cb --- /dev/null +++ b/components/hybrid_engine/src/misc.rs @@ -0,0 +1,127 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, MiscExt, RegionCacheEngine, Result}; + +use crate::{engine::HybridEngine, hybrid_metrics::HybridEngineStatisticsReporter}; + +impl MiscExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type StatisticsReporter = HybridEngineStatisticsReporter; + + fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { + unimplemented!() + } + + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { + unimplemented!() + } + + fn flush_oldest_cf( + &self, + wait: bool, + threshold: Option, + ) -> Result { + unimplemented!() + } + + fn delete_ranges_cf( + &self, + wopts: &engine_traits::WriteOptions, + cf: &str, + strategy: engine_traits::DeleteStrategy, + ranges: &[engine_traits::Range<'_>], + ) -> Result { + unimplemented!() + } + + fn get_approximate_memtable_stats_cf( + &self, + cf: &str, + range: &engine_traits::Range<'_>, + ) -> Result<(u64, u64)> { + unimplemented!() + } + + fn ingest_maybe_slowdown_writes(&self, cf: &str) -> Result { + unimplemented!() + } + + fn get_sst_key_ranges(&self, cf: &str, level: usize) -> Result, Vec)>> { + unimplemented!() + } + + fn get_engine_used_size(&self) -> Result { + unimplemented!() + } + + fn path(&self) -> &str { + unimplemented!() + } + + fn sync_wal(&self) -> Result<()> { + unimplemented!() + } + + fn pause_background_work(&self) -> Result<()> { + unimplemented!() + } + + fn continue_background_work(&self) -> Result<()> { + unimplemented!() + } + + fn exists(path: &str) -> bool { + unimplemented!() + } + + fn locked(path: &str) -> Result { + unimplemented!() + } + + fn dump_stats(&self) -> Result { + unimplemented!() + } + + fn get_latest_sequence_number(&self) -> u64 { + unimplemented!() + } + + fn get_oldest_snapshot_sequence_number(&self) -> Option { + unimplemented!() + } + + fn get_total_sst_files_size_cf(&self, cf: &str) -> Result> { + unimplemented!() + } + + fn get_num_keys(&self) -> Result { + unimplemented!() + } + + fn get_range_stats( + &self, + cf: &str, + start: &[u8], + end: &[u8], + ) -> Result> { + unimplemented!() + } + + fn is_stalled_or_stopped(&self) -> bool { + unimplemented!() + } + + fn get_active_memtable_stats_cf( + &self, + cf: &str, + ) -> 
Result> { + unimplemented!() + } + + fn get_accumulated_flush_count_cf(cf: &str) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/mvcc_properties.rs b/components/hybrid_engine/src/mvcc_properties.rs new file mode 100644 index 000000000000..0d03258d2de2 --- /dev/null +++ b/components/hybrid_engine/src/mvcc_properties.rs @@ -0,0 +1,23 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, MvccProperties, MvccPropertiesExt, RegionCacheEngine}; +use txn_types::TimeStamp; + +use crate::engine::HybridEngine; + +impl MvccPropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_mvcc_properties_cf( + &self, + cf: &str, + safe_point: TimeStamp, + start_key: &[u8], + end_key: &[u8], + ) -> Option { + self.disk_engine() + .get_mvcc_properties_cf(cf, safe_point, start_key, end_key) + } +} diff --git a/components/hybrid_engine/src/perf_context.rs b/components/hybrid_engine/src/perf_context.rs new file mode 100644 index 000000000000..1db4e8c9d277 --- /dev/null +++ b/components/hybrid_engine/src/perf_context.rs @@ -0,0 +1,20 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, PerfContextExt, PerfContextKind, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl PerfContextExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type PerfContext = EK::PerfContext; + + fn get_perf_context( + level: engine_traits::PerfLevel, + kind: PerfContextKind, + ) -> Self::PerfContext { + EK::get_perf_context(level, kind) + } +} diff --git a/components/hybrid_engine/src/range_properties.rs b/components/hybrid_engine/src/range_properties.rs new file mode 100644 index 000000000000..7f38379f36da --- /dev/null +++ b/components/hybrid_engine/src/range_properties.rs @@ -0,0 +1,60 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
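// Size, key-count and split-key estimates are always answered by the disk
// engine, since only it contains the complete key range; the region cache
// engine mirrors just a subset of regions.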
+ +use engine_traits::{KvEngine, Range, RangePropertiesExt, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl RangePropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_range_approximate_keys(&self, range: Range<'_>, large_threshold: u64) -> Result { + self.disk_engine() + .get_range_approximate_keys(range, large_threshold) + } + + fn get_range_approximate_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + self.disk_engine() + .get_range_approximate_keys_cf(cfname, range, large_threshold) + } + + fn get_range_approximate_size(&self, range: Range<'_>, large_threshold: u64) -> Result { + self.disk_engine() + .get_range_approximate_size(range, large_threshold) + } + + fn get_range_approximate_size_cf( + &self, + cfname: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + self.disk_engine() + .get_range_approximate_size_cf(cfname, range, large_threshold) + } + + fn get_range_approximate_split_keys( + &self, + range: Range<'_>, + key_count: usize, + ) -> Result>> { + self.disk_engine() + .get_range_approximate_split_keys(range, key_count) + } + + fn get_range_approximate_split_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + key_count: usize, + ) -> Result>> { + self.disk_engine() + .get_range_approximate_split_keys_cf(cfname, range, key_count) + } +} diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs new file mode 100644 index 000000000000..4ada590c3d61 --- /dev/null +++ b/components/hybrid_engine/src/snapshot.rs @@ -0,0 +1,103 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fmt::{self, Debug, Formatter}, + marker::PhantomData, +}; + +use engine_traits::{ + CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, + Snapshot, SnapshotMiscExt, +}; + +use crate::engine_iterator::HybridEngineIterator; + +pub struct HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + disk_snap: EK::Snapshot, + + phantom: PhantomData, +} + +impl Snapshot for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ +} + +impl Debug for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "Hybrid Engine Snapshot Impl") + } +} + +impl Drop for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn drop(&mut self) { + unimplemented!() + } +} + +impl Iterable for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Iterator = HybridEngineIterator; + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Peekable for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbVector = EK::DbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl CfNamesExt for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn cf_names(&self) -> Vec<&str> { + self.disk_snap.cf_names() + } +} + +impl SnapshotMiscExt for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn sequence_number(&self) -> u64 { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/sst.rs b/components/hybrid_engine/src/sst.rs new file mode 
100644 index 000000000000..2bade295ec32 --- /dev/null +++ b/components/hybrid_engine/src/sst.rs @@ -0,0 +1,53 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{ + KvEngine, RegionCacheEngine, Result, SstCompressionType, SstExt, SstWriterBuilder, +}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineSstWriteBuilder {} + +impl SstExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type SstReader = EK::SstReader; + type SstWriter = EK::SstWriter; + type SstWriterBuilder = HybridEngineSstWriteBuilder; +} + +impl SstWriterBuilder> for HybridEngineSstWriteBuilder +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn new() -> Self { + unimplemented!() + } + + fn set_db(self, _db: &HybridEngine) -> Self { + unimplemented!() + } + + fn set_cf(self, _cf: &str) -> Self { + unimplemented!() + } + + fn set_in_memory(self, _in_memory: bool) -> Self { + unimplemented!() + } + + fn set_compression_type(self, _compression: Option) -> Self { + unimplemented!() + } + + fn set_compression_level(self, level: i32) -> Self { + unimplemented!() + } + + fn build(self, _path: &str) -> Result< as SstExt>::SstWriter> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/table_properties.rs b/components/hybrid_engine/src/table_properties.rs new file mode 100644 index 000000000000..6ad95e5931ae --- /dev/null +++ b/components/hybrid_engine/src/table_properties.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, Range, RegionCacheEngine, Result, TablePropertiesExt}; + +use crate::engine::HybridEngine; + +impl TablePropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type TablePropertiesCollection = EK::TablePropertiesCollection; + + fn table_properties_collection( + &self, + cf: &str, + ranges: &[Range<'_>], + ) -> Result { + self.disk_engine().table_properties_collection(cf, ranges) + } +} diff --git a/components/hybrid_engine/src/ttl_properties.rs b/components/hybrid_engine/src/ttl_properties.rs new file mode 100644 index 000000000000..d5b7d8578b53 --- /dev/null +++ b/components/hybrid_engine/src/ttl_properties.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RegionCacheEngine, Result, TtlProperties, TtlPropertiesExt}; + +use crate::engine::HybridEngine; + +impl TtlPropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_range_ttl_properties_cf( + &self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + ) -> Result> { + self.disk_engine() + .get_range_ttl_properties_cf(cf, start_key, end_key) + } +} diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs new file mode 100644 index 000000000000..3aba34c9c859 --- /dev/null +++ b/components/hybrid_engine/src/write_batch.rs @@ -0,0 +1,101 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
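// For now the hybrid write batch only carries the disk engine's write batch
// (_disk_write_batch); a companion region cache engine batch is left as a
// todo, and every WriteBatch/Mutable method below is an unimplemented
// placeholder.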
+ +use engine_traits::{ + KvEngine, Mutable, RegionCacheEngine, Result, WriteBatch, WriteBatchExt, WriteOptions, +}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineWriteBatch { + _disk_write_batch: EK::WriteBatch, + // todo: region_cache_engine write batch +} + +impl WriteBatchExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type WriteBatch = HybridEngineWriteBatch; + const WRITE_BATCH_MAX_KEYS: usize = EK::WRITE_BATCH_MAX_KEYS; + + fn write_batch(&self) -> Self::WriteBatch { + unimplemented!() + } + + fn write_batch_with_cap(&self, _: usize) -> Self::WriteBatch { + unimplemented!() + } +} + +impl WriteBatch for HybridEngineWriteBatch { + fn write_opt(&mut self, _: &WriteOptions) -> Result { + unimplemented!() + } + + fn write_callback_opt(&mut self, _opts: &WriteOptions, _cb: impl FnMut()) -> Result { + unimplemented!() + } + + fn data_size(&self) -> usize { + unimplemented!() + } + + fn count(&self) -> usize { + unimplemented!() + } + + fn is_empty(&self) -> bool { + unimplemented!() + } + + fn should_write_to_engine(&self) -> bool { + unimplemented!() + } + + fn clear(&mut self) { + unimplemented!() + } + + fn set_save_point(&mut self) { + unimplemented!() + } + + fn pop_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn merge(&mut self, _other: Self) -> Result<()> { + unimplemented!() + } +} + +impl Mutable for HybridEngineWriteBatch { + fn put(&mut self, _key: &[u8], _value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&mut self, _cf: &str, _key: &[u8], _value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&mut self, _key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&mut self, _cf: &str, _key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&mut self, _begin_key: &[u8], _end_key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&mut self, _cf: &str, _begin_key: &[u8], _end_key: &[u8]) -> Result<()> { + unimplemented!() + } +} From a07db9f1c72b0b4bbf41579665c656a9adcca9af Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 11 Dec 2023 22:43:49 -0800 Subject: [PATCH 188/203] server: make gc support multi-threads (#16096) close tikv/tikv#16101 do parallel region gc and expose the gc thread configuration. The configuration can be dynamically updated. 
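The scheduling change caps the number of in-flight region GC tasks at the configured thread count with a counter guarded by a Mutex plus a Condvar; each task's completion callback decrements the counter and wakes the scheduler so the next region can be admitted. The snippet below is a minimal, self-contained sketch of that throttling pattern with illustrative names only; it is not TiKV code and uses no TiKV APIs:

    use std::sync::{Arc, Condvar, Mutex};
    use std::thread;
    use std::time::Duration;

    fn main() {
        let max_tasks = 2; // plays the role of gc.num_threads
        let controller = Arc::new((Mutex::new(0usize), Condvar::new()));
        let mut handles = Vec::new();
        for i in 0..8 {
            // Wait until the number of running tasks drops below the limit.
            let (lock, cvar) = &*controller;
            let mut running = lock.lock().unwrap();
            while *running >= max_tasks {
                running = cvar.wait(running).unwrap();
            }
            *running += 1;
            drop(running);

            let controller = Arc::clone(&controller);
            handles.push(thread::spawn(move || {
                thread::sleep(Duration::from_millis(50)); // stand-in for one region's GC
                let (lock, cvar) = &*controller;
                *lock.lock().unwrap() -= 1;
                cvar.notify_one(); // let the scheduler admit the next region
                println!("task {} finished", i);
            }));
        }
        for h in handles {
            h.join().unwrap();
        }
    }

The real GcManager follows the same shape: it schedules region GC asynchronously, blocks only when the limit is reached, and relies on the per-task callback to notify the waiter.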
Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- components/tikv_util/src/worker/pool.rs | 16 ++ src/server/gc_worker/compaction_filter.rs | 2 +- src/server/gc_worker/config.rs | 23 ++- src/server/gc_worker/gc_manager.rs | 103 +++++++++---- src/server/gc_worker/gc_worker.rs | 165 ++++++++++++++++----- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 7 files changed, 246 insertions(+), 65 deletions(-) diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index 9ef827b007a5..a22732a7aae6 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -224,6 +224,14 @@ impl LazyWorker { pub fn remote(&self) -> Remote { self.worker.remote() } + + pub fn pool_size(&self) -> usize { + self.worker.pool_size() + } + + pub fn pool(&self) -> FuturePool { + self.worker.pool() + } } pub struct ReceiverWrapper { @@ -448,6 +456,14 @@ impl Worker { self.pool.remote().clone() } + pub fn pool_size(&self) -> usize { + self.pool.get_pool_size() + } + + pub fn pool(&self) -> FuturePool { + self.pool.clone() + } + fn start_impl( &self, runner: R, diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 665824a1baca..2bea0cf347bf 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -888,7 +888,7 @@ pub mod test_utils { cfg.ratio_threshold = ratio_threshold; } cfg.enable_compaction_filter = true; - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg)), None) }; let feature_gate = { let feature_gate = FeatureGate::default(); diff --git a/src/server/gc_worker/config.rs b/src/server/gc_worker/config.rs index 1816dd845e12..809c55e12682 100644 --- a/src/server/gc_worker/config.rs +++ b/src/server/gc_worker/config.rs @@ -3,7 +3,10 @@ use std::sync::Arc; use online_config::{ConfigChange, ConfigManager, OnlineConfig}; -use tikv_util::config::{ReadableSize, VersionTrack}; +use tikv_util::{ + config::{ReadableSize, VersionTrack}, + yatp_pool::FuturePool, +}; const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1; pub const DEFAULT_GC_BATCH_KEYS: usize = 512; @@ -22,6 +25,8 @@ pub struct GcConfig { /// greater than 5.0.0. Change `compaction_filter_skip_version_check` /// can enable it by force. 
pub compaction_filter_skip_version_check: bool, + /// gc threads count + pub num_threads: usize, } impl Default for GcConfig { @@ -32,6 +37,7 @@ impl Default for GcConfig { max_write_bytes_per_sec: ReadableSize(DEFAULT_GC_MAX_WRITE_BYTES_PER_SEC), enable_compaction_filter: true, compaction_filter_skip_version_check: false, + num_threads: 1, } } } @@ -41,12 +47,15 @@ impl GcConfig { if self.batch_keys == 0 { return Err("gc.batch_keys should not be 0".into()); } + if self.num_threads == 0 { + return Err("gc.thread_count should not be 0".into()); + } Ok(()) } } #[derive(Clone, Default)] -pub struct GcWorkerConfigManager(pub Arc>); +pub struct GcWorkerConfigManager(pub Arc>, pub Option); impl ConfigManager for GcWorkerConfigManager { fn dispatch( @@ -55,6 +64,16 @@ impl ConfigManager for GcWorkerConfigManager { ) -> std::result::Result<(), Box> { { let change = change.clone(); + if let Some(pool) = self.1.as_ref() { + if let Some(v) = change.get("num_threads") { + let pool_size: usize = v.into(); + pool.scale_pool_size(pool_size); + info!( + "GC worker thread count is changed"; + "new_thread_count" => pool_size, + ); + } + } self.0 .update(move |cfg: &mut GcConfig| cfg.update(change))?; } diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index be18f8216d5b..d9c5287b67d0 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -4,7 +4,7 @@ use std::{ cmp::Ordering, sync::{ atomic::{AtomicU64, Ordering as AtomicOrdering}, - mpsc, Arc, + mpsc, Arc, Condvar, Mutex, }, thread::{self, Builder as ThreadBuilder, JoinHandle}, time::Duration, @@ -20,10 +20,10 @@ use txn_types::{Key, TimeStamp}; use super::{ compaction_filter::is_compaction_filter_allowed, config::GcWorkerConfigManager, - gc_worker::{sync_gc, GcSafePointProvider, GcTask}, + gc_worker::{schedule_gc, GcSafePointProvider, GcTask}, Result, }; -use crate::{server::metrics::*, tikv_util::sys::thread::StdThreadBuildWrapper}; +use crate::{server::metrics::*, storage::Callback, tikv_util::sys::thread::StdThreadBuildWrapper}; const POLL_SAFE_POINT_INTERVAL_SECS: u64 = 10; @@ -245,6 +245,8 @@ pub(super) struct GcManager GcManager { @@ -254,6 +256,7 @@ impl GcMan worker_scheduler: Scheduler>, cfg_tracker: GcWorkerConfigManager, feature_gate: FeatureGate, + concurrent_tasks: usize, ) -> GcManager { GcManager { cfg, @@ -263,6 +266,7 @@ impl GcMan gc_manager_ctx: GcManagerContext::new(), cfg_tracker, feature_gate, + max_concurrent_tasks: concurrent_tasks, } } @@ -442,13 +446,27 @@ impl GcMan let mut progress = Some(Key::from_encoded(BEGIN_KEY.to_vec())); // Records how many region we have GC-ed. - let mut processed_regions = 0; + let mut scheduled_regions = 0; + let task_controller = Arc::new((Mutex::new(0), Condvar::new())); + // the task_controller is the combination to control the number + // of tasks The mutex is used for protecting the number of current + // tasks, while the condvar is used for notifying/get notified when the + // number of current tasks is changed. + let (lock, cvar) = &*task_controller; + let maybe_wait = |max_tasks| { + let mut current_tasks: std::sync::MutexGuard<'_, usize> = lock.lock().unwrap(); + while *current_tasks > max_tasks { + // Wait until the number of current tasks is below the limit + current_tasks = cvar.wait(current_tasks).unwrap(); + } + }; info!("gc_worker: auto gc starts"; "safe_point" => self.curr_safe_point()); // The following loop iterates all regions whose leader is on this TiKV and does // GC on them. 
At the same time, check whether safe_point is updated // periodically. If it's updated, rewinding will happen. + loop { self.gc_manager_ctx.check_stopped()?; if is_compaction_filter_allowed(&self.cfg_tracker.value(), &self.feature_gate) { @@ -462,9 +480,9 @@ impl GcMan // We have worked to the end and we need to rewind. Restart from beginning. progress = Some(Key::from_encoded(BEGIN_KEY.to_vec())); need_rewind = false; - info!("gc_worker: auto gc rewinds"; "processed_regions" => processed_regions); + info!("gc_worker: auto gc rewinds"; "scheduled_regions" => scheduled_regions); - processed_regions = 0; + scheduled_regions = 0; // Set the metric to zero to show that rewinding has happened. AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC .with_label_values(&[PROCESS_TYPE_GC]) @@ -483,19 +501,40 @@ impl GcMan if finished { // We have worked to the end of the TiKV or our progress has reached `end`, and // we don't need to rewind. In this case, the round of GC has finished. - info!("gc_worker: auto gc finishes"; "processed_regions" => processed_regions); - return Ok(()); + info!("gc_worker: all regions task are scheduled"; + "processed_regions" => scheduled_regions, + ); + break; } } - assert!(progress.is_some()); // Before doing GC, check whether safe_point is updated periodically to // determine if rewinding is needed. self.check_if_need_rewind(&progress, &mut need_rewind, &mut end); - progress = self.gc_next_region(progress.unwrap(), &mut processed_regions)?; + let controller: Arc<(Mutex, Condvar)> = Arc::clone(&task_controller); + let cb = Box::new(move |_res| { + let (lock, cvar) = &*controller; + let mut current_tasks = lock.lock().unwrap(); + *current_tasks -= 1; + cvar.notify_one(); + AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC + .with_label_values(&[PROCESS_TYPE_GC]) + .inc(); + }); + maybe_wait(self.max_concurrent_tasks - 1); + let mut current_tasks = lock.lock().unwrap(); + progress = self.async_gc_next_region(progress.unwrap(), cb, &mut current_tasks)?; + scheduled_regions += 1; } + + // wait for all tasks finished + self.gc_manager_ctx.check_stopped()?; + maybe_wait(0); + info!("gc_worker: auto gc finishes"; "processed_regions" => scheduled_regions); + + Ok(()) } /// Checks whether we need to rewind in this round of GC. Only used in @@ -536,13 +575,14 @@ impl GcMan } } - /// Does GC on the next region after `from_key`. Returns the end key of the - /// region it processed. If we have processed to the end of all regions, - /// returns `None`. - fn gc_next_region( + /// Does GC on the next region after `from_key` asynchronously. Returns the + /// end key of the region it processed. If we have processed to the end + /// of all regions, returns `None`. + fn async_gc_next_region( &mut self, from_key: Key, - processed_regions: &mut usize, + callback: Callback<()>, + running_tasks: &mut usize, ) -> GcManagerResult> { // Get the information of the next region to do GC. let (region, next_key) = self.get_next_gc_context(from_key); @@ -552,16 +592,16 @@ impl GcMan let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); debug!("trying gc"; "region_id" => region.id, "start_key" => &hex_start, "end_key" => &hex_end); - if let Err(e) = sync_gc(&self.worker_scheduler, region, self.curr_safe_point()) { - // Ignore the error and continue, since it's useless to retry this. - // TODO: Find a better way to handle errors. Maybe we should retry. 
- warn!("failed gc"; "start_key" => &hex_start, "end_key" => &hex_end, "err" => ?e); - } - - *processed_regions += 1; - AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC - .with_label_values(&[PROCESS_TYPE_GC]) - .inc(); + let _ = schedule_gc( + &self.worker_scheduler, + region, + self.curr_safe_point(), + callback, + ) + .map(|_| { + *running_tasks += 1; + Ok::<(), GcManagerError>(()) + }); Ok(next_key) } @@ -710,8 +750,16 @@ mod tests { impl GcManagerTestUtil { pub fn new(regions: BTreeMap, RegionInfo>) -> Self { let (gc_task_sender, gc_task_receiver) = channel(); - let worker = WorkerBuilder::new("test-gc-manager").create(); - let scheduler = worker.start("gc-manager", MockGcRunner { tx: gc_task_sender }); + let worker = WorkerBuilder::new("test-gc-manager") + .thread_count(2) + .create(); + let scheduler = worker.start( + "gc-manager", + MockGcRunner { + tx: gc_task_sender.clone(), + }, + ); + worker.start("gc-manager", MockGcRunner { tx: gc_task_sender }); let (safe_point_sender, safe_point_receiver) = channel(); @@ -731,6 +779,7 @@ mod tests { scheduler, GcWorkerConfigManager::default(), Default::default(), + 2, ); Self { gc_manager: Some(gc_manager), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index c608470ba87a..a0537a478d06 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -34,6 +34,7 @@ use tikv_util::{ Either, }; use txn_types::{Key, TimeStamp}; +use yatp::{task::future::TaskCell, Remote}; use super::{ check_need_gc, @@ -178,7 +179,7 @@ where } /// Used to perform GC operations on the engine. -pub struct GcRunner { +pub struct GcRunnerCore { store_id: u64, engine: E, @@ -193,6 +194,26 @@ pub struct GcRunner { stats_map: HashMap, } +impl Clone for GcRunnerCore { + fn clone(&self) -> Self { + GcRunnerCore { + store_id: self.store_id, + engine: self.engine.clone(), + flow_info_sender: self.flow_info_sender.clone(), + limiter: self.limiter.clone(), + cfg: self.cfg.clone(), + cfg_tracker: self.cfg_tracker.clone(), + stats_map: HashMap::default(), + } + } +} + +/// Used to perform GC operations on the engine. 
+pub struct GcRunner { + inner: GcRunnerCore, + pool: Remote, +} + pub const MAX_RAW_WRITE_SIZE: usize = 32 * 1024; pub struct MvccRaw { @@ -282,7 +303,7 @@ fn init_snap_ctx(store_id: u64, region: &Region) -> Context { ctx } -impl GcRunner { +impl GcRunnerCore { pub fn new( store_id: u64, engine: E, @@ -918,18 +939,12 @@ impl GcRunner { error!("failed to flush deletes, will leave garbage"; "err" => ?e); } } -} - -impl Runnable for GcRunner { - type Task = GcTask; #[inline] fn run(&mut self, task: GcTask) { let _io_type_guard = WithIoType::new(IoType::Gc); let enum_label = task.get_enum_label(); - GC_GCTASK_COUNTER_STATIC.get(enum_label).inc(); - let timer = SlowTimer::from_secs(GC_TASK_SLOW_SECONDS); let update_metrics = |is_err| { GC_TASK_DURATION_HISTOGRAM_VEC @@ -941,9 +956,6 @@ impl Runnable for GcRunner { } }; - // Refresh config before handle task - self.refresh_cfg(); - match task { GcTask::Gc { region, @@ -1062,6 +1074,37 @@ impl Runnable for GcRunner { } } +impl GcRunner { + pub fn new( + store_id: u64, + engine: E, + flow_info_sender: Sender, + cfg_tracker: Tracker, + cfg: GcConfig, + pool: Remote, + ) -> Self { + Self { + inner: GcRunnerCore::new(store_id, engine, flow_info_sender, cfg_tracker, cfg), + pool, + } + } +} + +impl Runnable for GcRunner { + type Task = GcTask; + + #[inline] + fn run(&mut self, task: GcTask) { + // Refresh config before handle task + self.inner.refresh_cfg(); + + let mut inner = self.inner.clone(); + self.pool.spawn(async move { + inner.run(task); + }); + } +} + /// When we failed to schedule a `GcTask` to `GcRunner`, use this to handle the /// `ScheduleError`. fn handle_gc_task_schedule_error(e: ScheduleError>) -> Result<()> { @@ -1081,7 +1124,7 @@ fn handle_gc_task_schedule_error(e: ScheduleError>) -> Res } /// Schedules a `GcTask` to the `GcRunner`. 
-fn schedule_gc( +pub fn schedule_gc( scheduler: &Scheduler>, region: Region, safe_point: TimeStamp, @@ -1174,13 +1217,18 @@ impl GcWorker { feature_gate: FeatureGate, region_info_provider: Arc, ) -> Self { - let worker_builder = WorkerBuilder::new("gc-worker").pending_capacity(GC_MAX_PENDING_TASKS); + let worker_builder = WorkerBuilder::new("gc-worker") + .pending_capacity(GC_MAX_PENDING_TASKS) + .thread_count(cfg.num_threads); let worker = worker_builder.create().lazy_build("gc-worker"); let worker_scheduler = worker.scheduler(); GcWorker { engine, flow_info_sender: Some(flow_info_sender), - config_manager: GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg))), + config_manager: GcWorkerConfigManager( + Arc::new(VersionTrack::new(cfg)), + Some(worker.pool()), + ), refs: Arc::new(AtomicUsize::new(1)), worker: Arc::new(Mutex::new(worker)), worker_scheduler, @@ -1219,6 +1267,7 @@ impl GcWorker { self.scheduler(), self.config_manager.clone(), self.feature_gate.clone(), + self.config_manager.value().num_threads, ) .start()?; *handle = Some(new_handle); @@ -1226,14 +1275,20 @@ impl GcWorker { } pub fn start(&mut self, store_id: u64) -> Result<()> { + let mut worker = self.worker.lock().unwrap(); let runner = GcRunner::new( store_id, self.engine.clone(), self.flow_info_sender.take().unwrap(), - self.config_manager.0.clone().tracker("gc-woker".to_owned()), + self.config_manager + .0 + .clone() + .tracker("gc-worker".to_owned()), self.config_manager.value().clone(), + worker.remote(), ); - self.worker.lock().unwrap().start(runner); + worker.start(runner); + Ok(()) } @@ -1296,6 +1351,10 @@ impl GcWorker { pub fn get_config_manager(&self) -> GcWorkerConfigManager { self.config_manager.clone() } + + pub fn get_worker_thread_count(&self) -> usize { + self.worker.lock().unwrap().pool_size() + } } #[cfg(any(test, feature = "testexport"))] @@ -1486,6 +1545,7 @@ mod tests { use engine_traits::Peekable as _; use futures::executor::block_on; use kvproto::{kvrpcpb::ApiVersion, metapb::Peer}; + use online_config::{ConfigChange, ConfigManager, ConfigValue}; use raft::StateRole; use raftstore::coprocessor::{ region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, @@ -1634,10 +1694,12 @@ mod tests { region2.mut_peers().push(new_peer(store_id, 2)); region2.set_start_key(split_key.to_vec()); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( engine, tx, - GcConfig::default(), + gc_config, gate, Arc::new(MockRegionInfoProvider::new(vec![region1, region2])), ); @@ -1810,10 +1872,12 @@ mod tests { let mut host = CoprocessorHost::::default(); let ri_provider = RegionInfoAccessor::new(&mut host); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( prefixed_engine.clone(), tx, - GcConfig::default(), + gc_config, feature_gate, Arc::new(ri_provider.clone()), ); @@ -1902,13 +1966,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( store_id, prefixed_engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -1966,13 +2030,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( store_id, prefixed_engine.clone(), tx, - 
GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2067,13 +2131,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( 1, prefixed_engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2202,10 +2266,12 @@ mod tests { let mut region = Region::default(); region.mut_peers().push(new_peer(store_id, 1)); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( engine.clone(), tx, - GcConfig::default(), + gc_config, gate, Arc::new(MockRegionInfoProvider::new(vec![region.clone()])), ); @@ -2333,7 +2399,7 @@ mod tests { ) -> ( MultiRocksEngine, Arc, - GcRunner, + GcRunnerCore, Vec, mpsc::Receiver, ) { @@ -2386,13 +2452,13 @@ mod tests { ])); let cfg = GcConfig::default(); - let gc_runner = GcRunner::new( + let gc_runner = GcRunnerCore::new( store_id, engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2564,13 +2630,13 @@ mod tests { let ri_provider = Arc::new(MockRegionInfoProvider::new(vec![r1, r2])); let cfg = GcConfig::default(); - let mut gc_runner = GcRunner::new( + let mut gc_runner = GcRunnerCore::new( store_id, engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2756,4 +2822,33 @@ mod tests { test_destroy_range_for_multi_rocksdb_impl(b"k05", b"k195", vec![1, 2]); test_destroy_range_for_multi_rocksdb_impl(b"k099", b"k25", vec![2, 3]); } + + #[test] + fn test_update_gc_thread_count() { + let engine = TestEngineBuilder::new().build().unwrap(); + let (tx, _rx) = mpsc::channel(); + let gate = FeatureGate::default(); + gate.set_version("5.0.0").unwrap(); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 1; + let gc_worker = GcWorker::new( + engine, + tx, + gc_config, + gate, + Arc::new(MockRegionInfoProvider::new(vec![])), + ); + let mut config_change = ConfigChange::new(); + config_change.insert(String::from("num_threads"), ConfigValue::Usize(5)); + let mut cfg_manager = gc_worker.get_config_manager(); + cfg_manager.dispatch(config_change).unwrap(); + + assert_eq!(gc_worker.get_worker_thread_count(), 5); + + let mut config_change = ConfigChange::new(); + config_change.insert(String::from("num_threads"), ConfigValue::Usize(2)); + cfg_manager.dispatch(config_change).unwrap(); + + assert_eq!(gc_worker.get_worker_thread_count(), 2); + } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index f1628cda50e1..05cbde827d27 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -834,6 +834,7 @@ fn test_serde_custom_tikv_config() { max_write_bytes_per_sec: ReadableSize::mb(10), enable_compaction_filter: false, compaction_filter_skip_version_check: true, + num_threads: 2, }; value.pessimistic_txn = PessimisticTxnConfig { wait_for_lock_timeout: 
ReadableDuration::millis(10), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 61a2a24b43a8..9eb628b8dc57 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -643,6 +643,7 @@ batch-keys = 256 max-write-bytes-per-sec = "10MB" enable-compaction-filter = false compaction-filter-skip-version-check = true +num-threads = 2 [pessimistic-txn] enabled = false # test backward compatibility From 95da0269335fa0e05eb077bc8b7216a086d3aefe Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 12 Dec 2023 18:37:48 +0800 Subject: [PATCH 189/203] *: use OpenSSL for crypto RNG (#16170) ref tikv/tikv#15982 To comply with FIPS 140-2 requirements, it's essential to choose an RNG that meets these specifications. This commit replaces the `rand` crate with OpenSSL for cryptographic random number generation. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- Cargo.lock | 32 ++++++++--------- Cargo.toml | 6 ++-- cmd/tikv-ctl/Cargo.toml | 3 +- cmd/tikv-ctl/src/main.rs | 1 + cmd/tikv-server/Cargo.toml | 2 +- cmd/tikv-server/src/main.rs | 1 + components/backup-stream/Cargo.toml | 2 +- components/backup/src/writer.rs | 2 +- components/{fips => crypto}/Cargo.toml | 2 +- components/{fips => crypto}/build.rs | 0 .../{fips/src/lib.rs => crypto/src/fips.rs} | 0 components/crypto/src/lib.rs | 13 +++++++ components/crypto/src/rand.rs | 17 +++++++++ components/encryption/Cargo.toml | 5 ++- components/encryption/src/crypter.rs | 22 ++++++------ .../encryption/src/encrypted_file/mod.rs | 4 +-- components/encryption/src/file_dict_file.rs | 4 +-- components/encryption/src/io.rs | 35 ++++++++----------- components/encryption/src/manager/mod.rs | 28 +++++++-------- components/encryption/src/master_key/file.rs | 2 +- components/encryption/src/master_key/kms.rs | 2 +- components/encryption/src/master_key/mem.rs | 4 ++- components/server/Cargo.toml | 1 - components/tidb_query_expr/Cargo.toml | 2 +- .../tidb_query_expr/src/impl_encryption.rs | 12 ++++--- components/tikv_util/src/lib.rs | 33 ----------------- scripts/check-bins.py | 2 +- src/lib.rs | 2 +- 28 files changed, 119 insertions(+), 120 deletions(-) rename components/{fips => crypto}/Cargo.toml (94%) rename components/{fips => crypto}/build.rs (100%) rename components/{fips/src/lib.rs => crypto/src/fips.rs} (100%) create mode 100644 components/crypto/src/lib.rs create mode 100644 components/crypto/src/rand.rs diff --git a/Cargo.lock b/Cargo.lock index ab5c5d1663ab..0cd0c6cade14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1246,6 +1246,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crypto" +version = "0.0.1" +dependencies = [ + "openssl", + "openssl-sys", + "slog", + "slog-global", +] + [[package]] name = "csv" version = "1.1.6" @@ -1414,6 +1424,7 @@ dependencies = [ "cloud", "crc32fast", "crossbeam", + "crypto", "derive_more", "error_code", "fail", @@ -1428,7 +1439,6 @@ dependencies = [ "openssl", "prometheus", "protobuf", - "rand 0.8.5", "serde", "serde_derive", "slog", @@ -1817,16 +1827,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "fips" -version = "0.0.1" -dependencies = [ - "openssl", - "openssl-sys", - "slog", - "slog-global", -] - [[package]] name = "fix-hidden-lifetime-bug" version = "0.2.5" @@ -5116,7 +5116,6 @@ dependencies = [ "raft_log_engine", "raftstore", "raftstore-v2", - "rand 0.8.5", "resolved_ts", "resource_control", 
"resource_metering", @@ -6177,6 +6176,7 @@ dependencies = [ "byteorder", "chrono", "codec", + "crypto", "file_system", "flate2", "hex 0.4.2", @@ -6188,7 +6188,6 @@ dependencies = [ "panic_hook", "profiler", "protobuf", - "rand 0.8.5", "regex", "safemem", "serde", @@ -6226,6 +6225,7 @@ dependencies = [ "crc32fast", "crc64fast", "crossbeam", + "crypto", "dashmap", "encryption_export", "engine_panic", @@ -6237,7 +6237,6 @@ dependencies = [ "example_coprocessor_plugin", "fail", "file_system", - "fips", "flate2", "futures 0.3.15", "futures-executor", @@ -6350,12 +6349,12 @@ dependencies = [ "collections", "concurrency_manager", "crossbeam", + "crypto", "encryption_export", "engine_rocks", "engine_traits", "error_code", "file_system", - "fips", "futures 0.3.15", "gag", "grpcio", @@ -6373,7 +6372,6 @@ dependencies = [ "raft-engine-ctl", "raft_log_engine", "raftstore", - "rand 0.8.5", "regex", "security", "serde_json", @@ -6430,9 +6428,9 @@ version = "0.0.1" dependencies = [ "cc", "clap 2.33.0", + "crypto", "encryption_export", "engine_traits", - "fips", "keys", "kvproto", "raft-engine", diff --git a/Cargo.toml b/Cargo.toml index e66b7aee0fd8..2d905a4115f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,7 @@ coprocessor_plugin_api = { workspace = true } crc32fast = "1.2" crc64fast = "0.1" crossbeam = "0.8" +crypto = { workspace = true } dashmap = "5" encryption_export = { workspace = true } engine_panic = { workspace = true } @@ -78,7 +79,6 @@ engine_traits_tests = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } -fips = { workspace = true } flate2 = { version = "1.0", default-features = false, features = ["zlib"] } futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" @@ -243,6 +243,7 @@ members = [ "components/collections", "components/concurrency_manager", "components/coprocessor_plugin_api", + "components/crypto", "components/encryption", "components/encryption/export", "components/engine_rocks_helper", @@ -252,7 +253,6 @@ members = [ "components/error_code", "components/external_storage", "components/file_system", - "components/fips", "components/into_other", "components/keys", "components/log_wrappers", @@ -328,7 +328,7 @@ engine_traits_tests = { path = "components/engine_traits_tests", default-feature error_code = { path = "components/error_code" } external_storage = { path = "components/external_storage" } file_system = { path = "components/file_system" } -fips = { path = "components/fips" } +crypto = { path = "components/crypto" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } keys = { path = "components/keys" } diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 82553a4b45a1..9504c3a4eae8 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -49,12 +49,12 @@ clap = "2.32" collections = { workspace = true } concurrency_manager = { workspace = true } crossbeam = "0.8" +crypto = { workspace = true } encryption_export = { workspace = true } engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } -fips = { workspace = true } futures = "0.3" gag = "1.0" grpcio = { workspace = true } @@ -72,7 +72,6 @@ raft-engine = { git = "https://github.com/tikv/raft-engine.git" } raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } raft_log_engine = { workspace = true } raftstore = { workspace = true } -rand = "0.8" regex = "1" 
security = { workspace = true } serde_json = "1.0" diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index ec0c8bfc915f..25f8cc1337be 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -20,6 +20,7 @@ use std::{ }; use collections::HashMap; +use crypto::fips; use encryption_export::{ create_backend, data_key_manager_from_config, DataKeyManager, DecrypterReader, Iv, }; diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 6f916d7476d3..cc99e05fb587 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -34,9 +34,9 @@ pprof-fp = ["tikv/pprof-fp"] [dependencies] clap = "2.32" +crypto = { workspace = true } encryption_export = { workspace = true } engine_traits = { workspace = true } -fips = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git" } diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 01354906b466..4c1eb4fc2c57 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -5,6 +5,7 @@ use std::{path::Path, process}; use clap::{crate_authors, App, Arg}; +use crypto::fips; use serde_json::{Map, Value}; use server::setup::{ensure_no_unrecognized_config, validate_and_persist_config}; use tikv::{ diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 6a5a0edbba5c..a91b3fb071d3 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -57,7 +57,6 @@ prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } raftstore = { workspace = true } -rand = "0.8.0" regex = "1" resolved_ts = { workspace = true } security = { path = "../security" } @@ -83,6 +82,7 @@ engine_test = { workspace = true } grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } +rand = "0.8.0" tempdir = "0.3" tempfile = "3.0" test_pd = { workspace = true } diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index dfbe36b60cf1..a2d8a31f0eae 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -121,7 +121,7 @@ impl Writer { .with_label_values(&[cf.into()]) .inc_by(self.total_kvs); let file_name = format!("{}_{}.sst", name, cf); - let iv = Iv::new_ctr(); + let iv = Iv::new_ctr().map_err(|e| Error::Other(box_err!("new IV error: {:?}", e)))?; let encrypter_reader = EncrypterReader::new(sst_reader, cipher.cipher_type, &cipher.cipher_key, iv) .map_err(|e| Error::Other(box_err!("new EncrypterReader error: {:?}", e)))?; diff --git a/components/fips/Cargo.toml b/components/crypto/Cargo.toml similarity index 94% rename from components/fips/Cargo.toml rename to components/crypto/Cargo.toml index ab0d2aa1cf79..26eb77ee0579 100644 --- a/components/fips/Cargo.toml +++ b/components/crypto/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "fips" +name = "crypto" version = "0.0.1" edition = "2021" publish = false diff --git a/components/fips/build.rs b/components/crypto/build.rs similarity index 100% rename from components/fips/build.rs rename to components/crypto/build.rs diff --git a/components/fips/src/lib.rs b/components/crypto/src/fips.rs similarity index 100% rename from components/fips/src/lib.rs rename to components/crypto/src/fips.rs diff --git a/components/crypto/src/lib.rs b/components/crypto/src/lib.rs new file mode 100644 index 000000000000..5afb174040c5 --- /dev/null +++ b/components/crypto/src/lib.rs 
@@ -0,0 +1,13 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! A shim crate for cryptographic operations, with special considerations for +//! meeting FIPS 140 requirements. +//! +//! This crate provides a set of cryptographic functionalities, including +//! RNG (random number generator). It has been meticulously crafted +//! to adhere to the FIPS 140 standards, ensuring a secure and compliant +//! environment for cryptographic operations in regulated environments. +// TODO: add message digest. + +pub mod fips; +pub mod rand; diff --git a/components/crypto/src/rand.rs b/components/crypto/src/rand.rs new file mode 100644 index 000000000000..d0f97594f49f --- /dev/null +++ b/components/crypto/src/rand.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! Utilities for cryptographically strong random number generation. + +use openssl::{error::ErrorStack, rand}; + +/// Fill buffer with cryptographically strong pseudo-random bytes. +pub fn rand_bytes(buf: &mut [u8]) -> Result<(), ErrorStack> { + rand::rand_bytes(buf) +} + +/// Return a random u64. +pub fn rand_u64() -> Result { + let mut rand_id = [0u8; 8]; + rand_bytes(&mut rand_id)?; + Ok(u64::from_ne_bytes(rand_id)) +} diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 0f2eac6ad5ab..7375a9c0b201 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -17,6 +17,7 @@ bytes = "1.0" cloud = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" +crypto = { workspace = true } derive_more = "0.99.3" error_code = { workspace = true } fail = "0.5" @@ -30,7 +31,9 @@ online_config = { workspace = true } openssl = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -rand = "0.8" +# For simplicity and compliance with FIPS 140 requirements for random number +# generation, do not use the 'rand' crate in encryption-related code. +# rand = "*" serde = "1.0" serde_derive = "1.0" slog = { workspace = true } diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index aafbe7cf88f4..a60b9c9c20bb 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -5,8 +5,10 @@ use std::fmt::{self, Debug, Formatter}; use byteorder::{BigEndian, ByteOrder}; use cloud::kms::PlainKey; use kvproto::encryptionpb::EncryptionMethod; -use openssl::symm::{self, Cipher as OCipher}; -use rand::{rngs::OsRng, RngCore}; +use openssl::{ + rand, + symm::{self, Cipher as OCipher}, +}; use tikv_util::box_err; use crate::{Error, Result}; @@ -70,17 +72,17 @@ pub enum Iv { impl Iv { /// Generate a random IV for AES-GCM. - pub fn new_gcm() -> Iv { + pub fn new_gcm() -> Result { let mut iv = [0u8; GCM_IV_12]; - OsRng.fill_bytes(&mut iv); - Iv::Gcm(iv) + rand::rand_bytes(&mut iv)?; + Ok(Iv::Gcm(iv)) } /// Generate a random IV for AES-CTR. 
- pub fn new_ctr() -> Iv { + pub fn new_ctr() -> Result { let mut iv = [0u8; CTR_IV_16]; - OsRng.fill_bytes(&mut iv); - Iv::Ctr(iv) + rand::rand_bytes(&mut iv)?; + Ok(Iv::Ctr(iv)) } pub fn from_slice(src: &[u8]) -> Result { @@ -212,9 +214,9 @@ mod tests { let mut ivs = Vec::with_capacity(100); for c in 0..100 { if c % 2 == 0 { - ivs.push(Iv::new_ctr()); + ivs.push(Iv::new_ctr().unwrap()); } else { - ivs.push(Iv::new_gcm()); + ivs.push(Iv::new_gcm().unwrap()); } } ivs.dedup_by(|a, b| a.as_slice() == b.as_slice()); diff --git a/components/encryption/src/encrypted_file/mod.rs b/components/encryption/src/encrypted_file/mod.rs index 9c76b857c70a..8cac47077f44 100644 --- a/components/encryption/src/encrypted_file/mod.rs +++ b/components/encryption/src/encrypted_file/mod.rs @@ -5,10 +5,10 @@ use std::{ path::Path, }; +use crypto::rand; use file_system::{rename, File, OpenOptions}; use kvproto::encryptionpb::EncryptedContent; use protobuf::Message; -use rand::{thread_rng, RngCore}; use slog_global::error; use tikv_util::time::Instant; @@ -66,7 +66,7 @@ impl<'a> EncryptedFile<'a> { // TODO what if a tmp file already exists? let origin_path = self.base.join(self.name); let mut tmp_path = origin_path.clone(); - tmp_path.set_extension(format!("{}.{}", thread_rng().next_u64(), TMP_FILE_SUFFIX)); + tmp_path.set_extension(format!("{}.{}", rand::rand_u64()?, TMP_FILE_SUFFIX)); let mut tmp_file = OpenOptions::new() .create(true) .write(true) diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 0d1dcbbbd6e3..a40fb912b3bc 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -6,10 +6,10 @@ use std::{ }; use byteorder::{BigEndian, ByteOrder}; +use crypto::rand; use file_system::{rename, File, OpenOptions}; use kvproto::encryptionpb::{EncryptedContent, FileDictionary, FileInfo}; use protobuf::Message; -use rand::{thread_rng, RngCore}; use tikv_util::{box_err, info, set_panic_mark, warn}; use crate::{ @@ -127,7 +127,7 @@ impl FileDictionaryFile { if self.enable_log { let origin_path = self.file_path(); let mut tmp_path = origin_path.clone(); - tmp_path.set_extension(format!("{}.{}", thread_rng().next_u64(), TMP_FILE_SUFFIX)); + tmp_path.set_extension(format!("{}.{}", rand::rand_u64()?, TMP_FILE_SUFFIX)); let mut tmp_file = OpenOptions::new() .create(true) .write(true) diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index dc326e784272..4884fc68b92b 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -554,17 +554,10 @@ mod tests { use std::{cmp::min, io::Cursor}; use byteorder::{BigEndian, ByteOrder}; - use rand::{rngs::OsRng, RngCore}; + use openssl::rand; use super::*; - use crate::crypter; - - fn generate_data_key(method: EncryptionMethod) -> Vec { - let key_length = crypter::get_method_key_length(method); - let mut key = vec![0; key_length]; - OsRng.fill_bytes(&mut key); - key - } + use crate::manager::generate_data_key; struct DecoratedCursor { cursor: Cursor>, @@ -628,7 +621,7 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let ivs = [ - Iv::new_ctr(), + Iv::new_ctr().unwrap(), // Iv overflow Iv::from_slice(&{ let mut v = vec![0; 16]; @@ -645,10 +638,10 @@ mod tests { ]; for method in methods { for iv in ivs { - let key = generate_data_key(method); + let (_, key) = generate_data_key(method).unwrap(); let mut plaintext = vec![0; 1024]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let mut encrypter = 
EncrypterWriter::new( DecoratedCursor::new(plaintext.clone(), 1), method, @@ -704,12 +697,12 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let offsets = [1024, 1024 + 1, 10240 - 1, 10240, 10240 + 1]; let sizes = [1024, 10240]; for method in methods { - let key = generate_data_key(method); - let iv = Iv::new_ctr(); + let (_, key) = generate_data_key(method).unwrap(); + let iv = Iv::new_ctr().unwrap(); let encrypter = EncrypterReader::new(DecoratedCursor::new(plaintext.clone(), 1), method, &key, iv) .unwrap(); @@ -741,13 +734,13 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let offsets = [1024, 1024 + 1, 10240 - 1]; let sizes = [1024, 8000]; let written = vec![0; 10240]; for method in methods { - let key = generate_data_key(method); - let iv = Iv::new_ctr(); + let (_, key) = generate_data_key(method).unwrap(); + let iv = Iv::new_ctr().unwrap(); let encrypter = EncrypterWriter::new(DecoratedCursor::new(written.clone(), 1), method, &key, iv) .unwrap(); @@ -787,12 +780,12 @@ mod tests { EncryptionMethod::Aes256Ctr, EncryptionMethod::Sm4Ctr, ]; - let iv = Iv::new_ctr(); + let iv = Iv::new_ctr().unwrap(); let mut plain_text = vec![0; 10240]; - OsRng.fill_bytes(&mut plain_text); + rand::rand_bytes(&mut plain_text).unwrap(); for method in methods { - let key = generate_data_key(method); + let (_, key) = generate_data_key(method).unwrap(); // encrypt plaintext into encrypt_text let read_once = 16; let mut encrypt_reader = EncrypterReader::new( diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index f5a203e96262..0f20741e8414 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -13,6 +13,7 @@ use std::{ }; use crossbeam::channel::{self, select, tick}; +use crypto::rand; use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; @@ -200,7 +201,7 @@ impl Dicts { fn new_file(&self, fname: &str, method: EncryptionMethod, sync: bool) -> Result { let mut file_dict_file = self.file_dict_file.lock().unwrap(); let iv = if method != EncryptionMethod::Plaintext { - Iv::new_ctr() + Iv::new_ctr()? } else { Iv::Empty }; @@ -348,7 +349,9 @@ impl Dicts { // Generate new data key. for _ in 0..GENERATE_DATA_KEY_LIMIT { - let (key_id, key) = generate_data_key(method); + let Ok((key_id, key)) = generate_data_key(method) else { + continue; + }; if key_id == 0 { // 0 is invalid continue; @@ -436,14 +439,12 @@ fn run_background_rotate_work( } } -fn generate_data_key(method: EncryptionMethod) -> (u64, Vec) { - use rand::{rngs::OsRng, RngCore}; - - let key_id = OsRng.next_u64(); +pub(crate) fn generate_data_key(method: EncryptionMethod) -> Result<(u64, Vec)> { + let key_id = rand::rand_u64()?; let key_length = crypter::get_method_key_length(method); let mut key = vec![0; key_length]; - OsRng.fill_bytes(&mut key); - (key_id, key) + rand::rand_bytes(&mut key)?; + Ok((key_id, key)) } pub struct DataKeyManager { @@ -1003,8 +1004,7 @@ impl<'a> DataKeyImporter<'a> { if key_id.is_none() { for _ in 0..GENERATE_DATA_KEY_LIMIT { // Match `generate_data_key`. 
- use rand::{rngs::OsRng, RngCore}; - let id = OsRng.next_u64(); + let id = rand::rand_u64()?; if let Entry::Vacant(e) = key_dict.keys.entry(id) { key_id = Some(id); e.insert(new_key); @@ -1858,11 +1858,11 @@ mod tests { ) .unwrap(); // different key - let (_, key2) = generate_data_key(EncryptionMethod::Aes192Ctr); + let (_, key2) = generate_data_key(EncryptionMethod::Aes192Ctr).unwrap(); importer .add( "2", - Iv::new_ctr().as_slice().to_owned(), + Iv::new_ctr().unwrap().as_slice().to_owned(), DataKey { key: key2.clone(), method: EncryptionMethod::Aes192Ctr, @@ -1896,7 +1896,7 @@ mod tests { importer .add( "2", - Iv::new_ctr().as_slice().to_owned(), + Iv::new_ctr().unwrap().as_slice().to_owned(), DataKey { key: key2.clone(), method: EncryptionMethod::Aes192Ctr, @@ -1918,7 +1918,7 @@ mod tests { let tmp_dir = tempfile::TempDir::new().unwrap(); let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); - let (_, key) = generate_data_key(EncryptionMethod::Aes192Ctr); + let (_, key) = generate_data_key(EncryptionMethod::Aes192Ctr).unwrap(); let file0 = manager.new_file("0").unwrap(); let now = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/components/encryption/src/master_key/file.rs b/components/encryption/src/master_key/file.rs index ad1bfb75a87c..1b24a95e497b 100644 --- a/components/encryption/src/master_key/file.rs +++ b/components/encryption/src/master_key/file.rs @@ -49,7 +49,7 @@ impl FileBackend { impl Backend for FileBackend { fn encrypt(&self, plaintext: &[u8]) -> Result { - let iv = Iv::new_gcm(); + let iv = Iv::new_gcm()?; self.backend.encrypt_content(plaintext, iv) } diff --git a/components/encryption/src/master_key/kms.rs b/components/encryption/src/master_key/kms.rs index 643cb08a0c6c..db3c62194fd0 100644 --- a/components/encryption/src/master_key/kms.rs +++ b/components/encryption/src/master_key/kms.rs @@ -158,7 +158,7 @@ impl KmsBackend { impl Backend for KmsBackend { fn encrypt(&self, plaintext: &[u8]) -> Result { - self.encrypt_content(plaintext, Iv::new_gcm()) + self.encrypt_content(plaintext, Iv::new_gcm()?) 
} fn decrypt(&self, content: &EncryptedContent) -> Result> { diff --git a/components/encryption/src/master_key/mem.rs b/components/encryption/src/master_key/mem.rs index 619acc38ebf1..c19351f5dc76 100644 --- a/components/encryption/src/master_key/mem.rs +++ b/components/encryption/src/master_key/mem.rs @@ -128,7 +128,9 @@ mod tests { .unwrap(); let backend = MemAesGcmBackend::new(key).unwrap(); - let encrypted_content = backend.encrypt_content(&pt, Iv::new_gcm()).unwrap(); + let encrypted_content = backend + .encrypt_content(&pt, Iv::new_gcm().unwrap()) + .unwrap(); let plaintext = backend.decrypt_content(&encrypted_content).unwrap(); assert_eq!(plaintext, pt); diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 55da894c6e8c..c378f0dbd908 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -68,7 +68,6 @@ raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } -rand = "0.8" resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index e09c0cd96de0..60bbde91c31a 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -10,6 +10,7 @@ base64 = "0.13" bstr = "0.2.8" byteorder = "1.2" codec = { workspace = true } +crypto = { workspace = true } file_system = { workspace = true } flate2 = { version = "=1.0.11", default-features = false, features = ["zlib"] } hex = "0.4" @@ -19,7 +20,6 @@ num = { version = "0.3", default-features = false } num-traits = "0.2" openssl = { workspace = true } protobuf = "2" -rand = "0.8.3" regex = "1.1" safemem = { version = "0.3", default-features = false } serde = "1.0" diff --git a/components/tidb_query_expr/src/impl_encryption.rs b/components/tidb_query_expr/src/impl_encryption.rs index 9c26826c03bb..03686d3755e0 100644 --- a/components/tidb_query_expr/src/impl_encryption.rs +++ b/components/tidb_query_expr/src/impl_encryption.rs @@ -3,13 +3,14 @@ use std::io::Read; use byteorder::{ByteOrder, LittleEndian}; +use crypto::rand; use flate2::{ read::{ZlibDecoder, ZlibEncoder}, Compression, }; use openssl::hash::{self, MessageDigest}; use tidb_query_codegen::rpn_fn; -use tidb_query_common::Result; +use tidb_query_common::{error::EvaluateError, Result}; use tidb_query_datatype::{ codec::data_type::*, expr::{Error, EvalContext}, @@ -190,9 +191,12 @@ pub fn random_bytes(_ctx: &mut EvalContext, arg: Option<&Int>) -> Result MAX_RAND_BYTES_LENGTH { return Err(Error::overflow("length", "random_bytes").into()); } - Ok(Some( - (0..*arg as usize).map(|_| rand::random::()).collect(), - )) + let len = *arg as usize; + let mut rand_bytes = vec![0; len]; + rand::rand_bytes(&mut rand_bytes).map_err(|_| { + EvaluateError::Other("SSL library can't generate random bytes".to_owned()) + })?; + Ok(Some(rand_bytes)) } _ => Ok(None), } diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index cdcfc4673c9f..908f32db86f8 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -32,7 +32,6 @@ use nix::{ sys::wait::{wait, WaitStatus}, unistd::{fork, ForkResult}, }; -use rand::rngs::ThreadRng; use crate::sys::thread::StdThreadBuildWrapper; @@ -134,38 +133,6 @@ pub fn slices_in_range(entry: &VecDeque, low: usize, high: usize) -> (&[T] } } -pub struct DefaultRng { - rng: ThreadRng, -} - -impl 
DefaultRng { - fn new() -> DefaultRng { - DefaultRng { - rng: rand::thread_rng(), - } - } -} - -impl Default for DefaultRng { - fn default() -> DefaultRng { - DefaultRng::new() - } -} - -impl Deref for DefaultRng { - type Target = ThreadRng; - - fn deref(&self) -> &ThreadRng { - &self.rng - } -} - -impl DerefMut for DefaultRng { - fn deref_mut(&mut self) -> &mut ThreadRng { - &mut self.rng - } -} - /// A handy shortcut to replace `RwLock` write/read().unwrap() pattern to /// shortcut wl and rl. pub trait HandyRwLock { diff --git a/scripts/check-bins.py b/scripts/check-bins.py index cbc748af958d..cd5a4879f272 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,7 +14,7 @@ "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", - "tracker", "test_raftstore_macro", "fips" + "tracker", "test_raftstore_macro", "crypto" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/src/lib.rs b/src/lib.rs index b300399e30a8..6d1cc5159074 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if fips::can_enable() { + if crypto::fips::can_enable() { format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") } else { env!("CARGO_PKG_VERSION").to_owned() From 51a5af2fb72133a9e0483bb0a9d6fc4331231ba3 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 12 Dec 2023 18:52:48 +0800 Subject: [PATCH 190/203] server: Fix heap profile temp file is dropped before reading (#16171) close tikv/tikv#16169 Fix heap profile temp file is dropped before reading Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/mod.rs | 6 +++--- src/server/status_server/profile.rs | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 9a2bb5743ae9..ff8909fa8521 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -136,10 +136,11 @@ where let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == Some("true"); let result = { - let path = match dump_one_heap_profile() { - Ok(path) => path, + let file = match dump_one_heap_profile() { + Ok(file) => file, Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), }; + let path = file.path(); if use_jeprof { jeprof_heap_profile(path.to_str().unwrap()) } else { @@ -1561,7 +1562,6 @@ mod tests { #[cfg(feature = "mem-profiling")] #[test] - #[ignore] fn test_pprof_heap_service() { let mut status_server = StatusServer::new( 1, diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 7d7e90741e44..582e02066f84 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -2,7 +2,6 @@ use std::{ fs::File, io::{Read, Write}, - path::PathBuf, pin::Pin, process::{Command, Stdio}, sync::Mutex, @@ -83,11 +82,11 @@ impl Future for ProfileRunner { } /// Trigger a heap profile and return the content. 
-pub fn dump_one_heap_profile() -> Result { +pub fn dump_one_heap_profile() -> Result { let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; let path = f.path(); dump_prof(path.to_str().unwrap()).map_err(|e| format!("dump_prof: {}", e))?; - Ok(path.to_owned()) + Ok(f) } /// Trigger one cpu profile. From 820b220f4e915fd98ea28195a256ef1952f9328c Mon Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 13 Dec 2023 10:33:19 +0800 Subject: [PATCH 191/203] coprocessor: add query digest tracing in tikv slow query (#16172) ref tikv/tikv#15513 coprocessor: add query digest tracing in tikv slow query Signed-off-by: nolouch --- src/coprocessor/tracker.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index f6502c2459eb..cacf69d2c619 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -6,8 +6,10 @@ use ::tracker::{get_tls_tracker_token, with_tls_tracker}; use engine_traits::{PerfContext, PerfContextExt, PerfContextKind}; use kvproto::{kvrpcpb, kvrpcpb::ScanDetailV2}; use pd_client::BucketMeta; +use protobuf::Message; use tikv_kv::Engine; use tikv_util::time::{self, Duration, Instant}; +use tipb::ResourceGroupTag; use txn_types::Key; use super::metrics::*; @@ -266,9 +268,14 @@ impl Tracker { let source_stmt = self.req_ctx.context.get_source_stmt(); with_tls_tracker(|tracker| { + let mut req_tag = ResourceGroupTag::new(); + req_tag + .merge_from_bytes(&tracker.req_info.resource_group_tag) + .unwrap_or_default(); info!(#"slow_log", "slow-query"; "connection_id" => source_stmt.get_connection_id(), "session_alias" => source_stmt.get_session_alias(), + "query_digest" => hex::encode(req_tag.get_sql_digest()), "region_id" => &self.req_ctx.context.get_region_id(), "remote_host" => &self.req_ctx.peer, "total_lifetime" => ?self.req_lifetime, From 8e8c6ab6dbd41861d012aec0654f77ff678e4ef1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:00:20 +0800 Subject: [PATCH 192/203] In-memory Engine: integrate hybrid engine with TiKV (#16132) ref tikv/tikv#16141 Integrate hybrid engine with TiKV. User can choose to use hybrid engine by set `memory_engine_enabled` in TiKV config. 
Signed-off-by: SpadeA-Tang --- Cargo.lock | 25 +- Cargo.toml | 2 + components/engine_panic/src/misc.rs | 5 + components/engine_rocks/src/misc.rs | 5 + components/engine_traits/src/misc.rs | 3 + components/hybrid_engine/Cargo.toml | 3 +- components/hybrid_engine/src/engine.rs | 13 + components/hybrid_engine/src/lib.rs | 2 + components/hybrid_engine/src/misc.rs | 5 + .../raftstore/src/compacted_event_sender.rs | 18 +- .../region_cache_memory_engine/Cargo.toml | 13 + .../region_cache_memory_engine/src/engine.rs | 307 ++++++++++++++++++ .../region_cache_memory_engine/src/lib.rs | 7 + components/server/Cargo.toml | 2 + components/server/src/common.rs | 18 + components/server/src/server.rs | 112 +++++-- components/snap_recovery/src/init_cluster.rs | 16 +- .../src/region_meta_collector.rs | 35 +- components/snap_recovery/src/services.rs | 42 ++- src/config/mod.rs | 4 + 20 files changed, 566 insertions(+), 71 deletions(-) create mode 100644 components/region_cache_memory_engine/Cargo.toml create mode 100644 components/region_cache_memory_engine/src/engine.rs create mode 100644 components/region_cache_memory_engine/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0cd0c6cade14..bcbb57979ee3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2447,6 +2447,7 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" name = "hybrid_engine" version = "0.0.1" dependencies = [ + "engine_rocks", "engine_traits", "tikv_util", "txn_types", @@ -4439,6 +4440,15 @@ version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +[[package]] +name = "region_cache_memory_engine" +version = "0.0.1" +dependencies = [ + "collections", + "engine_traits", + "skiplist-rs", +] + [[package]] name = "remove_dir_all" version = "0.5.2" @@ -5104,6 +5114,7 @@ dependencies = [ "grpcio", "grpcio-health", "hex 0.4.2", + "hybrid_engine", "keys", "kvproto", "libc 0.2.146", @@ -5116,6 +5127,7 @@ dependencies = [ "raft_log_engine", "raftstore", "raftstore-v2", + "region_cache_memory_engine", "resolved_ts", "resource_control", "resource_metering", @@ -5195,6 +5207,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" +[[package]] +name = "skiplist-rs" +version = "0.1.0" +source = "git+https://github.com/tikv/skiplist-rs.git?branch=main#618af619d9348ef89eaa71c5f6fbddbd9a5c09bf" +dependencies = [ + "bytes", + "rand 0.8.5", + "slog", +] + [[package]] name = "slab" version = "0.4.2" @@ -6290,6 +6312,7 @@ dependencies = [ "raftstore-v2", "rand 0.7.3", "regex", + "region_cache_memory_engine", "reqwest", "resource_control", "resource_metering", @@ -6821,7 +6844,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 2d905a4115f2..3e5efdd40c36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,6 +129,7 @@ raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } rand = "0.7.3" regex = "1.3" +region_cache_memory_engine = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } rev_lines = "0.2.1" @@ -321,6 +322,7 @@ encryption_export = { path = "components/encryption/export" } engine_panic = { path = 
"components/engine_panic" } engine_rocks = { path = "components/engine_rocks" } hybrid_engine = { path = "components/hybrid_engine" } +region_cache_memory_engine = { path = "components/region_cache_memory_engine" } engine_rocks_helper = { path = "components/engine_rocks_helper" } engine_test = { path = "components/engine_test", default-features = false } engine_traits = { path = "components/engine_traits" } diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 8da5c48d3e64..6ebecd58a098 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -129,4 +129,9 @@ impl MiscExt for PanicEngine { fn get_accumulated_flush_count_cf(cf: &str) -> Result { panic!() } + + type DiskEngine = PanicEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + panic!() + } } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index b1406cacdb88..f82e1e688323 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -448,6 +448,11 @@ impl MiscExt for RocksEngine { .get(); Ok(n) } + + type DiskEngine = RocksEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + self + } } #[cfg(test)] diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 7871b3b8ecc5..ad93db442312 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -178,4 +178,7 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt + WriteBatchExt { } Ok(n) } + + type DiskEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine; } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index f38604a10c11..e0be90b179e9 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -10,4 +10,5 @@ testexport = [] [dependencies] engine_traits = { workspace = true } txn_types = { workspace = true } -tikv_util = { workspace = true } \ No newline at end of file +tikv_util = { workspace = true } +engine_rocks = { workspace = true } diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index deb544b91c62..072f1d028fff 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -42,6 +42,19 @@ where } } +impl HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn new(disk_engine: EK, region_cache_engine: EC) -> Self { + Self { + disk_engine, + region_cache_engine, + } + } +} + // todo: implement KvEngine methods as well as it's super traits. 
impl KvEngine for HybridEngine where diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs index 147fcc5a8a54..367d985b094a 100644 --- a/components/hybrid_engine/src/lib.rs +++ b/components/hybrid_engine/src/lib.rs @@ -22,3 +22,5 @@ mod sst; mod table_properties; mod ttl_properties; mod write_batch; + +pub use engine::HybridEngine; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs index 9575344e65cb..d761322ae760 100644 --- a/components/hybrid_engine/src/misc.rs +++ b/components/hybrid_engine/src/misc.rs @@ -124,4 +124,9 @@ where fn get_accumulated_flush_count_cf(cf: &str) -> Result { unimplemented!() } + + type DiskEngine = EK::DiskEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + self.disk_engine().get_disk_engine() + } } diff --git a/components/raftstore/src/compacted_event_sender.rs b/components/raftstore/src/compacted_event_sender.rs index 99ba70a05120..736332b52c51 100644 --- a/components/raftstore/src/compacted_event_sender.rs +++ b/components/raftstore/src/compacted_event_sender.rs @@ -1,18 +1,26 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::sync::Mutex; -use engine_rocks::{CompactedEventSender, RocksCompactedEvent, RocksEngine}; -use engine_traits::RaftEngine; +use engine_rocks::{CompactedEventSender, RocksCompactedEvent}; +use engine_traits::{KvEngine, RaftEngine}; use tikv_util::error_unknown; use crate::store::{fsm::store::RaftRouter, StoreMsg}; // raftstore v1's implementation -pub struct RaftRouterCompactedEventSender { - pub router: Mutex>, +pub struct RaftRouterCompactedEventSender +where + EK: KvEngine, + ER: RaftEngine, +{ + pub router: Mutex>, } -impl CompactedEventSender for RaftRouterCompactedEventSender { +impl CompactedEventSender for RaftRouterCompactedEventSender +where + EK: KvEngine, + ER: RaftEngine, +{ fn send(&self, event: RocksCompactedEvent) { let router = self.router.lock().unwrap(); let event = StoreMsg::CompactedEvent(event); diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml new file mode 100644 index 000000000000..89ae317aa94c --- /dev/null +++ b/components/region_cache_memory_engine/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "region_cache_memory_engine" +version = "0.0.1" +edition = "2021" +publish = false + +[features] +testexport = [] + +[dependencies] +engine_traits = { workspace = true } +collections = { workspace = true } +skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs new file mode 100644 index 000000000000..ad16e7e8407f --- /dev/null +++ b/components/region_cache_memory_engine/src/engine.rs @@ -0,0 +1,307 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + collections::BTreeMap, + fmt::{self, Debug}, + ops::Deref, + sync::{Arc, Mutex}, +}; + +use collections::HashMap; +use engine_traits::{ + CfNamesExt, DbVector, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, + RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, +}; +use skiplist_rs::{ByteWiseComparator, IterRef, Skiplist}; + +/// RegionMemoryEngine stores data for a specific cached region +/// +/// todo: The skiplist used here currently is for test purpose. Replace it +/// with a formal implementation. 
+#[derive(Clone)] +pub struct RegionMemoryEngine { + data: [Arc>; 3], +} + +impl Debug for RegionMemoryEngine { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +type SnapshotList = BTreeMap; + +#[derive(Default)] +pub struct RegionMemoryMeta { + // It records the snapshots that have been granted previsously with specific snapshot_ts. We + // should guarantee that the data visible to any one of the snapshot in it will not be removed. + snapshots: SnapshotList, + // It indicates whether the region is readable. False means integrity of the data in this + // cached region is not satisfied due to being evicted for instance. + can_read: bool, + // Request with read_ts below it is not eligible for granting snapshot. + // Note: different region can have different safe_ts. + safe_ts: u64, +} + +#[derive(Default)] +pub struct RegionCacheMemoryEngineCore { + engine: HashMap, + region_metats: HashMap, +} + +/// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in +/// the leaders' store. Incoming writes that are written to disk engine (now, +/// RocksDB) are also written to the RegionCacheMemoryEngine, leading to a +/// mirrored data set in the cached regions with the disk engine. +/// +/// A load/evict unit manages the memory, deciding which regions should be +/// evicted when the memory used by the RegionCacheMemoryEngine reaches a +/// certain limit, and determining which regions should be loaded when there is +/// spare memory capacity. +/// +/// The safe point lifetime differs between RegionCacheMemoryEngine and the disk +/// engine, often being much shorter in RegionCacheMemoryEngine. This means that +/// RegionCacheMemoryEngine may filter out some keys that still exist in the +/// disk engine, thereby improving read performance as fewer duplicated keys +/// will be read. If there's a need to read keys that may have been filtered by +/// RegionCacheMemoryEngine (as indicated by read_ts and safe_point of the +/// cached region), we resort to using a the disk engine's snapshot instead. 
+#[derive(Clone, Default)] +pub struct RegionCacheMemoryEngine { + core: Arc>, +} + +impl Debug for RegionCacheMemoryEngine { + fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +impl RegionCacheMemoryEngine { + pub fn new() -> Self { + RegionCacheMemoryEngine::default() + } +} + +impl RegionCacheEngine for RegionCacheMemoryEngine { + type Snapshot = RegionCacheSnapshot; + + fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { + unimplemented!() + } +} + +// todo: fill fields needed +pub struct RegionCacheWriteBatch; + +impl WriteBatchExt for RegionCacheMemoryEngine { + type WriteBatch = RegionCacheWriteBatch; + // todo: adjust it + const WRITE_BATCH_MAX_KEYS: usize = 256; + + fn write_batch(&self) -> Self::WriteBatch { + RegionCacheWriteBatch {} + } + + fn write_batch_with_cap(&self, _: usize) -> Self::WriteBatch { + RegionCacheWriteBatch {} + } +} + +pub struct RegionCacheIterator { + valid: bool, + prefix_same_as_start: bool, + prefix: Option>, + iter: IterRef, ByteWiseComparator>, + lower_bound: Option>, + upper_bound: Option>, +} + +impl Iterable for RegionCacheMemoryEngine { + type Iterator = RegionCacheIterator; + + fn iterator(&self, cf: &str) -> Result { + unimplemented!() + } + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Iterator for RegionCacheIterator { + fn key(&self) -> &[u8] { + unimplemented!() + } + + fn value(&self) -> &[u8] { + unimplemented!() + } + + fn next(&mut self) -> Result { + unimplemented!() + } + + fn prev(&mut self) -> Result { + unimplemented!() + } + + fn seek(&mut self, key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_to_first(&mut self) -> Result { + unimplemented!() + } + + fn seek_to_last(&mut self) -> Result { + unimplemented!() + } + + fn valid(&self) -> Result { + unimplemented!() + } +} + +impl WriteBatch for RegionCacheWriteBatch { + fn write_opt(&mut self, _: &WriteOptions) -> Result { + unimplemented!() + } + + fn data_size(&self) -> usize { + unimplemented!() + } + + fn count(&self) -> usize { + unimplemented!() + } + + fn is_empty(&self) -> bool { + unimplemented!() + } + + fn should_write_to_engine(&self) -> bool { + unimplemented!() + } + + fn clear(&mut self) { + unimplemented!() + } + + fn set_save_point(&mut self) { + unimplemented!() + } + + fn pop_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn merge(&mut self, _: Self) -> Result<()> { + unimplemented!() + } +} + +impl Mutable for RegionCacheWriteBatch { + fn put(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&mut self, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&mut self, _: &str, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } +} + +#[derive(Clone, Debug)] +pub struct RegionCacheSnapshot { + region_id: u64, + snapshot_ts: u64, + engine: RegionMemoryEngine, +} + +impl Snapshot for RegionCacheSnapshot {} + +impl Iterable for RegionCacheSnapshot { + type Iterator = RegionCacheIterator; + + fn iterator(&self, cf: &str) -> Result { + unimplemented!() + } + + fn 
iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Peekable for RegionCacheSnapshot { + type DbVector = RegionCacheDbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl CfNamesExt for RegionCacheSnapshot { + fn cf_names(&self) -> Vec<&str> { + unimplemented!() + } +} + +impl SnapshotMiscExt for RegionCacheSnapshot { + fn sequence_number(&self) -> u64 { + self.snapshot_ts + } +} + +// todo: fill fields needed +#[derive(Debug)] +pub struct RegionCacheDbVector; + +impl Deref for RegionCacheDbVector { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + unimplemented!() + } +} + +impl DbVector for RegionCacheDbVector {} + +impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { + fn eq(&self, rhs: &&[u8]) -> bool { + unimplemented!() + } +} diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs new file mode 100644 index 000000000000..d512847efb42 --- /dev/null +++ b/components/region_cache_memory_engine/src/lib.rs @@ -0,0 +1,7 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +#![allow(dead_code)] +#![allow(unused_variables)] + +mod engine; +pub use engine::RegionCacheMemoryEngine; diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index c378f0dbd908..9062a9f094eb 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -56,6 +56,7 @@ futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } hex = "0.4" +hybrid_engine = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } libc = "0.2" @@ -68,6 +69,7 @@ raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } +region_cache_memory_engine = { workspace = true } resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/server/src/common.rs b/components/server/src/common.rs index c8cf879d9052..a2415facad13 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -28,8 +28,10 @@ use engine_traits::{ use error_code::ErrorCodeExt; use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; use grpcio::Environment; +use hybrid_engine::HybridEngine; use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; +use region_cache_memory_engine::RegionCacheMemoryEngine; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, TikvConfig}, @@ -695,6 +697,22 @@ impl Stop for LazyWorker { } } +pub trait KvEngineBuilder: KvEngine { + fn build(disk_engine: RocksEngine) -> Self; +} + +impl KvEngineBuilder for RocksEngine { + fn build(disk_engine: RocksEngine) -> Self { + disk_engine + } +} + +impl KvEngineBuilder for HybridEngine { + fn build(_disk_engine: RocksEngine) -> Self { + unimplemented!() + } +} + pub trait ConfiguredRaftEngine: RaftEngine { fn build( _: &TikvConfig, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 00ab39a0e6af..594eac686fe9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -30,16 +30,19 @@ use backup_stream::{ use causal_ts::CausalTsProviderImpl; use 
cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; +use engine_rocks::{ + from_rocks_compression_type, RocksCompactedEvent, RocksEngine, RocksStatistics, +}; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ - Engines, KvEngine, MiscExt, RaftEngine, SingletonFactory, TabletContext, TabletRegistry, - CF_DEFAULT, CF_WRITE, + Engines, KvEngine, RaftEngine, SingletonFactory, TabletContext, TabletRegistry, CF_DEFAULT, + CF_WRITE, }; use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; +use hybrid_engine::HybridEngine; use kvproto::{ brpb::create_backup, cdcpb::create_change_data, deadlock::create_deadlock, debugpb::create_debug, diagnosticspb::create_diagnostics, import_sstpb::create_import_sst, @@ -69,6 +72,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; +use region_cache_memory_engine::RegionCacheMemoryEngine; use resolved_ts::{LeadershipResolver, Task}; use resource_control::ResourceGroupManager; use security::SecurityManager; @@ -110,7 +114,7 @@ use tikv::{ use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, - config::VersionTrack, + config::{ReadableSize, VersionTrack}, memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, @@ -124,7 +128,10 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::{ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, TikvServerCore}, + common::{ + ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, KvEngineBuilder, + TikvServerCore, + }, memory::*, setup::*, signal_handler, @@ -132,12 +139,16 @@ use crate::{ }; #[inline] -fn run_impl( +fn run_impl( config: TikvConfig, service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, -) { - let mut tikv = TikvServer::::init(config, service_event_tx.clone()); +) where + EK: KvEngine + KvEngineBuilder, + CER: ConfiguredRaftEngine, + F: KvFormat, +{ + let mut tikv = TikvServer::::init(config, service_event_tx.clone()); // Must be called after `TikvServer::init`. let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; let high_water = (tikv.core.config.memory_usage_high_water * memory_limit as f64) as u64; @@ -209,9 +220,33 @@ pub fn run_tikv( dispatch_api_version!(config.storage.api_version(), { if !config.raft_engine.enable { - run_impl::(config, service_event_tx, service_event_rx) + if config.region_cache_memory_limit == ReadableSize(0) { + run_impl::( + config, + service_event_tx, + service_event_rx, + ) + } else { + run_impl::, RocksEngine, API>( + config, + service_event_tx, + service_event_rx, + ) + } } else { - run_impl::(config, service_event_tx, service_event_rx) + if config.region_cache_memory_limit == ReadableSize(0) { + run_impl::( + config, + service_event_tx, + service_event_rx, + ) + } else { + run_impl::, RaftLogEngine, API>( + config, + service_event_tx, + service_event_rx, + ) + } } }) } @@ -221,21 +256,26 @@ const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); /// A complete TiKV server. 
-struct TikvServer { +struct TikvServer +where + EK: KvEngine, + ER: RaftEngine, + F: KvFormat, +{ core: TikvServerCore, cfg_controller: Option, security_mgr: Arc, pd_client: Arc, - router: RaftRouter, - system: Option>, + router: RaftRouter, + system: Option>, resolver: Option, snap_mgr: Option, // Will be filled in `init_servers`. - engines: Option>, + engines: Option>, kv_statistics: Option>, raft_statistics: Option>, - servers: Option>, + servers: Option>, region_info_accessor: RegionInfoAccessor, - coprocessor_host: Option>, + coprocessor_host: Option>, concurrency_manager: ConcurrencyManager, env: Arc, check_leader_worker: Worker, @@ -270,12 +310,13 @@ struct Servers { type LocalServer = Server>; type LocalRaftKv = RaftKv>; -impl TikvServer +impl TikvServer where + EK: KvEngine, ER: RaftEngine, F: KvFormat, { - fn init(mut config: TikvConfig, tx: TikvMpsc::Sender) -> TikvServer { + fn init(mut config: TikvConfig, tx: TikvMpsc::Sender) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -436,7 +477,7 @@ where } } - fn init_engines(&mut self, engines: Engines) { + fn init_engines(&mut self, engines: Engines) { let store_meta = Arc::new(Mutex::new(StoreMeta::new(PENDING_MSG_CAP))); let engine = RaftKv::new( ServerRaftStoreRouter::new( @@ -458,9 +499,7 @@ where }); } - fn init_gc_worker( - &mut self, - ) -> GcWorker>> { + fn init_gc_worker(&mut self) -> GcWorker>> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( engines.engine.clone(), @@ -526,7 +565,7 @@ where if let Some(sst_worker) = &mut self.sst_worker { let sst_runner = RecoveryRunner::new( - engines.engines.kv.clone(), + engines.engines.kv.get_disk_engine().clone(), engines.store_meta.clone(), self.core .config @@ -1041,7 +1080,10 @@ where // Create Debugger. 
let mut debugger = DebuggerImpl::new( - engines.engines.clone(), + Engines::new( + engines.engines.kv.get_disk_engine().clone(), + engines.engines.raft.clone(), + ), self.cfg_controller.as_ref().unwrap().clone(), Some(storage), ); @@ -1163,7 +1205,7 @@ where let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); let backup_service = - backup::Service::::with_router(backup_scheduler, self.router.clone()); + backup::Service::::with_router(backup_scheduler, self.router.clone()); if servers .server .register_service(create_backup(backup_service)) @@ -1282,7 +1324,7 @@ where ); } - fn init_storage_stats_task(&self, engines: Engines) { + fn init_storage_stats_task(&self, engines: Engines) { let config_disk_capacity: u64 = self.core.config.raft_store.capacity.0; let data_dir = self.core.config.storage.data_dir.clone(); let store_path = self.core.store_path.clone(); @@ -1509,11 +1551,16 @@ where } } -impl TikvServer { +impl TikvServer +where + EK: KvEngine + KvEngineBuilder, + CER: ConfiguredRaftEngine, + F: KvFormat, +{ fn init_raw_engines( &mut self, flow_listener: engine_rocks::FlowListener, - ) -> (Engines, Arc) { + ) -> (Engines, Arc) { let block_cache = self.core.config.storage.block_cache.build_shared_cache(); let env = self .core @@ -1547,23 +1594,24 @@ impl TikvServer { .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); let factory = Box::new(builder.build()); - let kv_engine = factory + let disk_engine = factory .create_shared_db(&self.core.store_path) .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); + let kv_engine: EK = KvEngineBuilder::build(disk_engine.clone()); self.kv_statistics = Some(factory.rocks_statistics()); - let engines = Engines::new(kv_engine.clone(), raft_engine); + let engines = Engines::new(kv_engine, raft_engine); let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, Box::new(DbConfigManger::new( cfg_controller.get_current().rocksdb, - kv_engine.clone(), + disk_engine.clone(), DbType::Kv, )), ); let reg = TabletRegistry::new( - Box::new(SingletonFactory::new(kv_engine)), + Box::new(SingletonFactory::new(disk_engine)), &self.core.store_path, ) .unwrap(); diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 7ece321d9dd8..c6a14c1e0d37 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -3,8 +3,8 @@ use std::{cmp, error::Error as StdError, i32, result, sync::Arc, thread, time::Duration}; use encryption_export::data_key_manager_from_config; -use engine_rocks::{util::new_engine_opt, RocksEngine}; -use engine_traits::{Engines, Error as EngineError, Peekable, RaftEngine, SyncMutable}; +use engine_rocks::util::new_engine_opt; +use engine_traits::{Engines, Error as EngineError, KvEngine, RaftEngine}; use kvproto::{metapb, raft_serverpb::StoreIdent}; use pd_client::{Error as PdError, PdClient}; use raft_log_engine::RaftLogEngine; @@ -251,21 +251,21 @@ pub trait LocalEngineService { } // init engine and read local engine info -pub struct LocalEngines { - engines: Engines, +pub struct LocalEngines { + engines: Engines, } -impl LocalEngines { - pub fn new(engines: Engines) -> LocalEngines { +impl LocalEngines { + pub fn new(engines: Engines) -> LocalEngines { LocalEngines { engines } } - pub fn get_engine(&self) -> &Engines { + pub fn get_engine(&self) -> &Engines { 
&self.engines } } -impl LocalEngineService for LocalEngines { +impl LocalEngineService for LocalEngines { fn set_cluster_id(&self, cluster_id: u64) { let res = self .get_engine() diff --git a/components/snap_recovery/src/region_meta_collector.rs b/components/snap_recovery/src/region_meta_collector.rs index e3542d6691b2..3a88931fae42 100644 --- a/components/snap_recovery/src/region_meta_collector.rs +++ b/components/snap_recovery/src/region_meta_collector.rs @@ -2,8 +2,7 @@ use std::{cell::RefCell, error::Error as StdError, result, thread::JoinHandle}; -use engine_rocks::RocksEngine; -use engine_traits::{Engines, Iterable, Peekable, RaftEngine, CF_RAFT}; +use engine_traits::{Engines, KvEngine, RaftEngine, CF_RAFT}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, @@ -30,9 +29,13 @@ pub enum Error { } /// `RegionMetaCollector` is the collector that collector all region meta -pub struct RegionMetaCollector { +pub struct RegionMetaCollector +where + EK: KvEngine, + ER: RaftEngine, +{ /// The engine we are working on - engines: Engines, + engines: Engines, /// region meta report to br tx: UnboundedSender, /// Current working workers @@ -40,8 +43,12 @@ pub struct RegionMetaCollector { } #[allow(dead_code)] -impl RegionMetaCollector { - pub fn new(engines: Engines, tx: UnboundedSender) -> Self { +impl RegionMetaCollector +where + EK: KvEngine, + ER: RaftEngine, +{ + pub fn new(engines: Engines, tx: UnboundedSender) -> Self { RegionMetaCollector { engines, tx, @@ -74,14 +81,22 @@ impl RegionMetaCollector { } } -struct CollectWorker { +struct CollectWorker +where + EK: KvEngine, + ER: RaftEngine, +{ /// The engine we are working on - engines: Engines, + engines: Engines, tx: UnboundedSender, } -impl CollectWorker { - pub fn new(engines: Engines, tx: UnboundedSender) -> Self { +impl CollectWorker +where + EK: KvEngine, + ER: RaftEngine, +{ + pub fn new(engines: Engines, tx: UnboundedSender) -> Self { CollectWorker { engines, tx } } diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index daf6e7ed30ff..6bf706e158f9 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -19,7 +19,7 @@ use engine_rocks::{ util::get_cf_handle, RocksEngine, }; -use engine_traits::{CfNamesExt, CfOptionsExt, Engines, Peekable, RaftEngine}; +use engine_traits::{CfNamesExt, CfOptionsExt, Engines, KvEngine, RaftEngine}; use futures::{ channel::mpsc, executor::{ThreadPool, ThreadPoolBuilder}, @@ -67,11 +67,16 @@ pub enum Error { #[error("{0:?}")] Other(#[from] Box), } + /// Service handles the recovery messages from backup restore. #[derive(Clone)] -pub struct RecoveryService { - engines: Engines, - router: RaftRouter, +pub struct RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ + engines: Engines, + router: RaftRouter, threads: ThreadPool, /// The handle to last call of recover region RPC. @@ -113,13 +118,14 @@ impl RecoverRegionState { } } -impl RecoveryService { +impl RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a /// `thread pool`. 
- pub fn new( - engines: Engines, - router: RaftRouter, - ) -> RecoveryService { + pub fn new(engines: Engines, router: RaftRouter) -> RecoveryService { let props = tikv_util::thread_group::current_properties(); let threads = ThreadPoolBuilder::new() .pool_size(4) @@ -136,7 +142,7 @@ impl RecoveryService { // config rocksdb l0 to optimize the restore // also for massive data applied during the restore, it easy to reach the write // stop - let db = engines.kv.clone(); + let db: &RocksEngine = engines.kv.get_disk_engine(); for cf_name in db.cf_names() { Self::set_db_options(cf_name, db.clone()).expect("set db option failure"); } @@ -218,7 +224,7 @@ impl RecoveryService { // a new wait apply syncer share with all regions, // when all region reached the target index, share reference decreased to 0, // trigger closure to send finish info back. - pub fn wait_apply_last(router: RaftRouter, sender: SyncSender) { + pub fn wait_apply_last(router: RaftRouter, sender: SyncSender) { let wait_apply = SnapshotRecoveryWaitApplySyncer::new(0, sender); router.broadcast_normal(|| { PeerMsg::SignificantMsg(SignificantMsg::SnapshotRecoveryWaitApply( @@ -261,7 +267,11 @@ fn compact(engine: RocksEngine) -> Result<()> { Ok(()) } -impl RecoverData for RecoveryService { +impl RecoverData for RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ // 1. br start to ready region meta fn read_region_meta( &mut self, @@ -444,10 +454,14 @@ impl RecoverData for RecoveryService { // implement a resolve/delete data funciton let resolved_ts = req.get_resolved_ts(); let (tx, rx) = mpsc::unbounded(); - let resolver = DataResolverManager::new(self.engines.kv.clone(), tx, resolved_ts.into()); + let resolver = DataResolverManager::new( + self.engines.kv.get_disk_engine().clone(), + tx, + resolved_ts.into(), + ); info!("start to resolve kv data"); resolver.start(); - let db = self.engines.kv.clone(); + let db = self.engines.kv.get_disk_engine().clone(); let store_id = self.get_store_id(); let send_task = async move { let id = store_id?; diff --git a/src/config/mod.rs b/src/config/mod.rs index aac91aaaedf0..c0c2a679b5ab 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3400,6 +3400,9 @@ pub struct TikvConfig { #[online_config(skip)] pub memory_usage_high_water: f64, + // Memory quota used for in-memory engine. 0 means not enable it. 
+ pub region_cache_memory_limit: ReadableSize, + #[online_config(submodule)] pub log: LogConfig, @@ -3499,6 +3502,7 @@ impl Default for TikvConfig { abort_on_panic: false, memory_usage_limit: None, memory_usage_high_water: 0.9, + region_cache_memory_limit: ReadableSize::mb(0), log: LogConfig::default(), memory: MemoryConfig::default(), quota: QuotaConfig::default(), From ae8902d026479425c84e4e0f55fa96936609f297 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Wed, 13 Dec 2023 18:25:49 +0800 Subject: [PATCH 193/203] cloud: update gcs sdk to support fips 140 (#16177) close tikv/tikv#16097 update cloud sdk Signed-off-by: Leavrth Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 32 ++------------------------------ Cargo.toml | 3 +++ 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bcbb57979ee3..44cc3d1d1c77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4629,21 +4629,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "ring" -version = "0.16.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b72b84d47e8ec5a4f2872e8262b8f8256c5be1c938a7d6d3a867a3ba8f722f74" -dependencies = [ - "cc", - "libc 0.2.146", - "once_cell", - "spin", - "untrusted", - "web-sys", - "winapi 0.3.9", -] - [[package]] name = "rocksdb" version = "0.3.0" @@ -5378,12 +5363,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "sst_importer" version = "0.1.0" @@ -5622,15 +5601,14 @@ dependencies = [ [[package]] name = "tame-oauth" version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9435c9348e480fad0f2215d5602e2dfad03df8a6398c4e7ceaeaa42758f26a8a" +source = "git+https://github.com/tikv/tame-oauth?branch=fips#176e3c69e9b5cd04b4248824ae6ee38ef57385be" dependencies = [ "base64 0.13.0", "chrono", "http", "lock_api", + "openssl", "parking_lot 0.11.1", - "ring", "serde", "serde_json", "twox-hash", @@ -6928,12 +6906,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "url" version = "2.2.2" diff --git a/Cargo.toml b/Cargo.toml index 3e5efdd40c36..7bf16f3e0927 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -199,6 +199,9 @@ rusoto_mock = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr rusoto_s3 = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } +# NOTICE: use openssl for signature to support fips 140 +tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips" } + snappy-sys = { git = "https://github.com/busyjay/rust-snappy.git", branch = "static-link" } # remove this when https://github.com/danburkert/fs2-rs/pull/42 is merged. 
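As context for the dependency switch above: the fips branch of tame-oauth
replaces the ring-based signing of the OAuth JWT with OpenSSL, so the
signature path can run on a FIPS 140 validated crypto provider (the Cargo.lock
change drops ring and adds openssl under tame-oauth). The following is only a
rough, self-contained sketch of RS256 signing with the openssl crate, not the
fork's actual code; in the real flow the private key comes from the GCP
service-account JSON rather than being generated on the fly:

    use openssl::{hash::MessageDigest, pkey::PKey, rsa::Rsa, sign::Signer};

    // Sign a JWT signing input (base64url(header) + "." + base64url(claims))
    // with RS256, i.e. RSA PKCS#1 v1.5 over SHA-256.
    fn rs256_sign(signing_input: &[u8]) -> Result<Vec<u8>, openssl::error::ErrorStack> {
        // A throwaway key keeps the sketch runnable; a real implementation would
        // load the service account's RSA private key here instead.
        let pkey = PKey::from_rsa(Rsa::generate(2048)?)?;
        let mut signer = Signer::new(MessageDigest::sha256(), &pkey)?;
        signer.update(signing_input)?;
        signer.sign_to_vec()
    }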
From f9727af132109754e63fbb4910b73563d0b1da45 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:40:50 +0800 Subject: [PATCH 194/203] In-memory Engine: implement read flow part 1 (#16163) ref tikv/tikv#16141 implement in-memory engine read flow -- part 1 Signed-off-by: SpadeA-Tang --- Cargo.lock | 2 + components/backup-stream/src/utils.rs | 2 +- components/cdc/src/observer.rs | 3 +- components/cdc/src/old_value.rs | 6 +- components/engine_panic/src/engine.rs | 4 +- components/engine_rocks/src/engine.rs | 8 +- components/engine_traits/src/engine.rs | 10 +- components/engine_traits/src/errors.rs | 3 + .../engine_traits_tests/src/iterator.rs | 40 +- .../src/read_consistency.rs | 6 +- .../engine_traits_tests/src/snapshot_basic.rs | 8 +- components/error_code/src/engine.rs | 3 +- components/hybrid_engine/src/engine.rs | 6 +- .../src/operation/query/capture.rs | 2 +- .../raftstore-v2/src/operation/query/local.rs | 6 +- .../src/store/async_io/write_tests.rs | 4 +- components/raftstore/src/store/fsm/apply.rs | 6 +- components/raftstore/src/store/peer.rs | 2 +- .../raftstore/src/store/peer_storage.rs | 4 +- .../raftstore/src/store/region_snapshot.rs | 2 +- components/raftstore/src/store/snap.rs | 16 +- components/raftstore/src/store/snap/io.rs | 4 +- .../src/store/worker/consistency_check.rs | 2 +- components/raftstore/src/store/worker/read.rs | 4 +- .../raftstore/src/store/worker/region.rs | 4 +- .../region_cache_memory_engine/Cargo.toml | 2 + .../region_cache_memory_engine/src/engine.rs | 521 ++++++++++++++++-- .../region_cache_memory_engine/src/lib.rs | 2 + components/tikv_kv/src/rocksdb_engine.rs | 2 +- src/server/node.rs | 2 +- tests/benches/misc/raftkv/mod.rs | 4 +- tests/integrations/raftstore/test_snap.rs | 2 +- tests/integrations/storage/test_titan.rs | 4 +- 33 files changed, 597 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44cc3d1d1c77..335c4a5301f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4444,9 +4444,11 @@ checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" name = "region_cache_memory_engine" version = "0.0.1" dependencies = [ + "bytes", "collections", "engine_traits", "skiplist-rs", + "tikv_util", ] [[package]] diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 974b1762cf26..ed8b75795871 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -996,7 +996,7 @@ mod test { let (items, size) = super::with_record_read_throughput(|| { let mut items = vec![]; - let snap = engine.snapshot(); + let snap = engine.snapshot(None); snap.scan(CF_DEFAULT, b"", b"", false, |k, v| { items.push((k.to_owned(), v.to_owned())); Ok(true) diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index cfcedfeb59da..965a31ac7ff3 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -121,7 +121,8 @@ impl CmdObserver for CdcObserver { // Create a snapshot here for preventing the old value was GC-ed. // TODO: only need it after enabling old value, may add a flag to indicate // whether to get it. 
- let snapshot = RegionSnapshot::from_snapshot(Arc::new(engine.snapshot()), Arc::new(region)); + let snapshot = + RegionSnapshot::from_snapshot(Arc::new(engine.snapshot(None)), Arc::new(region)); let get_old_value = move |key, query_ts, old_value_cache: &mut OldValueCache, diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index e343ccc226fa..02f1bd005077 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -308,7 +308,7 @@ mod tests { value: Option, ) -> Statistics { let key = key.clone().append_ts(ts.into()); - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let mut cursor = new_write_cursor_on_key(&snapshot, &key); let load_default = Either::Left(&snapshot); let mut stats = Statistics::default(); @@ -527,7 +527,7 @@ mod tests { must_commit(&mut engine, &key, 200, 201); } - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let mut cursor = new_old_value_cursor(&snapshot, CF_WRITE); let mut default_cursor = new_old_value_cursor(&snapshot, CF_DEFAULT); let mut load_default = |use_default_cursor: bool| { @@ -598,7 +598,7 @@ mod tests { } let key = format!("zkey-{:0>3}", 0).into_bytes(); - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let perf_instant = ReadPerfInstant::new(); let value = get_old_value( &snapshot, diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index b5ce0d1516e9..3f75d64f78d7 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SyncMutable, + IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, WriteOptions, }; @@ -13,7 +13,7 @@ pub struct PanicEngine; impl KvEngine for PanicEngine { type Snapshot = PanicSnapshot; - fn snapshot(&self) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { panic!() } fn sync(&self) -> Result<()> { diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 293b74e3bca9..e0eed00ad53d 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -2,7 +2,9 @@ use std::{any::Any, sync::Arc}; -use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable}; +use engine_traits::{ + IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, +}; use rocksdb::{DBIterator, Writable, DB}; use crate::{ @@ -182,7 +184,7 @@ impl RocksEngine { impl KvEngine for RocksEngine { type Snapshot = RocksSnapshot; - fn snapshot(&self) -> RocksSnapshot { + fn snapshot(&self, _: Option) -> RocksSnapshot { RocksSnapshot::new(self.db.clone()) } @@ -292,7 +294,7 @@ mod tests { engine.put_msg(key, &r).unwrap(); engine.put_msg_cf(cf, key, &r).unwrap(); - let snap = engine.snapshot(); + let snap = engine.snapshot(None); let mut r1: Region = engine.get_msg(key).unwrap().unwrap(); assert_eq!(r, r1); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index cc90f2ce075a..44539f194195 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -39,7 +39,10 @@ pub trait KvEngine: type Snapshot: Snapshot; /// Create a snapshot - fn 
snapshot(&self) -> Self::Snapshot; + /// + /// SnapCtx will only be used by some type of trait implementors (ex: + /// HybridEngine) + fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; /// Syncs any writes to disk fn sync(&self) -> Result<()>; @@ -78,3 +81,8 @@ pub trait KvEngine: #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize; } + +pub struct SnapCtx { + pub region_id: u64, + pub read_ts: u64, +} diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 6ef46ff7a70e..6df2ef5a992d 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -149,6 +149,8 @@ pub enum Error { EntriesUnavailable, #[error("The entries of region is compacted")] EntriesCompacted, + #[error("Iterator of RegionCacheSnapshot is only supported with boundary set")] + BoundaryNotSet, } pub type Result = result::Result; @@ -165,6 +167,7 @@ impl ErrorCodeExt for Error { Error::Other(_) => error_code::UNKNOWN, Error::EntriesUnavailable => error_code::engine::DATALOSS, Error::EntriesCompacted => error_code::engine::DATACOMPACTED, + Error::BoundaryNotSet => error_code::engine::BOUNDARY_NOT_SET, } } } diff --git a/components/engine_traits_tests/src/iterator.rs b/components/engine_traits_tests/src/iterator.rs index 714ca4cb0b49..fee6cda6f021 100644 --- a/components/engine_traits_tests/src/iterator.rs +++ b/components/engine_traits_tests/src/iterator.rs @@ -41,7 +41,9 @@ fn iter_empty_engine() { #[test] fn iter_empty_snapshot() { let db = default_engine(); - iter_empty(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_empty(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_forward(e: &E, i: IF) @@ -99,7 +101,9 @@ fn iter_forward_engine() { #[test] fn iter_forward_snapshot() { let db = default_engine(); - iter_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_reverse(e: &E, i: IF) @@ -157,7 +161,9 @@ fn iter_reverse_engine() { #[test] fn iter_reverse_snapshot() { let db = default_engine(); - iter_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_to_key_then_forward(e: &E, i: IF) @@ -198,7 +204,9 @@ fn seek_to_key_then_forward_engine() { #[test] fn seek_to_key_then_forward_snapshot() { let db = default_engine(); - seek_to_key_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_to_key_then_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_to_key_then_reverse(e: &E, i: IF) @@ -239,7 +247,9 @@ fn seek_to_key_then_reverse_engine() { #[test] fn seek_to_key_then_reverse_snapshot() { let db = default_engine(); - seek_to_key_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_to_key_then_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_forward_then_reverse(e: &E, i: IF) @@ -300,7 +310,9 @@ fn iter_forward_then_reverse_engine() { #[test] fn iter_forward_then_reverse_snapshot() { let db = default_engine(); - iter_forward_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_forward_then_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_reverse_then_forward(e: &E, i: IF) @@ -361,7 +373,9 @@ fn iter_reverse_then_forward_engine() { #[test] fn 
iter_reverse_then_forward_snapshot() { let db = default_engine(); - iter_reverse_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_reverse_then_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } // When seek finds an exact key then seek_for_prev behaves just like seek @@ -405,7 +419,9 @@ fn seek_for_prev_engine() { #[test] fn seek_for_prev_snapshot() { let db = default_engine(); - seek_for_prev(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_for_prev(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } // When Seek::Key doesn't find an exact match, @@ -440,7 +456,9 @@ fn seek_key_miss_engine() { #[test] fn seek_key_miss_snapshot() { let db = default_engine(); - seek_key_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_key_miss(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_key_prev_miss(e: &E, i: IF) @@ -472,5 +490,7 @@ fn seek_key_prev_miss_engine() { #[test] fn seek_key_prev_miss_snapshot() { let db = default_engine(); - seek_key_prev_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_key_prev_miss(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } diff --git a/components/engine_traits_tests/src/read_consistency.rs b/components/engine_traits_tests/src/read_consistency.rs index 8c7ab50657fd..35d0262fbcb3 100644 --- a/components/engine_traits_tests/src/read_consistency.rs +++ b/components/engine_traits_tests/src/read_consistency.rs @@ -12,7 +12,7 @@ fn snapshot_with_writes() { db.engine.put(b"a", b"aa").unwrap(); - let snapshot = db.engine.snapshot(); + let snapshot = db.engine.snapshot(None); assert_eq!(snapshot.get_value(b"a").unwrap().unwrap(), b"aa"); @@ -77,5 +77,7 @@ fn iterator_with_writes_engine() { #[test] fn iterator_with_writes_snapshot() { let db = default_engine(); - iterator_with_writes(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iterator_with_writes(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } diff --git a/components/engine_traits_tests/src/snapshot_basic.rs b/components/engine_traits_tests/src/snapshot_basic.rs index c0f934808302..83248abfb6eb 100644 --- a/components/engine_traits_tests/src/snapshot_basic.rs +++ b/components/engine_traits_tests/src/snapshot_basic.rs @@ -10,7 +10,7 @@ fn snapshot_get_value() { db.engine.put(b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); let value = snap.get_value(b"a").unwrap(); let value = value.unwrap(); @@ -26,7 +26,7 @@ fn snapshot_get_value_after_put() { db.engine.put(b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); db.engine.put(b"a", b"aaa").unwrap(); @@ -41,7 +41,7 @@ fn snapshot_get_value_cf() { db.engine.put_cf(CF_WRITE, b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); let value = snap.get_value_cf(CF_WRITE, b"a").unwrap(); let value = value.unwrap(); @@ -57,7 +57,7 @@ fn snapshot_get_value_cf_after_put() { db.engine.put_cf(CF_WRITE, b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); db.engine.put_cf(CF_WRITE, b"a", b"aaa").unwrap(); diff --git a/components/error_code/src/engine.rs b/components/error_code/src/engine.rs index 4bb66f09753d..4ae712ffa581 100644 --- a/components/error_code/src/engine.rs +++ b/components/error_code/src/engine.rs @@ -10,5 +10,6 @@ define_error_codes!( CF_NAME 
=> ("CfName", "", ""), CODEC => ("Codec", "", ""), DATALOSS => ("DataLoss", "", ""), - DATACOMPACTED => ("DataCompacted", "", "") + DATACOMPACTED => ("DataCompacted", "", ""), + BOUNDARY_NOT_SET => ("BoundaryNotSet", "", "") ); diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index 072f1d028fff..6ccb223baf12 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,6 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SyncMutable}; +use engine_traits::{ + KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapCtx, SyncMutable, +}; use crate::snapshot::HybridEngineSnapshot; @@ -63,7 +65,7 @@ where { type Snapshot = HybridEngineSnapshot; - fn snapshot(&self) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { unimplemented!() } diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 5dd43f14e191..bc7e93a394ba 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -116,7 +116,7 @@ impl Apply { self.flush(); let (applied_index, _) = self.apply_progress(); let snap = RegionSnapshot::from_snapshot( - Arc::new(self.tablet().snapshot()), + Arc::new(self.tablet().snapshot(None)), Arc::new(self.region().clone()), ); snap.set_apply_index(applied_index); diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index ea802650f3d8..1829628ae48b 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -209,7 +209,7 @@ where ReadRequestPolicy::ReadLocal => { let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); @@ -240,7 +240,7 @@ where let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); @@ -264,7 +264,7 @@ where let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 24abf24c4fd7..97e865a6bfea 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -330,7 +330,7 @@ fn test_worker() { t.worker.write_to_db(true); - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); assert_eq!(snapshot.get_value(b"kv_k1").unwrap().unwrap(), b"kv_v1"); assert_eq!(snapshot.get_value(b"kv_k2").unwrap().unwrap(), b"kv_v2"); assert_eq!(snapshot.get_value(b"kv_k3").unwrap().unwrap(), b"kv_v3"); @@ -536,7 +536,7 @@ fn test_basic_flow() { must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); assert!(snapshot.get_value(b"kv_k1").unwrap().is_none()); assert_eq!(snapshot.get_value(b"kv_k2").unwrap().unwrap(), b"kv_v2"); 
assert_eq!(snapshot.get_value(b"kv_k3").unwrap().unwrap(), b"kv_v3"); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index f70e0a31181e..221e5b1dcea9 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3244,7 +3244,7 @@ where // open files in rocksdb. // TODO: figure out another way to do consistency check without snapshot // or short life snapshot. - snap: ctx.engine.snapshot(), + snap: ctx.engine.snapshot(None), }) }, )) @@ -4198,7 +4198,7 @@ where } if let Err(e) = snap_task.generate_and_schedule_snapshot::( - apply_ctx.engine.snapshot(), + apply_ctx.engine.snapshot(None), self.delegate.applied_term, self.delegate.apply_state.clone(), &apply_ctx.region_scheduler, @@ -4270,7 +4270,7 @@ where ReadResponse { response: Default::default(), snapshot: Some(RegionSnapshot::from_snapshot( - Arc::new(apply_ctx.engine.snapshot()), + Arc::new(apply_ctx.engine.snapshot(None)), Arc::new(self.delegate.region.clone()), )), txn_extra_op: TxnExtraOp::Noop, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 2d304490bb7a..382b9e53b4b1 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5736,7 +5736,7 @@ where } fn get_snapshot(&mut self, _: &Option>) -> Arc { - Arc::new(self.engines.kv.snapshot()) + Arc::new(self.engines.kv.snapshot(None)) } } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a888929ca985..44ae3543e95a 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -372,7 +372,7 @@ where #[inline] pub fn raw_snapshot(&self) -> EK::Snapshot { - self.engines.kv.snapshot() + self.engines.kv.snapshot(None) } #[inline] @@ -1607,7 +1607,7 @@ pub mod tests { .unwrap() .unwrap(); gen_task.generate_and_schedule_snapshot::( - engines.kv.clone().snapshot(), + engines.kv.clone().snapshot(None), entry.get_term(), apply_state, sched, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index bc22dfbf5866..1c2c6251eba8 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -59,7 +59,7 @@ where where EK: KvEngine, { - RegionSnapshot::from_snapshot(Arc::new(db.snapshot()), Arc::new(region)) + RegionSnapshot::from_snapshot(Arc::new(db.snapshot(None)), Arc::new(region)) } pub fn from_snapshot(snap: Arc, region: Arc) -> RegionSnapshot { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6976f4614df7..e7e7c6ccb10d 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2639,7 +2639,7 @@ pub mod tests { .tempdir() .unwrap(); let db = get_db(src_db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let src_dir = Builder::new() .prefix("test-snap-file-db-src") @@ -2747,7 +2747,7 @@ pub mod tests { .tempdir() .unwrap(); let db = get_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() .prefix("test-snap-validation") @@ -2900,7 +2900,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() 
.prefix("test-snap-corruption") @@ -2975,7 +2975,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db_with_100keys(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() .prefix("test-snap-corruption-meta") @@ -3056,7 +3056,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key1 = SnapKey::new(1, 1, 1); let mgr_core = create_manager_core(&path, u64::MAX); let mut s1 = Snapshot::new_for_building(&path, &key1, &mgr_core).unwrap(); @@ -3127,7 +3127,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(src_db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key = SnapKey::new(1, 1, 1); let region = gen_test_region(1, 1, 1); @@ -3209,7 +3209,7 @@ pub mod tests { .max_total_size(max_total_size) .build::<_>(snapfiles_path.path().to_str().unwrap()); snap_mgr.init().unwrap(); - let snapshot = engine.kv.snapshot(); + let snapshot = engine.kv.snapshot(None); // Add an oldest snapshot for receiving. let recv_key = SnapKey::new(100, 100, 100); @@ -3334,7 +3334,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(kv_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key = SnapKey::new(1, 1, 1); let region = gen_test_region(1, 1, 1); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index c897aaa25979..48919474135d 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -378,7 +378,7 @@ mod tests { .unwrap(); let db1: KvTestEngine = open_test_empty_db(dir1.path(), db_opt, None).unwrap(); - let snap = db.snapshot(); + let snap = db.snapshot(None); for cf in SNAPSHOT_CFS { let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); let mut cf_file = CfFile { @@ -462,7 +462,7 @@ mod tests { let stats = build_sst_cf_file_list::( &mut cf_file, &db, - &db.snapshot(), + &db.snapshot(None), &keys::data_key(b"a"), &keys::data_key(b"z"), *max_file_size, diff --git a/components/raftstore/src/store/worker/consistency_check.rs b/components/raftstore/src/store/worker/consistency_check.rs index fef2bae332c4..d034cd8604fa 100644 --- a/components/raftstore/src/store/worker/consistency_check.rs +++ b/components/raftstore/src/store/worker/consistency_check.rs @@ -162,7 +162,7 @@ mod tests { index: 10, context: vec![ConsistencyCheckMethod::Raw as u8], region: region.clone(), - snap: db.snapshot(), + snap: db.snapshot(None), }); let mut checksum_bytes = vec![]; checksum_bytes.write_u32::(sum).unwrap(); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 2694481494f1..dbec805fe971 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -242,7 +242,7 @@ where } self.snap_cache.cached_read_id = self.read_id.clone(); - self.snap_cache.snapshot = Some(Arc::new(engine.snapshot())); + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(None))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -250,7 +250,7 @@ where } else { // read_id being None means the snapshot acquired will only be used in this // request - self.snapshot = 
Some(Arc::new(engine.snapshot())); + self.snapshot = Some(Arc::new(engine.snapshot(None))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 068904b2a677..dd2c8f90de19 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -1091,7 +1091,7 @@ pub(crate) mod tests { ranges.push(key); } engine.kv.put(b"k1", b"v1").unwrap(); - let snap = engine.kv.snapshot(); + let snap = engine.kv.snapshot(None); engine.kv.put(b"k2", b"v2").unwrap(); sched @@ -1204,7 +1204,7 @@ pub(crate) mod tests { sched .schedule(Task::Gen { region_id: id, - kv_snap: engine.kv.snapshot(), + kv_snap: engine.kv.snapshot(None), last_applied_term: entry.get_term(), last_applied_state: apply_state, canceled: Arc::new(AtomicBool::new(false)), diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 89ae317aa94c..c529698fa143 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -11,3 +11,5 @@ testexport = [] engine_traits = { workspace = true } collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } +bytes = "1.0" +tikv_util = { workspace = true } \ No newline at end of file diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index ad16e7e8407f..93e4c1a6d74d 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -1,5 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+use core::slice::SlicePattern; use std::{ collections::BTreeMap, fmt::{self, Debug}, @@ -7,12 +8,24 @@ use std::{ sync::{Arc, Mutex}, }; +use bytes::Bytes; use collections::HashMap; use engine_traits::{ - CfNamesExt, DbVector, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, + CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, + CF_DEFAULT, CF_LOCK, CF_WRITE, }; use skiplist_rs::{ByteWiseComparator, IterRef, Skiplist}; +use tikv_util::config::ReadableSize; + +fn cf_to_id(cf: &str) -> usize { + match cf { + CF_DEFAULT => 0, + CF_LOCK => 1, + CF_WRITE => 2, + _ => panic!("unrecognized cf {}", cf), + } +} /// RegionMemoryEngine stores data for a specific cached region /// @@ -23,19 +36,69 @@ pub struct RegionMemoryEngine { data: [Arc>; 3], } +impl RegionMemoryEngine { + pub fn with_capacity(arena_size: usize) -> Self { + RegionMemoryEngine { + data: [ + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + ], + } + } +} + +impl Default for RegionMemoryEngine { + fn default() -> Self { + RegionMemoryEngine::with_capacity(ReadableSize::mb(1).0 as usize) + } +} + impl Debug for RegionMemoryEngine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - unimplemented!() + write!(f, "Region Memory Engine") } } -type SnapshotList = BTreeMap; +// read_ts -> ref_count +#[derive(Default)] +struct SnapshotList(BTreeMap); + +impl SnapshotList { + fn new_snapshot(&mut self, read_ts: u64) { + // snapshot with this ts may be granted before + let count = self.0.get(&read_ts).unwrap_or(&0) + 1; + self.0.insert(read_ts, count); + } + + fn remove_snapshot(&mut self, read_ts: u64) { + let count = self.0.get_mut(&read_ts).unwrap(); + assert!(*count >= 1); + if *count == 1 { + self.0.remove(&read_ts).unwrap(); + } else { + *count -= 1; + } + } +} #[derive(Default)] pub struct RegionMemoryMeta { // It records the snapshots that have been granted previsously with specific snapshot_ts. We // should guarantee that the data visible to any one of the snapshot in it will not be removed. - snapshots: SnapshotList, + snapshot_list: SnapshotList, // It indicates whether the region is readable. False means integrity of the data in this // cached region is not satisfied due to being evicted for instance. 
can_read: bool, @@ -47,7 +110,7 @@ pub struct RegionMemoryMeta { #[derive(Default)] pub struct RegionCacheMemoryEngineCore { engine: HashMap, - region_metats: HashMap, + region_metas: HashMap, } /// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in @@ -73,22 +136,29 @@ pub struct RegionCacheMemoryEngine { } impl Debug for RegionCacheMemoryEngine { - fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { - unimplemented!() + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Region Cache Memory Engine") } } impl RegionCacheMemoryEngine { - pub fn new() -> Self { - RegionCacheMemoryEngine::default() + pub fn new_region(&self, region_id: u64) { + let mut core = self.core.lock().unwrap(); + + assert!(core.engine.get(®ion_id).is_none()); + assert!(core.region_metas.get(®ion_id).is_none()); + core.engine.insert(region_id, RegionMemoryEngine::default()); + core.region_metas + .insert(region_id, RegionMemoryMeta::default()); } } impl RegionCacheEngine for RegionCacheMemoryEngine { type Snapshot = RegionCacheSnapshot; + // todo(SpadeA): add sequence number logic fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { - unimplemented!() + RegionCacheSnapshot::new(self.clone(), region_id, read_ts) } } @@ -110,12 +180,14 @@ impl WriteBatchExt for RegionCacheMemoryEngine { } pub struct RegionCacheIterator { + cf: String, valid: bool, prefix_same_as_start: bool, prefix: Option>, iter: IterRef, ByteWiseComparator>, - lower_bound: Option>, - upper_bound: Option>, + // The lower bound is inclusive while the upper bound is exclusive if set + lower_bound: Vec, + upper_bound: Vec, } impl Iterable for RegionCacheMemoryEngine { @@ -132,39 +204,84 @@ impl Iterable for RegionCacheMemoryEngine { impl Iterator for RegionCacheIterator { fn key(&self) -> &[u8] { - unimplemented!() + assert!(self.valid); + self.iter.key().as_slice() } fn value(&self) -> &[u8] { - unimplemented!() + assert!(self.valid); + self.iter.value().as_slice() } fn next(&mut self) -> Result { - unimplemented!() + assert!(self.valid); + self.iter.next(); + self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + Ok(self.valid) } fn prev(&mut self) -> Result { - unimplemented!() + assert!(self.valid); + self.iter.prev(); + self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + Ok(self.valid) } fn seek(&mut self, key: &[u8]) -> Result { - unimplemented!() + let seek_key = if key < self.lower_bound.as_slice() { + self.lower_bound.as_slice() + } else { + key + }; + self.iter.seek(seek_key); + self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + Ok(self.valid) } fn seek_for_prev(&mut self, key: &[u8]) -> Result { - unimplemented!() + let end = if key > self.upper_bound.as_slice() { + self.upper_bound.as_slice() + } else { + key + }; + self.iter.seek_for_prev(end); + self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + Ok(self.valid) } fn seek_to_first(&mut self) -> Result { - unimplemented!() + let 
lower_bound = self.lower_bound.clone(); + self.seek(lower_bound.as_slice()) } fn seek_to_last(&mut self) -> Result { - unimplemented!() + let upper_bound = self.upper_bound.clone(); + self.seek_for_prev(upper_bound.as_slice()) } fn valid(&self) -> Result { - unimplemented!() + Ok(self.valid) } } @@ -240,7 +357,40 @@ impl Mutable for RegionCacheWriteBatch { pub struct RegionCacheSnapshot { region_id: u64, snapshot_ts: u64, - engine: RegionMemoryEngine, + region_memory_engine: RegionMemoryEngine, + engine: RegionCacheMemoryEngine, +} + +impl RegionCacheSnapshot { + pub fn new(engine: RegionCacheMemoryEngine, region_id: u64, read_ts: u64) -> Option { + let mut core = engine.core.lock().unwrap(); + let region_meta = core.region_metas.get_mut(®ion_id)?; + if !region_meta.can_read { + return None; + } + + if read_ts <= region_meta.safe_ts { + // todo(SpadeA): add metrics for it + return None; + } + + region_meta.snapshot_list.new_snapshot(read_ts); + + Some(RegionCacheSnapshot { + region_id, + snapshot_ts: read_ts, + region_memory_engine: core.engine.get(®ion_id).unwrap().clone(), + engine: engine.clone(), + }) + } +} + +impl Drop for RegionCacheSnapshot { + fn drop(&mut self) { + let mut core = self.engine.core.lock().unwrap(); + let meta = core.region_metas.get_mut(&self.region_id).unwrap(); + meta.snapshot_list.remove_snapshot(self.snapshot_ts); + } } impl Snapshot for RegionCacheSnapshot {} @@ -248,12 +398,23 @@ impl Snapshot for RegionCacheSnapshot {} impl Iterable for RegionCacheSnapshot { type Iterator = RegionCacheIterator; - fn iterator(&self, cf: &str) -> Result { - unimplemented!() - } - fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { - unimplemented!() + let iter = self.region_memory_engine.data[cf_to_id(cf)].iter(); + let prefix_same_as_start = opts.prefix_same_as_start(); + let (lower_bound, upper_bound) = opts.build_bounds(); + // only support with lower/upper bound set + if lower_bound.is_none() || upper_bound.is_none() { + return Err(Error::BoundaryNotSet); + } + Ok(RegionCacheIterator { + cf: String::from(cf), + valid: false, + prefix_same_as_start, + prefix: None, + lower_bound: lower_bound.unwrap(), + upper_bound: upper_bound.unwrap(), + iter, + }) } } @@ -261,16 +422,19 @@ impl Peekable for RegionCacheSnapshot { type DbVector = RegionCacheDbVector; fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { - unimplemented!() + self.get_value_cf_opt(opts, CF_DEFAULT, key) } fn get_value_cf_opt( &self, - opts: &ReadOptions, + _: &ReadOptions, cf: &str, key: &[u8], ) -> Result> { - unimplemented!() + Ok(self.region_memory_engine.data[cf_to_id(cf)] + .get(key) + .cloned() + .map(|v| RegionCacheDbVector(v))) } } @@ -286,15 +450,14 @@ impl SnapshotMiscExt for RegionCacheSnapshot { } } -// todo: fill fields needed #[derive(Debug)] -pub struct RegionCacheDbVector; +pub struct RegionCacheDbVector(Bytes); impl Deref for RegionCacheDbVector { type Target = [u8]; fn deref(&self) -> &[u8] { - unimplemented!() + self.0.as_slice() } } @@ -302,6 +465,296 @@ impl DbVector for RegionCacheDbVector {} impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { fn eq(&self, rhs: &&[u8]) -> bool { - unimplemented!() + self.0.as_slice() == *rhs + } +} + +#[cfg(test)] +mod tests { + use core::ops::Range; + use std::{iter::StepBy, sync::Arc}; + + use bytes::Bytes; + use engine_traits::{ + IterOptions, Iterable, Iterator, Peekable, ReadOptions, RegionCacheEngine, + }; + use skiplist_rs::{ByteWiseComparator, Skiplist}; + + use super::{cf_to_id, RegionCacheIterator}; + use 
crate::RegionCacheMemoryEngine; + + #[test] + fn test_snapshot() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + + let verify_snapshot_count = |snapshot_ts, count| { + let core = engine.core.lock().unwrap(); + if count > 0 { + assert_eq!( + *core + .region_metas + .get(&1) + .unwrap() + .snapshot_list + .0 + .get(&snapshot_ts) + .unwrap(), + count + ); + } else { + assert!( + core.region_metas + .get(&1) + .unwrap() + .snapshot_list + .0 + .get(&snapshot_ts) + .is_none() + ) + } + }; + + assert!(engine.snapshot(1, 5).is_none()); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + } + let s1 = engine.snapshot(1, 5).unwrap(); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + } + assert!(engine.snapshot(1, 5).is_none()); + let s2 = engine.snapshot(1, 10).unwrap(); + + verify_snapshot_count(5, 1); + verify_snapshot_count(10, 1); + let s3 = engine.snapshot(1, 10).unwrap(); + verify_snapshot_count(10, 2); + + drop(s1); + verify_snapshot_count(5, 0); + drop(s2); + verify_snapshot_count(10, 1); + let s4 = engine.snapshot(1, 10).unwrap(); + verify_snapshot_count(10, 2); + drop(s4); + verify_snapshot_count(10, 1); + drop(s3); + verify_snapshot_count(10, 0); + } + + fn construct_key(i: i32) -> String { + format!("key-{:08}", i) + } + + fn construct_value(i: i32) -> String { + format!("value-{:08}", i) + } + + fn fill_data_in_skiplist(sl: Arc>, range: StepBy>) { + for i in range { + let key = construct_key(i); + let val = construct_value(i); + sl.put(Bytes::from(key), Bytes::from(val)); + } + } + + fn verify_key_value(k: &[u8], v: &[u8], i: i32) { + let key = construct_key(i); + let val = construct_value(i); + assert_eq!(k, key.as_bytes()); + assert_eq!(v, val.as_bytes()); + } + + fn verify_key_values( + iter: &mut RegionCacheIterator, + step: i32, + mut start_idx: i32, + end_idx: i32, + ) { + let forward = step > 0; + while iter.valid().unwrap() { + let k = iter.key(); + let val = iter.value(); + verify_key_value(k, val, start_idx); + if forward { + iter.next().unwrap(); + } else { + iter.prev().unwrap(); + } + start_idx += step; + } + + if forward { + assert!(start_idx - step < end_idx); + } else { + assert!(start_idx - step > end_idx); + } + } + + #[test] + fn test_get_value() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl, (1..100).step_by(1)); + } + + let snapshot = engine.snapshot(1, 10).unwrap(); + let opts = ReadOptions::default(); + for i in 1..100 { + let k = construct_key(i); + let v = snapshot + .get_value_cf_opt(&opts, "write", k.as_bytes()) + .unwrap() + .unwrap(); + verify_key_value(k.as_bytes(), &v, i); + } + + let k = construct_key(100); + assert!( + snapshot + .get_value_cf_opt(&opts, "write", k.as_bytes()) + .unwrap() + .is_none() + ); + } + + #[test] + fn test_iterator_forward() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl,
(1..100).step_by(step as usize)); + } + + let mut iter_opt = IterOptions::default(); + let snapshot = engine.snapshot(1, 10).unwrap(); + // boundaries are not set + assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); + + let lower_bound = construct_key(1); + let upper_bound = construct_key(100); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + + let mut iter = snapshot.iterator_opt("lock", iter_opt.clone()).unwrap(); + assert!(!iter.seek_to_first().unwrap()); + + let mut iter = snapshot.iterator_opt("default", iter_opt.clone()).unwrap(); + assert!(!iter.seek_to_first().unwrap()); + + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values(&mut iter, step, 1, i32::MAX); + + // seek key that is in the skiplist + let seek_key = construct_key(11); + iter.seek(seek_key.as_bytes()).unwrap(); + verify_key_values(&mut iter, step, 11, i32::MAX); + + // seek key that is not in the skiplist + let seek_key = construct_key(12); + iter.seek(seek_key.as_bytes()).unwrap(); + verify_key_values(&mut iter, step, 13, i32::MAX); + + // with bounds + let lower_bound = construct_key(20); + let upper_bound = construct_key(40); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + assert!(iter.seek_to_first().unwrap()); + verify_key_values(&mut iter, step, 21, 40); + + // seek a key that is below the lower bound is the same with seek_to_first + let seek_key = construct_key(11); + assert!(iter.seek(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 21, 40); + + // seek a key that is larger or equal to upper bound won't get any key + let seek_key = construct_key(40); + assert!(!iter.seek(seek_key.as_bytes()).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(22); + assert!(iter.seek(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 23, 40); + } + + #[test] + fn test_iterator_backward() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let mut step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl, (1..100).step_by(step as usize)); + } + step = -step; + + let mut iter_opt = IterOptions::default(); + let lower_bound = construct_key(1); + let upper_bound = construct_key(100); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + + let snapshot = engine.snapshot(1, 10).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + verify_key_values(&mut iter, step, 99, i32::MIN); + + // seek key that is in the skiplist + let seek_key = construct_key(81); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 81, i32::MIN); + + // seek key that is in the skiplist + let seek_key = construct_key(80); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 79, i32::MIN); + + let lower_bound = construct_key(20); + let upper_bound = construct_key(40); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + 
iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + assert!(iter.seek_to_last().unwrap()); + verify_key_values(&mut iter, step, 39, 20); + + // seek a key that is above the upper bound is the same with seek_to_last + let seek_key = construct_key(45); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 39, 20); + + // seek a key that is less than the lower bound won't get any key + let seek_key = construct_key(19); + assert!(!iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(38); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 37, 20); } } diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index d512847efb42..fe15f4f936b4 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ b/components/region_cache_memory_engine/src/lib.rs @@ -2,6 +2,8 @@ #![allow(dead_code)] #![allow(unused_variables)] +#![feature(let_chains)] +#![feature(slice_pattern)] mod engine; pub use engine::RegionCacheMemoryEngine; diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 21099974d2d4..551b933faeb0 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -64,7 +64,7 @@ impl Runnable for Runner { match t { Task::Write(modifies, cb) => cb(write_modifies(&self.0.kv, modifies)), Task::Snapshot(sender) => { - let _ = sender.send(Arc::new(self.0.kv.snapshot())); + let _ = sender.send(Arc::new(self.0.kv.snapshot(None))); } Task::Pause(dur) => std::thread::sleep(dur), } diff --git a/src/server/node.rs b/src/server/node.rs index fb2f28d9c1aa..bf19cb6c0056 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -291,7 +291,7 @@ where }; if should_check { // Check if there are only TiDB data in the engine - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); for cf in DATA_CFS { for (start, end) in TIDB_RANGES_COMPLEMENT { let mut unexpected_data_key = None; diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index d567edd5add9..eab0f38d749e 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -53,7 +53,7 @@ impl SyncBenchRouter { cmd_resp::bind_term(&mut response, 1); match cmd.callback { Callback::Read { cb, .. 
} => { - let snapshot = self.db.snapshot(); + let snapshot = self.db.snapshot(None); let region = Arc::new(self.region.to_owned()); cb(ReadResponse { response, @@ -142,7 +142,7 @@ fn new_engine() -> (TempDir, RocksEngine) { #[bench] fn bench_async_snapshots_noop(b: &mut test::Bencher) { let (_dir, db) = new_engine(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let resp = ReadResponse { response: RaftCmdResponse::default(), snapshot: Some(RegionSnapshot::from_snapshot( diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 0b71978f63bd..edef780ce31b 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -619,7 +619,7 @@ fn test_gen_during_heavy_recv() { let snap = do_snapshot( snap_mgr.clone(), &engine, - engine.snapshot(), + engine.snapshot(None), r2, snap_term, snap_apply_state, diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 752c6aaee1ac..fc84d56fd00d 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -371,7 +371,7 @@ fn test_delete_files_in_range_for_titan() { build_sst_cf_file_list::( &mut cf_file, &engines.kv, - &engines.kv.snapshot(), + &engines.kv.snapshot(None), b"", b"{", u64::MAX, @@ -388,7 +388,7 @@ fn test_delete_files_in_range_for_titan() { build_sst_cf_file_list::( &mut cf_file_write, &engines.kv, - &engines.kv.snapshot(), + &engines.kv.snapshot(None), b"", b"{", u64::MAX, From 4626f8d774b720a10b094cc39e0a78b3cd27dd2b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:30:22 +0800 Subject: [PATCH 195/203] In-memory Engine: integrate HybridEngine with integration test framework (#16186) ref tikv/tikv#16141 integrate HybridEngine with integration test framework Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 + components/backup-stream/tests/suite.rs | 3 +- components/cdc/tests/mod.rs | 11 +- components/resolved_ts/tests/mod.rs | 8 +- components/test_backup/Cargo.toml | 1 + components/test_backup/src/lib.rs | 3 +- components/test_raftstore/Cargo.toml | 2 + components/test_raftstore/src/cluster.rs | 64 ++++--- components/test_raftstore/src/lib.rs | 1 + components/test_raftstore/src/node.rs | 79 ++++---- components/test_raftstore/src/server.rs | 181 +++++++++++------- .../test_raftstore/src/transport_simulate.rs | 45 +++-- components/test_raftstore/src/util.rs | 139 +++++++++----- components/test_storage/Cargo.toml | 1 + components/test_storage/src/assert_storage.rs | 23 +-- components/test_storage/src/util.rs | 11 +- tests/benches/raftstore/mod.rs | 25 +-- tests/failpoints/cases/test_bootstrap.rs | 5 +- .../cases/test_cmd_epoch_checker.rs | 9 +- tests/failpoints/cases/test_conf_change.rs | 4 +- tests/failpoints/cases/test_merge.rs | 3 +- tests/failpoints/cases/test_rawkv.rs | 3 +- tests/failpoints/cases/test_replica_read.rs | 4 +- .../cases/test_replica_stale_read.rs | 15 +- tests/failpoints/cases/test_sst_recovery.rs | 7 +- tests/failpoints/cases/test_stale_read.rs | 7 +- tests/failpoints/cases/test_witness.rs | 5 +- tests/integrations/coprocessor/test_select.rs | 76 ++++---- tests/integrations/import/util.rs | 19 +- .../integrations/raftstore/test_bootstrap.rs | 6 +- .../raftstore/test_clear_stale_data.rs | 2 +- .../raftstore/test_compact_after_delete.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 12 +- .../raftstore/test_compact_log.rs | 13 +- .../raftstore/test_early_apply.rs | 13 +- 
.../integrations/raftstore/test_flashback.rs | 12 +- .../integrations/raftstore/test_hibernate.rs | 6 +- .../raftstore/test_joint_consensus.rs | 13 +- .../integrations/raftstore/test_lease_read.rs | 8 +- tests/integrations/raftstore/test_multi.rs | 46 +++-- tests/integrations/raftstore/test_prevote.rs | 20 +- .../raftstore/test_region_change_observer.rs | 3 +- .../raftstore/test_region_heartbeat.rs | 3 +- .../raftstore/test_region_info_accessor.rs | 6 +- .../raftstore/test_replication_mode.rs | 27 +-- tests/integrations/raftstore/test_snap.rs | 6 +- .../raftstore/test_split_region.rs | 6 +- .../integrations/raftstore/test_stale_peer.rs | 10 +- tests/integrations/raftstore/test_stats.rs | 24 ++- .../integrations/raftstore/test_tombstone.rs | 7 +- .../integrations/raftstore/test_transport.rs | 3 +- .../raftstore/test_update_region_size.rs | 7 +- tests/integrations/raftstore/test_witness.rs | 5 +- .../resource_metering/test_read_keys.rs | 10 +- tests/integrations/server/kv_service.rs | 4 +- tests/integrations/server/lock_manager.rs | 35 +++- tests/integrations/server_encryption.rs | 3 +- .../integrations/storage/test_raft_storage.rs | 7 +- tests/integrations/storage/test_raftkv.rs | 3 +- .../storage/test_region_info_accessor.rs | 5 +- 60 files changed, 681 insertions(+), 406 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 335c4a5301f8..eb1fe04b4244 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5685,6 +5685,7 @@ dependencies = [ "collections", "concurrency_manager", "crc64fast", + "engine_rocks", "engine_traits", "external_storage", "file_system", @@ -5784,6 +5785,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "grpcio-health", + "hybrid_engine", "keys", "kvproto", "lazy_static", @@ -5793,6 +5795,7 @@ dependencies = [ "raft", "raftstore", "rand 0.8.5", + "region_cache_memory_engine", "resolved_ts", "resource_control", "resource_metering", @@ -5887,6 +5890,7 @@ version = "0.0.1" dependencies = [ "api_version", "collections", + "engine_rocks", "futures 0.3.15", "kvproto", "pd_client", diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 2886bb4f5d7d..434d81fff489 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -21,6 +21,7 @@ use backup_stream::{ utils, BackupStreamResolver, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; +use engine_rocks::RocksEngine; use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt}; use grpcio::{ChannelBuilder, Server, ServerBuilder}; use kvproto::{ @@ -249,7 +250,7 @@ impl MetaStore for ErrorStore { pub struct Suite { pub endpoints: HashMap>, pub meta_store: ErrorStore, - pub cluster: Cluster, + pub cluster: Cluster>, tikv_cli: HashMap, log_backup_cli: HashMap, obs: HashMap, diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index afd209af2d3a..b85c1db4493a 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -130,7 +130,7 @@ fn create_event_feed( } pub struct TestSuiteBuilder { - cluster: Option>, + cluster: Option>>, memory_quota: Option, } @@ -143,7 +143,10 @@ impl TestSuiteBuilder { } #[must_use] - pub fn cluster(mut self, cluster: Cluster) -> TestSuiteBuilder { + pub fn cluster( + mut self, + cluster: Cluster>, + ) -> TestSuiteBuilder { self.cluster = Some(cluster); self } @@ -160,7 +163,7 @@ impl TestSuiteBuilder { pub fn build_with_cluster_runner(self, mut runner: F) -> TestSuite where - F: FnMut(&mut Cluster), + F: FnMut(&mut Cluster>), { init(); let 
memory_quota = self.memory_quota.unwrap_or(usize::MAX); @@ -249,7 +252,7 @@ impl TestSuiteBuilder { } pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub obs: HashMap, tikv_cli: HashMap, diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 830e2156e9f5..fc3d57209294 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -4,6 +4,7 @@ use std::{sync::*, time::Duration}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use futures::{executor::block_on, stream, SinkExt}; use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment, Result, WriteFlags}; use kvproto::{ @@ -26,7 +27,7 @@ pub fn init() { } pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub obs: HashMap, tikv_cli: HashMap, @@ -44,7 +45,10 @@ impl TestSuite { Self::with_cluster(count, cluster) } - pub fn with_cluster(count: usize, mut cluster: Cluster) -> Self { + pub fn with_cluster( + count: usize, + mut cluster: Cluster>, + ) -> Self { init(); let pd_cli = cluster.pd_client.clone(); let mut endpoints = HashMap::default(); diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index c13b3008df98..1dbe232fd9ea 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -10,6 +10,7 @@ backup = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crc64fast = "0.1" +engine_rocks = { workspace = true } engine_traits = { workspace = true } external_storage ={ workspace = true } file_system = { workspace = true } diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 6c6eae961d7f..4331f0727506 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -11,6 +11,7 @@ use std::{ use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawValue}; use backup::Task; use collections::HashMap; +use engine_rocks::RocksEngine; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; use external_storage::make_local_backend; use futures::{channel::mpsc as future_mpsc, executor::block_on}; @@ -39,7 +40,7 @@ use tikv_util::{ use txn_types::TimeStamp; pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub tikv_cli: TikvClient, pub context: Context, diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index d48acc4e92ba..33430ba3fa85 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -39,6 +39,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +hybrid_engine = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" @@ -48,6 +49,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } raftstore = { workspace = true, features = ["testexport"] } rand = "0.8" +region_cache_memory_engine = { workspace = true } resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index a08f858c0316..1e0c57c3706f 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs 
@@ -12,14 +12,15 @@ use std::{ time::Duration, }; +use ::server::common::KvEngineBuilder; use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{RocksCompactedEvent, RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, - WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, + Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, WriteBatch, + CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; @@ -64,12 +65,15 @@ use txn_types::WriteBatchFlags; use super::*; use crate::Config; +pub trait KvEngineWithRocks = + KvEngine + KvEngineBuilder; + // We simulate 3 or 5 nodes, each has a store. // Sometimes, we use fixed id to test, which means the id // isn't allocated by pd, and node id, store id are same. // E,g, for node 1, the node id and store id are both 1. -pub trait Simulator { +pub trait Simulator { // Pass 0 to let pd allocate a node id if db is empty. // If node id > 0, the node must be created in db already, // and the node id must be the same as given argument. @@ -79,11 +83,11 @@ pub trait Simulator { &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult; fn stop_node(&mut self, node_id: u64); @@ -92,7 +96,7 @@ pub trait Simulator { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) -> Result<()> { self.async_command_on_node_with_opts(node_id, request, cb, Default::default()) } @@ -100,13 +104,13 @@ pub trait Simulator { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()>; fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; fn get_snap_dir(&self, node_id: u64) -> String; fn get_snap_mgr(&self, node_id: u64) -> &SnapManager; - fn get_router(&self, node_id: u64) -> Option>; + fn get_router(&self, node_id: u64) -> Option>; fn add_send_filter(&mut self, node_id: u64, filter: Box); fn clear_send_filters(&mut self, node_id: u64); fn add_recv_filter(&mut self, node_id: u64, filter: Box); @@ -124,7 +128,7 @@ pub trait Simulator { timeout: Duration, ) -> Result { let node_id = request.get_header().get_peer().get_store_id(); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); self.async_read(node_id, batch_id, request, cb); rx.recv_timeout(timeout) .map_err(|_| Error::Timeout(format!("request timeout for {:?}", timeout))) @@ -135,7 +139,7 @@ pub trait Simulator { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ); fn call_command_on_node( @@ -144,7 +148,7 @@ pub trait Simulator { request: RaftCmdRequest, timeout: Duration, ) -> Result { - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); match self.async_command_on_node(node_id, request, cb) { Ok(()) => {} @@ -159,17 +163,17 @@ pub trait Simulator { } } -pub struct Cluster { +pub struct Cluster> { pub cfg: Config, leaders: HashMap, pub count: usize, pub paths: Vec, - pub dbs: Vec>, + pub dbs: Vec>, pub store_metas: HashMap>>, key_managers: Vec>>, pub 
io_rate_limiter: Option>, - pub engines: HashMap>, + pub engines: HashMap>, key_managers_map: HashMap>>, pub labels: HashMap>, group_props: HashMap, @@ -182,7 +186,11 @@ pub struct Cluster { resource_manager: Option>, } -impl Cluster { +impl Cluster +where + EK: KvEngineWithRocks, + T: Simulator, +{ // Create the default Store cluster. pub fn new( id: u64, @@ -190,7 +198,7 @@ impl Cluster { sim: Arc>, pd_client: Arc, api_version: ApiVersion, - ) -> Cluster { + ) -> Cluster { // TODO: In the future, maybe it's better to test both case where // `use_delete_range` is true and false Cluster { @@ -250,7 +258,7 @@ impl Cluster { assert!(self.sst_workers_map.insert(node_id, offset).is_none()); } - fn create_engine(&mut self, router: Option>) { + fn create_engine(&mut self, router: Option>) { let (engines, key_manager, dir, sst_worker, kv_statistics, raft_statistics) = create_test_engine(router, self.io_rate_limiter.clone(), &self.cfg); self.dbs.push(engines); @@ -405,7 +413,7 @@ impl Cluster { tikv_util::thread_group::set_properties(previous_prop); } - pub fn get_engine(&self, node_id: u64) -> RocksEngine { + pub fn get_engine(&self, node_id: u64) -> EK { self.engines[&node_id].kv.clone() } @@ -413,7 +421,7 @@ impl Cluster { self.engines[&node_id].raft.clone() } - pub fn get_all_engines(&self, node_id: u64) -> Engines { + pub fn get_all_engines(&self, node_id: u64) -> Engines { self.engines[&node_id].clone() } @@ -770,7 +778,7 @@ impl Cluster { self.leaders.remove(®ion_id); } - pub fn assert_quorum bool>(&self, mut condition: F) { + pub fn assert_quorum bool>(&self, mut condition: F) { if self.engines.is_empty() { return; } @@ -983,7 +991,7 @@ impl Cluster { let region_id = req.get_header().get_region_id(); let leader = self.leader_of_region(region_id).unwrap(); req.mut_header().set_peer(leader.clone()); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb::(&req); self.sim .rl() .async_command_on_node_with_opts(leader.get_store_id(), req, cb, opts)?; @@ -1325,7 +1333,7 @@ impl Cluster { } } - pub fn restore_kv_meta(&self, region_id: u64, store_id: u64, snap: &RocksSnapshot) { + pub fn restore_kv_meta(&self, region_id: u64, store_id: u64, snap: &EK::Snapshot) { let (meta_start, meta_end) = ( keys::region_meta_prefix(region_id), keys::region_meta_prefix(region_id + 1), @@ -1453,7 +1461,7 @@ impl Cluster { &mut self, region: &metapb::Region, split_key: &[u8], - cb: Callback, + cb: Callback, ) { let leader = self.leader_of_region(region.get_id()).unwrap(); let router = self.sim.rl().get_router(leader.get_store_id()).unwrap(); @@ -1693,7 +1701,7 @@ impl Cluster { ) } - pub fn merge_region(&mut self, source: u64, target: u64, cb: Callback) { + pub fn merge_region(&mut self, source: u64, target: u64, cb: Callback) { let mut req = self.new_prepare_merge(source, target); let leader = self.leader_of_region(source).unwrap(); req.mut_header().set_peer(leader.clone()); @@ -1864,7 +1872,7 @@ impl Cluster { ctx } - pub fn get_router(&self, node_id: u64) -> Option> { + pub fn get_router(&self, node_id: u64) -> Option> { self.sim.rl().get_router(node_id) } @@ -1966,7 +1974,7 @@ impl Cluster { } } -impl Drop for Cluster { +impl> Drop for Cluster { fn drop(&mut self) { test_util::clear_failpoints(); self.shutdown(); diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 04dfbd24de17..be38155af6c0 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2018 TiKV Project Authors. 
Licensed under Apache-2.0. #![feature(let_chains)] +#![feature(trait_alias)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index f429f27ff8b6..7564da0e27e1 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -8,9 +8,9 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, MiscExt, Peekable}; +use engine_traits::{Engines, KvEngine}; use kvproto::{ kvrpcpb::ApiVersion, metapb, @@ -49,18 +49,18 @@ use tikv_util::{ use super::*; use crate::Config; -pub struct ChannelTransportCore { +pub struct ChannelTransportCore { snap_paths: HashMap, - routers: HashMap>>, + routers: HashMap, EK>>, } #[derive(Clone)] -pub struct ChannelTransport { - core: Arc>, +pub struct ChannelTransport { + core: Arc>>, } -impl ChannelTransport { - pub fn new() -> ChannelTransport { +impl ChannelTransport { + pub fn new() -> ChannelTransport { ChannelTransport { core: Arc::new(Mutex::new(ChannelTransportCore { snap_paths: HashMap::default(), @@ -70,13 +70,13 @@ impl ChannelTransport { } } -impl Default for ChannelTransport { +impl Default for ChannelTransport { fn default() -> Self { Self::new() } } -impl Transport for ChannelTransport { +impl Transport for ChannelTransport { fn send(&mut self, msg: RaftMessage) -> Result<()> { let from_store = msg.get_from_peer().get_store_id(); let to_store = msg.get_to_peer().get_store_id(); @@ -149,22 +149,22 @@ impl Transport for ChannelTransport { fn flush(&mut self) {} } -type SimulateChannelTransport = SimulateTransport; +type SimulateChannelTransport = SimulateTransport, EK>; -pub struct NodeCluster { - trans: ChannelTransport, +pub struct NodeCluster { + trans: ChannelTransport, pd_client: Arc, - nodes: HashMap>, + nodes: HashMap>, snap_mgrs: HashMap, cfg_controller: HashMap, - simulate_trans: HashMap, + simulate_trans: HashMap>, concurrency_managers: HashMap, #[allow(clippy::type_complexity)] - post_create_coprocessor_host: Option)>>, + post_create_coprocessor_host: Option)>>, } -impl NodeCluster { - pub fn new(pd_client: Arc) -> NodeCluster { +impl NodeCluster { + pub fn new(pd_client: Arc) -> NodeCluster { NodeCluster { trans: ChannelTransport::new(), pd_client, @@ -178,12 +178,12 @@ impl NodeCluster { } } -impl NodeCluster { +impl NodeCluster { #[allow(dead_code)] pub fn get_node_router( &self, node_id: u64, - ) -> SimulateTransport> { + ) -> SimulateTransport, EK> { self.trans .core .lock() @@ -198,17 +198,14 @@ impl NodeCluster { // first argument of `op` is the node_id. // Set this before invoking `run_node`. 
#[allow(clippy::type_complexity)] - pub fn post_create_coprocessor_host( - &mut self, - op: Box)>, - ) { + pub fn post_create_coprocessor_host(&mut self, op: Box)>) { self.post_create_coprocessor_host = Some(op) } pub fn get_node( &mut self, node_id: u64, - ) -> Option<&mut Node> { + ) -> Option<&mut Node> { self.nodes.get_mut(&node_id) } @@ -221,16 +218,16 @@ impl NodeCluster { } } -impl Simulator for NodeCluster { +impl Simulator for NodeCluster { fn run_node( &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, _resource_manager: &Option>, ) -> ServerResult { assert!(node_id == 0 || !self.nodes.contains_key(&node_id)); @@ -434,7 +431,7 @@ impl Simulator for NodeCluster { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()> { if !self @@ -465,7 +462,7 @@ impl Simulator for NodeCluster { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) { if !self .trans @@ -514,14 +511,25 @@ impl Simulator for NodeCluster { trans.routers.get_mut(&node_id).unwrap().clear_filters(); } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.nodes.get(&node_id).map(|node| node.get_router()) } } // Compare to server cluster, node cluster does not have server layer and // storage layer. -pub fn new_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_node_cluster(id: u64, count: usize) -> Cluster> { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +// the hybrid engine with disk engine "RocksEngine" and region cache engine +// "RegionCacheMemoryEngine" is used in the node cluster. 
+pub fn new_node_cluster_with_hybrid_engine( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) @@ -529,7 +537,10 @@ pub fn new_node_cluster(id: u64, count: usize) -> Cluster { // This cluster does not support batch split, we expect it to transfer the // `BatchSplit` request to `split` request -pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_node_cluster( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 20e651ea1dc9..bbc4ee2cf497 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -13,9 +13,9 @@ use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, MiscExt}; +use engine_traits::{Engines, KvEngine}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; @@ -86,12 +86,12 @@ use txn_types::TxnExtraScheduler; use super::*; use crate::Config; -type SimulateStoreTransport = SimulateTransport>; +type SimulateStoreTransport = SimulateTransport, EK>; -pub type SimulateEngine = RaftKv; -type SimulateRaftExtension = ::RaftExtension; -type SimulateServerTransport = - SimulateTransport>; +pub type SimulateEngine = RaftKv>; +type SimulateRaftExtension = as Engine>::RaftExtension; +type SimulateServerTransport = + SimulateTransport, PdStoreAddrResolver>, EK>; #[derive(Default, Clone)] pub struct AddressMap { @@ -127,29 +127,29 @@ impl StoreAddrResolver for AddressMap { } } -struct ServerMeta { - node: Node, - server: Server, - sim_router: SimulateStoreTransport, - sim_trans: SimulateServerTransport, - raw_router: RaftRouter, - raw_apply_router: ApplyRouter, - gc_worker: GcWorker>, +struct ServerMeta { + node: Node, + server: Server>, + sim_router: SimulateStoreTransport, + sim_trans: SimulateServerTransport, + raw_router: RaftRouter, + raw_apply_router: ApplyRouter, + gc_worker: GcWorker>>, rts_worker: Option>, rsmeter_cleanup: Box, } type PendingServices = Vec Service>>; -type CopHooks = Vec)>>; +type CopHooks = Vec)>>; -pub struct ServerCluster { - metas: HashMap, +pub struct ServerCluster { + metas: HashMap>, addrs: AddressMap, - pub storages: HashMap, + pub storages: HashMap>, pub region_info_accessors: HashMap, - pub importers: HashMap>>, + pub importers: HashMap>>, pub pending_services: HashMap, - pub coprocessor_hooks: HashMap, + pub coprocessor_hooks: HashMap>, pub health_services: HashMap, pub security_mgr: Arc, pub txn_extra_schedulers: HashMap>, @@ -163,8 +163,8 @@ pub struct ServerCluster { pub causal_ts_providers: HashMap>, } -impl ServerCluster { - pub fn new(pd_client: Arc) -> ServerCluster { +impl ServerCluster { + pub fn new(pd_client: Arc) -> ServerCluster { let env = Arc::new( EnvBuilder::new() .cq_count(2) @@ -211,19 +211,16 @@ impl ServerCluster { self.addrs.get(node_id).unwrap() } - pub 
fn get_apply_router(&self, node_id: u64) -> ApplyRouter { + pub fn get_apply_router(&self, node_id: u64) -> ApplyRouter { self.metas.get(&node_id).unwrap().raw_apply_router.clone() } - pub fn get_server_router(&self, node_id: u64) -> SimulateStoreTransport { + pub fn get_server_router(&self, node_id: u64) -> SimulateStoreTransport { self.metas.get(&node_id).unwrap().sim_router.clone() } /// To trigger GC manually. - pub fn get_gc_worker( - &self, - node_id: u64, - ) -> &GcWorker> { + pub fn get_gc_worker(&self, node_id: u64) -> &GcWorker>> { &self.metas.get(&node_id).unwrap().gc_worker } @@ -264,11 +261,11 @@ impl ServerCluster { &mut self, node_id: u64, mut cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult { let (tmp_str, tmp) = if node_id == 0 || !self.snap_paths.contains_key(&node_id) { @@ -496,7 +493,7 @@ impl ServerCluster { ); let debugger = DebuggerImpl::new( - engines.clone(), + Engines::new(engines.kv.get_disk_engine().clone(), engines.raft.clone()), ConfigController::new(cfg.tikv.clone()), Some(store.clone()), ); @@ -668,16 +665,16 @@ impl ServerCluster { } } -impl Simulator for ServerCluster { +impl Simulator for ServerCluster { fn run_node( &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult { dispatch_api_version!( @@ -728,7 +725,7 @@ impl Simulator for ServerCluster { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()> { let router = match self.metas.get(&node_id) { @@ -743,7 +740,7 @@ impl Simulator for ServerCluster { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) { match self.metas.get_mut(&node_id) { None => { @@ -800,13 +797,13 @@ impl Simulator for ServerCluster { .clear_filters(); } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.metas.get(&node_id).map(|m| m.raw_router.clone()) } } -impl Cluster { - pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { +impl Cluster> { + pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { self.must_get_snapshot_of_region_with_ctx(region_id, Default::default()) } @@ -814,8 +811,8 @@ impl Cluster { &mut self, region_id: u64, snap_ctx: SnapContext<'_>, - ) -> RegionSnapshot { - let mut try_snapshot = || -> Option> { + ) -> RegionSnapshot { + let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; let store_id = leader.store_id; let epoch = self.get_region_epoch(region_id); @@ -840,7 +837,7 @@ impl Cluster { panic!("failed to get snapshot of region {}", region_id); } - pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { + pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { self.sim.rl().storages[&node_id].raft_extension() } @@ -848,11 +845,7 @@ impl Cluster { self.sim.rl().get_addr(node_id) } - pub fn register_hook( - &self, - node_id: u64, - register: Box)>, - ) { + pub fn register_hook(&self, node_id: u64, register: Box)>) { self.sim .wl() .coprocessor_hooks @@ -862,7 +855,21 @@ impl Cluster { } } -pub fn new_server_cluster(id: u64, count: usize) -> Cluster { 
+pub fn new_server_cluster( + id: u64, + count: usize, +) -> Cluster> { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +// the hybrid engine with disk engine "RocksEngine" and region cache engine +// "RegionCacheMemoryEngine" is used in the server cluster. +pub fn new_server_cluster_with_hybrid_engine( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) @@ -872,32 +879,49 @@ pub fn new_server_cluster_with_api_ver( id: u64, count: usize, api_ver: ApiVersion, -) -> Cluster { +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, api_ver) } -pub fn new_incompatible_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_server_cluster( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) } -pub fn must_new_cluster_mul(count: usize) -> (Cluster, metapb::Peer, Context) { +pub fn must_new_cluster_mul( + count: usize, +) -> ( + Cluster>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(count, |_| ()) } pub fn must_new_and_configure_cluster( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(1, configure) } fn must_new_and_configure_cluster_mul( count: usize, - mut configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + mut configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + metapb::Peer, + Context, +) { let mut cluster = new_server_cluster(0, count); configure(&mut cluster); cluster.run(); @@ -912,13 +936,21 @@ fn must_new_and_configure_cluster_mul( (cluster, leader, ctx) } -pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) { +pub fn must_new_cluster_and_kv_client() -> ( + Cluster>, + TikvClient, + Context, +) { must_new_cluster_and_kv_client_mul(1) } pub fn must_new_cluster_and_kv_client_mul( count: usize, -) -> (Cluster, TikvClient, Context) { +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); @@ -929,7 +961,11 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } -pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) { +pub fn must_new_cluster_and_debug_client() -> ( + Cluster>, + DebugClient, + u64, +) { let (cluster, leader, _) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); @@ -940,8 +976,12 @@ pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClie (cluster, client, leader.get_store_id()) } -pub fn must_new_cluster_kv_client_and_debug_client() --> (Cluster, TikvClient, DebugClient, Context) { +pub fn must_new_cluster_kv_client_and_debug_client() -> ( + Cluster>, + TikvClient, + DebugClient, + Context, +) { let (cluster, leader, ctx) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); @@ -955,8 +995,12 @@ pub fn 
must_new_cluster_kv_client_and_debug_client() } pub fn must_new_and_configure_cluster_and_kv_client( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { + configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_and_configure_cluster(configure); let env = Arc::new(Environment::new(1)); @@ -967,7 +1011,12 @@ pub fn must_new_and_configure_cluster_and_kv_client( (cluster, client, ctx) } -pub fn setup_cluster() -> (Cluster, TikvClient, String, Context) { +pub fn setup_cluster() -> ( + Cluster>, + TikvClient, + String, + Context, +) { let mut cluster = new_server_cluster(0, 3); cluster.run(); diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index ef569e3987a4..4c21552cee51 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -11,7 +11,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_traits::KvEngine; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::eraftpb::MessageType; use raftstore::{ @@ -140,16 +140,19 @@ impl Filter for DelayFilter { } #[derive(Clone)] -pub struct SimulateTransport { +pub struct SimulateTransport { filters: Arc>>>, ch: C, + + _p: PhantomData, } -impl SimulateTransport { - pub fn new(ch: C) -> SimulateTransport { +impl SimulateTransport { + pub fn new(ch: C) -> SimulateTransport { SimulateTransport { filters: Arc::new(RwLock::new(vec![])), ch, + _p: PhantomData, } } @@ -195,7 +198,7 @@ where res } -impl Transport for SimulateTransport { +impl Transport for SimulateTransport { fn send(&mut self, m: RaftMessage) -> Result<()> { let ch = &mut self.ch; filter_send(&self.filters, m, |m| ch.send(m)) @@ -214,47 +217,49 @@ impl Transport for SimulateTransport { } } -impl> StoreRouter for SimulateTransport { - fn send(&self, msg: StoreMsg) -> Result<()> { +impl> StoreRouter for SimulateTransport { + fn send(&self, msg: StoreMsg) -> Result<()> { StoreRouter::send(&self.ch, msg) } } -impl> ProposalRouter for SimulateTransport { +impl> ProposalRouter<::Snapshot> + for SimulateTransport +{ fn send( &self, - cmd: RaftCommand, - ) -> std::result::Result<(), TrySendError>> { - ProposalRouter::::send(&self.ch, cmd) + cmd: RaftCommand<::Snapshot>, + ) -> std::result::Result<(), TrySendError::Snapshot>>> { + ProposalRouter::<::Snapshot>::send(&self.ch, cmd) } } -impl> CasualRouter for SimulateTransport { - fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { - CasualRouter::::send(&self.ch, region_id, msg) +impl> CasualRouter for SimulateTransport { + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::::send(&self.ch, region_id, msg) } } -impl> SignificantRouter for SimulateTransport { - fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { +impl> SignificantRouter for SimulateTransport { + fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { self.ch.significant_send(region_id, msg) } } -impl> RaftStoreRouter for SimulateTransport { +impl> RaftStoreRouter for SimulateTransport { fn send_raft_msg(&self, msg: RaftMessage) -> Result<()> { filter_send(&self.filters, msg, |m| self.ch.send_raft_msg(m)) } - fn broadcast_normal(&self, _: impl FnMut() -> PeerMsg) {} + fn broadcast_normal(&self, _: impl FnMut() -> PeerMsg) {} } -impl> 
LocalReadRouter for SimulateTransport { +impl> LocalReadRouter for SimulateTransport { fn read( &mut self, read_id: Option, req: RaftCmdRequest, - cb: Callback, + cb: Callback, ) -> RaftStoreResult<()> { self.ch.read(read_id, req, cb) } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 0bb948f13c9b..8933f4dca743 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -13,7 +13,9 @@ use collections::HashMap; use encryption_export::{ data_key_manager_from_config, DataKeyManager, FileConfig, MasterKeyConfig, }; -use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{ + config::BlobRunMode, RocksCompactedEvent, RocksEngine, RocksSnapshot, RocksStatistics, +}; use engine_test::raft::RaftTestEngine; use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, @@ -23,6 +25,7 @@ use fail::fail_point; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; use grpcio::{ChannelBuilder, Environment}; +use hybrid_engine::HybridEngine; use kvproto::{ encryptionpb::EncryptionMethod, kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, @@ -44,7 +47,8 @@ use raftstore::{ RaftRouterCompactedEventSender, Result, }; use rand::{seq::SliceRandom, RngCore}; -use server::common::ConfiguredRaftEngine; +use region_cache_memory_engine::RegionCacheMemoryEngine; +use server::common::{ConfiguredRaftEngine, KvEngineBuilder}; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -61,7 +65,9 @@ use tikv_util::{ }; use txn_types::Key; -use crate::{Cluster, Config, RawEngine, ServerCluster, Simulator}; +use crate::{Cluster, Config, KvEngineWithRocks, RawEngine, ServerCluster, Simulator}; + +pub type HybridEngineImpl = HybridEngine; pub fn must_get( engine: &impl RawEngine, @@ -396,14 +402,20 @@ pub fn check_raft_cmd_request(cmd: &RaftCmdRequest) -> bool { is_read } -pub fn make_cb( +pub fn make_cb_rocks( cmd: &RaftCmdRequest, ) -> (Callback, future::Receiver) { + make_cb::(cmd) +} + +pub fn make_cb( + cmd: &RaftCmdRequest, +) -> (Callback, future::Receiver) { let is_read = check_raft_cmd_request(cmd); let (tx, rx) = future::bounded(1, future::WakePolicy::Immediately); let mut detector = CallbackLeakDetector::default(); let cb = if is_read { - Callback::read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { detector.called = true; // we don't care error actually. let _ = tx.send(resp.response); @@ -418,12 +430,12 @@ pub fn make_cb( (cb, rx) } -pub fn make_cb_ext( +pub fn make_cb_ext( cmd: &RaftCmdRequest, proposed: Option, committed: Option, -) -> (Callback, future::Receiver) { - let (cb, receiver) = make_cb(cmd); +) -> (Callback, future::Receiver) { + let (cb, receiver) = make_cb::(cmd); if let Callback::Write { cb, .. } = cb { (Callback::write_ext(cb, proposed, committed), receiver) } else { @@ -432,8 +444,8 @@ pub fn make_cb_ext( } // Issue a read request on the specified peer. 
-pub fn read_on_peer( - cluster: &mut Cluster, +pub fn read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -450,8 +462,8 @@ pub fn read_on_peer( cluster.read(None, request, timeout) } -pub fn async_read_on_peer( - cluster: &mut Cluster, +pub fn async_read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -476,10 +488,10 @@ pub fn async_read_on_peer( }) } -pub fn batch_read_on_peer( - cluster: &mut Cluster, +pub fn batch_read_on_peer>( + cluster: &mut Cluster, requests: &[(metapb::Peer, metapb::Region)], -) -> Vec> { +) -> Vec> { let batch_id = Some(ThreadReadId::new()); let (tx, rx) = mpsc::sync_channel(3); let mut results = vec![]; @@ -510,8 +522,8 @@ pub fn batch_read_on_peer( results.into_iter().map(|resp| resp.1).collect() } -pub fn read_index_on_peer( - cluster: &mut Cluster, +pub fn read_index_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, read_quorum: bool, @@ -527,8 +539,8 @@ pub fn read_index_on_peer( cluster.read(None, request, timeout) } -pub fn async_read_index_on_peer( - cluster: &mut Cluster, +pub fn async_read_index_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -556,12 +568,12 @@ pub fn async_read_index_on_peer( }) } -pub fn async_command_on_node( - cluster: &mut Cluster, +pub fn async_command_on_node>( + cluster: &mut Cluster, node_id: u64, request: RaftCmdRequest, ) -> BoxFuture<'static, RaftCmdResponse> { - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); cluster .sim .rl() @@ -583,8 +595,8 @@ pub fn must_get_value(resp: &RaftCmdResponse) -> Vec { resp.get_responses()[0].get_get().get_value().to_vec() } -pub fn must_read_on_peer( - cluster: &mut Cluster, +pub fn must_read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -602,8 +614,8 @@ pub fn must_read_on_peer( } } -pub fn must_error_read_on_peer( - cluster: &mut Cluster, +pub fn must_error_read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -628,19 +640,22 @@ pub fn must_contains_error(resp: &RaftCmdResponse, msg: &str) { assert!(err_msg.contains(msg), "{:?}", resp); } -pub fn create_test_engine( +pub fn create_test_engine( // TODO: pass it in for all cases. 
- router: Option>, + router: Option>, limiter: Option>, cfg: &Config, ) -> ( - Engines, + Engines, Option>, TempDir, LazyWorker, Arc, Option>, -) { +) +where + EK: KvEngine + KvEngineBuilder, +{ let dir = test_util::temp_dir("test_cluster", cfg.prefer_mem); let mut cfg = cfg.clone(); cfg.storage.data_dir = dir.path().to_str().unwrap().to_string(); @@ -668,8 +683,9 @@ pub fn create_test_engine( })); } let factory = builder.build(); - let engine = factory.create_shared_db(dir.path()).unwrap(); - let engines = Engines::new(engine, raft_engine); + let disk_engine = factory.create_shared_db(dir.path()).unwrap(); + let kv_engine: EK = KvEngineBuilder::build(disk_engine); + let engines = Engines::new(kv_engine, raft_engine); ( engines, key_manager, @@ -746,8 +762,8 @@ pub fn configure_for_lease_read( election_timeout } -pub fn configure_for_enable_titan( - cluster: &mut Cluster, +pub fn configure_for_enable_titan>( + cluster: &mut Cluster, min_blob_size: ReadableSize, ) { cluster.cfg.rocksdb.titan.enabled = true; @@ -758,11 +774,15 @@ pub fn configure_for_enable_titan( cluster.cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize::kb(0); } -pub fn configure_for_disable_titan(cluster: &mut Cluster) { +pub fn configure_for_disable_titan>( + cluster: &mut Cluster, +) { cluster.cfg.rocksdb.titan.enabled = false; } -pub fn configure_for_encryption(cluster: &mut Cluster) { +pub fn configure_for_encryption>( + cluster: &mut Cluster, +) { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let master_key_file = manifest_dir.join("src/master-key.data"); @@ -776,8 +796,8 @@ pub fn configure_for_encryption(cluster: &mut Cluster) { } } -pub fn configure_for_causal_ts( - cluster: &mut Cluster, +pub fn configure_for_causal_ts>( + cluster: &mut Cluster, renew_interval: &str, renew_batch_min_size: u32, ) { @@ -787,24 +807,24 @@ pub fn configure_for_causal_ts( } /// Keep putting random kvs until specified size limit is reached. 
-pub fn put_till_size( - cluster: &mut Cluster, +pub fn put_till_size>( + cluster: &mut Cluster, limit: u64, range: &mut dyn Iterator, ) -> Vec { put_cf_till_size(cluster, CF_DEFAULT, limit, range) } -pub fn put_till_count( - cluster: &mut Cluster, +pub fn put_till_count>( + cluster: &mut Cluster, limit: u64, range: &mut dyn Iterator, ) -> Vec { put_cf_till_count(cluster, CF_WRITE, limit, range) } -pub fn put_cf_till_size( - cluster: &mut Cluster, +pub fn put_cf_till_size>( + cluster: &mut Cluster, cf: &'static str, limit: u64, range: &mut dyn Iterator, @@ -835,8 +855,8 @@ pub fn put_cf_till_size( key.into_bytes() } -pub fn put_cf_till_count( - cluster: &mut Cluster, +pub fn put_cf_till_count>( + cluster: &mut Cluster, cf: &'static str, limit: u64, range: &mut dyn Iterator, @@ -1513,7 +1533,11 @@ pub struct PeerClient { } impl PeerClient { - pub fn new(cluster: &Cluster, region_id: u64, peer: metapb::Peer) -> PeerClient { + pub fn new( + cluster: &Cluster>, + region_id: u64, + peer: metapb::Peer, + ) -> PeerClient { let cli = { let env = Arc::new(Environment::new(1)); let channel = @@ -1620,7 +1644,11 @@ pub fn peer_on_store(region: &metapb::Region, store_id: u64) -> metapb::Peer { .clone() } -pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, region_id: u64) { +pub fn wait_for_synced( + cluster: &mut Cluster>, + node_id: u64, + region_id: u64, +) { let mut storage = cluster .sim .read() @@ -1650,7 +1678,10 @@ pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, regio assert!(snapshot.ext().is_max_ts_synced()); } -pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { +pub fn test_delete_range>( + cluster: &mut Cluster, + cf: CfName, +) { let data_set: Vec<_> = (1..500) .map(|i| { ( @@ -1683,8 +1714,8 @@ pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { } } -pub fn put_with_timeout( - cluster: &mut Cluster, +pub fn put_with_timeout>( + cluster: &mut Cluster, node_id: u64, key: &[u8], value: &[u8], @@ -1701,7 +1732,11 @@ pub fn put_with_timeout( cluster.call_command_on_node(node_id, req, timeout) } -pub fn wait_down_peers(cluster: &Cluster, count: u64, peer: Option) { +pub fn wait_down_peers>( + cluster: &Cluster, + count: u64, + peer: Option, +) { let mut peers = cluster.get_down_peers(); for _ in 1..1000 { if peers.len() == count as usize && peer.as_ref().map_or(true, |p| peers.contains_key(p)) { diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 17fa91f3005b..97ea7bf0d245 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -23,6 +23,7 @@ test-engines-panic = [ [dependencies] api_version = { workspace = true } collections = { workspace = true } +engine_rocks = { workspace = true } futures = "0.3" kvproto = { workspace = true } pd_client = { workspace = true } diff --git a/components/test_storage/src/assert_storage.rs b/components/test_storage/src/assert_storage.rs index 3a641a322a22..d4cdbdb2698a 100644 --- a/components/test_storage/src/assert_storage.rs +++ b/components/test_storage/src/assert_storage.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use api_version::{ApiV1, KvFormat}; +use engine_rocks::RocksEngine as RocksDb; use kvproto::{ kvrpcpb::{Context, KeyRange, LockInfo}, metapb, @@ -44,11 +45,11 @@ impl AssertionStorage { } } -impl AssertionStorage { +impl AssertionStorage, F> { pub fn new_raft_storage_with_store_count( count: usize, key: &str, - ) -> (Cluster, Self) { + ) -> (Cluster>, Self) { let (cluster, store, ctx) = new_raft_storage_with_store_count::(count, key); let storage = Self { store, ctx }; (cluster, storage) @@ -56,7 +57,7 @@ impl AssertionStorage { pub fn update_with_key_byte( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ) -> metapb::Region { // ensure the leader of range which contains current key has been elected @@ -79,7 +80,7 @@ impl AssertionStorage { pub fn delete_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -98,7 +99,7 @@ impl AssertionStorage { fn get_from_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ts: impl Into, ) -> Option { @@ -116,7 +117,7 @@ impl AssertionStorage { pub fn get_none_from_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ts: impl Into, ) { @@ -125,7 +126,7 @@ impl AssertionStorage { pub fn put_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], value: &[u8], start_ts: impl Into, @@ -138,7 +139,7 @@ impl AssertionStorage { pub fn batch_put_ok_for_cluster<'a>( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, keys: &[impl AsRef<[u8]>], vals: impl Iterator, start_ts: impl Into, @@ -162,7 +163,7 @@ impl AssertionStorage { fn two_pc_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, prewrite_mutations: Vec, key: &[u8], commit_keys: Vec, @@ -206,7 +207,7 @@ impl AssertionStorage { pub fn gc_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, region_key: &[u8], mut region: metapb::Region, safe_point: impl Into, @@ -225,7 +226,7 @@ impl AssertionStorage { pub fn test_txn_store_gc3_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key_prefix: u8, ) { let key_len = 10_000; diff --git a/components/test_storage/src/util.rs b/components/test_storage/src/util.rs index e91125ba0011..54f82375afeb 100644 --- a/components/test_storage/src/util.rs +++ b/components/test_storage/src/util.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use api_version::KvFormat; +use engine_rocks::RocksEngine; use kvproto::kvrpcpb::Context; use test_raftstore::{new_server_cluster, Cluster, ServerCluster, SimulateEngine}; use tikv_util::HandyRwLock; @@ -55,7 +56,11 @@ macro_rules! 
follower_raft_engine { pub fn new_raft_engine( count: usize, key: &str, -) -> (Cluster, SimulateEngine, Context) { +) -> ( + Cluster>, + SimulateEngine, + Context, +) { let mut cluster = new_server_cluster(0, count); let (engine, ctx) = prepare_raft_engine!(cluster, key); (cluster, engine, ctx) @@ -65,8 +70,8 @@ pub fn new_raft_storage_with_store_count( count: usize, key: &str, ) -> ( - Cluster, - SyncTestStorage, + Cluster>, + SyncTestStorage, F>, Context, ) { let (cluster, engine, ctx) = new_raft_engine(count, key); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 05c602824c20..98b348722da8 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -18,7 +18,10 @@ fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { wb.write().unwrap(); } -fn prepare_cluster(cluster: &mut Cluster, initial_kvs: &[(Vec, Vec)]) { +fn prepare_cluster>( + cluster: &mut Cluster, + initial_kvs: &[(Vec, Vec)], +) { cluster.run(); for engines in cluster.engines.values() { enc_write_kvs(&engines.kv, initial_kvs); @@ -35,7 +38,7 @@ struct SetConfig { fn bench_set(b: &mut Bencher<'_>, input: &SetConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = input.factory.build(input.nodes); @@ -57,7 +60,7 @@ struct GetConfig { fn bench_get(b: &mut Bencher<'_>, input: &GetConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = input.factory.build(input.nodes); @@ -84,7 +87,7 @@ struct DeleteConfig { fn bench_delete(b: &mut Bencher<'_>, input: &DeleteConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = input.factory.build(input.nodes); @@ -105,7 +108,7 @@ where fn bench_raft_cluster(c: &mut Criterion, factory: F, label: &str) where - T: Simulator + 'static, + T: Simulator + 'static, F: ClusterFactory, { let nodes_coll = vec![1, 3, 5]; @@ -136,15 +139,15 @@ where group.finish(); } -trait ClusterFactory: Clone + fmt::Debug + 'static { - fn build(&self, nodes: usize) -> Cluster; +trait ClusterFactory>: Clone + fmt::Debug + 'static { + fn build(&self, nodes: usize) -> Cluster; } #[derive(Clone)] struct NodeClusterFactory; -impl ClusterFactory for NodeClusterFactory { - fn build(&self, nodes: usize) -> Cluster { +impl ClusterFactory> for NodeClusterFactory { + fn build(&self, nodes: usize) -> Cluster> { new_node_cluster(1, nodes) } } @@ -158,8 +161,8 @@ impl fmt::Debug for NodeClusterFactory { #[derive(Clone)] struct ServerClusterFactory; -impl ClusterFactory for ServerClusterFactory { - fn build(&self, nodes: usize) -> Cluster { +impl ClusterFactory> for ServerClusterFactory { + fn build(&self, nodes: usize) -> Cluster> { new_server_cluster(1, nodes) } } diff --git a/tests/failpoints/cases/test_bootstrap.rs b/tests/failpoints/cases/test_bootstrap.rs index 8dc2eb8b371a..9b4663616edb 100644 --- a/tests/failpoints/cases/test_bootstrap.rs +++ b/tests/failpoints/cases/test_bootstrap.rs @@ -2,6 +2,7 @@ use std::sync::{Arc, RwLock}; +use engine_rocks::RocksEngine; use engine_traits::Peekable; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb}; use test_pd_client::TestPdClient; @@ -9,7 +10,9 @@ use test_raftstore::*; fn test_bootstrap_half_way_failure(fp: &str) { let pd_client = Arc::new(TestPdClient::new(0, false)); - let sim = Arc::new(RwLock::new(NodeCluster::new(pd_client.clone()))); + let sim = Arc::new(RwLock::new(NodeCluster::::new( + pd_client.clone(), + ))); let mut cluster = Cluster::new(0, 5, sim, pd_client, ApiVersion::V1); // Try to start this node, return after 
persisted some keys. diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 8af8e29f3ac7..7c39dd2589b7 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -5,7 +5,7 @@ use std::{ time::Duration, }; -use engine_rocks::RocksSnapshot; +use engine_rocks::{RocksEngine, RocksSnapshot}; use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raft::eraftpb::MessageType; use raftstore::store::msg::*; @@ -61,7 +61,7 @@ impl CbReceivers { fn make_cb(cmd: &RaftCmdRequest) -> (Callback, CbReceivers) { let (proposed_tx, proposed_rx) = mpsc::channel(); let (committed_tx, committed_rx) = mpsc::channel(); - let (cb, applied_rx) = make_cb_ext( + let (cb, applied_rx) = make_cb_ext::( cmd, Some(Box::new(move || proposed_tx.send(()).unwrap())), Some(Box::new(move || committed_tx.send(()).unwrap())), @@ -76,7 +76,10 @@ fn make_cb(cmd: &RaftCmdRequest) -> (Callback, CbReceivers) { ) } -fn make_write_req(cluster: &mut Cluster, k: &[u8]) -> RaftCmdRequest { +fn make_write_req( + cluster: &mut Cluster>, + k: &[u8], +) -> RaftCmdRequest { let r = cluster.get_region(k); let mut req = new_request( r.get_id(), diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index c3612e64127c..6f91a2ff55bf 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -110,7 +110,7 @@ fn test_write_after_destroy() { let mut epoch = cluster.pd_client.get_region_epoch(r1); let mut admin_req = new_admin_request(r1, &epoch, conf_change); admin_req.mut_header().set_peer(new_peer(1, 1)); - let (cb1, mut rx1) = make_cb(&admin_req); + let (cb1, mut rx1) = make_cb_rocks(&admin_req); let engines_3 = cluster.get_all_engines(3); let region = block_on(cluster.pd_client.get_region_by_id(r1)) .unwrap() @@ -126,7 +126,7 @@ fn test_write_after_destroy() { .async_command_on_node(1, admin_req, cb1) .unwrap(); for _ in 0..100 { - let (cb2, _rx2) = make_cb(&put); + let (cb2, _rx2) = make_cb_rocks(&put); cluster .sim .rl() diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 929afeb70f48..cc7311bfe75f 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -10,6 +10,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_RAFT}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -1232,7 +1233,7 @@ fn test_prewrite_before_max_ts_is_synced() { let channel = ChannelBuilder::new(env).connect(&addr); let client = TikvClient::new(channel); - let do_prewrite = |cluster: &mut Cluster| { + let do_prewrite = |cluster: &mut Cluster>| { let region_id = right.get_id(); let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index a795422c120c..b7886ce8267c 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -3,6 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; +use engine_rocks::RocksEngine; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -14,7 +15,7 @@ use test_raftstore::*; use tikv_util::{time::Instant, HandyRwLock}; struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, api_version: ApiVersion, } 
diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 773d721da8bb..624e7a6f7886 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -315,7 +315,7 @@ fn test_read_after_cleanup_range_for_snap() { request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); // Send follower read request to peer 3 - let (cb1, mut rx1) = make_cb(&request); + let (cb1, mut rx1) = make_cb_rocks(&request); cluster .sim .rl() @@ -619,7 +619,7 @@ fn test_batch_read_index_after_transfer_leader() { let mut req = new_request(1, epoch, vec![new_read_index_cmd()], true); req.mut_header().set_peer(new_peer(2, 2)); - let (cb, rx) = make_cb(&req); + let (cb, rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(2, req, cb).unwrap(); resps.push(rx); } diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index cb986250d82e..30ccda4fe211 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -2,20 +2,31 @@ use std::{sync::Arc, time::Duration}; +use engine_rocks::RocksEngine; use kvproto::{kvrpcpb::Op, metapb::Peer}; use pd_client::PdClient; use raft::eraftpb::MessageType; use test_pd_client::TestPdClient; use test_raftstore::*; -fn prepare_for_stale_read(leader: Peer) -> (Cluster, Arc, PeerClient) { +fn prepare_for_stale_read( + leader: Peer, +) -> ( + Cluster>, + Arc, + PeerClient, +) { prepare_for_stale_read_before_run(leader, None) } fn prepare_for_stale_read_before_run( leader: Peer, before_run: Option>, -) -> (Cluster, Arc, PeerClient) { +) -> ( + Cluster>, + Arc, + PeerClient, +) { let mut cluster = new_server_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index da5a3da1a329..05b0badd662c 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -179,8 +179,11 @@ fn compact_files_to_target_level( engine.compact_files_cf(CF_DEFAULT, file_names, Some(level), 1, false) } -fn create_tikv_cluster_with_one_node_damaged() --> (Cluster, Arc, RocksEngine) { +fn create_tikv_cluster_with_one_node_damaged() -> ( + Cluster>, + Arc, + RocksEngine, +) { let mut cluster = new_server_cluster(0, 3); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index a9c6fa5d6e6d..ceb018fc610e 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::{Peer, Region}; use pd_client::PdClient; use raft::eraftpb::MessageType; @@ -83,7 +84,7 @@ fn stale_read_during_splitting(right_derive: bool) { } fn must_not_stale_read( - cluster: &mut Cluster, + cluster: &mut Cluster>, stale_key: &[u8], old_region: &Region, old_leader: &Peer, @@ -166,7 +167,7 @@ fn must_not_stale_read( } fn must_not_eq_on_key( - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], value: &[u8], read_quorum: bool, @@ -455,7 +456,7 @@ fn test_read_after_peer_destroyed() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() diff 
--git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 33a62f0532b7..e207525bcea6 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -3,6 +3,7 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; use collections::HashMap; +use engine_rocks::RocksEngine; use futures::executor::block_on; use kvproto::{metapb, raft_serverpb::RaftApplyState}; use pd_client::PdClient; @@ -521,8 +522,8 @@ fn test_non_witness_replica_read() { assert_eq!(resp.get_header().has_error(), false); } -fn must_get_error_is_witness( - cluster: &mut Cluster, +fn must_get_error_is_witness>( + cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, ) { diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 9af28b6e3d61..5bcd258947c8 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2,6 +2,7 @@ use std::{cmp, thread, time::Duration}; +use engine_rocks::RocksEngine; use engine_traits::CF_LOCK; use kvproto::{ coprocessor::{Request, Response, StoreBatchTask, StoreBatchTaskResponse}, @@ -2208,43 +2209,44 @@ fn test_batch_request() { true, ), ]; - let prepare_req = - |cluster: &mut Cluster, ranges: &Vec| -> Request { - let original_range = ranges.get(0).unwrap(); - let key_range = product.get_record_range(original_range.start, original_range.end); - let region_key = Key::from_raw(&key_range.start); - let mut req = DagSelect::from(&product) - .key_ranges(vec![key_range]) - .build_with(ctx.clone(), &[0]); - let mut new_ctx = Context::default(); - let new_region = cluster.get_region(region_key.as_encoded()); - let leader = cluster.leader_of_region(new_region.get_id()).unwrap(); - new_ctx.set_region_id(new_region.get_id()); - new_ctx.set_region_epoch(new_region.get_region_epoch().clone()); - new_ctx.set_peer(leader); - req.set_context(new_ctx); - req.set_start_ts(100); - - let batch_handle_ranges = &ranges.as_slice()[1..]; - for handle_range in batch_handle_ranges.iter() { - let range_start_key = Key::from_raw( - &product - .get_record_range(handle_range.start, handle_range.end) - .start, - ); - let batch_region = cluster.get_region(range_start_key.as_encoded()); - let batch_leader = cluster.leader_of_region(batch_region.get_id()).unwrap(); - let batch_key_ranges = - vec![product.get_record_range(handle_range.start, handle_range.end)]; - let mut store_batch_task = StoreBatchTask::new(); - store_batch_task.set_region_id(batch_region.get_id()); - store_batch_task.set_region_epoch(batch_region.get_region_epoch().clone()); - store_batch_task.set_peer(batch_leader); - store_batch_task.set_ranges(batch_key_ranges.into()); - req.tasks.push(store_batch_task); - } - req - }; + let prepare_req = |cluster: &mut Cluster>, + ranges: &Vec| + -> Request { + let original_range = ranges.get(0).unwrap(); + let key_range = product.get_record_range(original_range.start, original_range.end); + let region_key = Key::from_raw(&key_range.start); + let mut req = DagSelect::from(&product) + .key_ranges(vec![key_range]) + .build_with(ctx.clone(), &[0]); + let mut new_ctx = Context::default(); + let new_region = cluster.get_region(region_key.as_encoded()); + let leader = cluster.leader_of_region(new_region.get_id()).unwrap(); + new_ctx.set_region_id(new_region.get_id()); + new_ctx.set_region_epoch(new_region.get_region_epoch().clone()); + new_ctx.set_peer(leader); + req.set_context(new_ctx); + req.set_start_ts(100); + + 
let batch_handle_ranges = &ranges.as_slice()[1..]; + for handle_range in batch_handle_ranges.iter() { + let range_start_key = Key::from_raw( + &product + .get_record_range(handle_range.start, handle_range.end) + .start, + ); + let batch_region = cluster.get_region(range_start_key.as_encoded()); + let batch_leader = cluster.leader_of_region(batch_region.get_id()).unwrap(); + let batch_key_ranges = + vec![product.get_record_range(handle_range.start, handle_range.end)]; + let mut store_batch_task = StoreBatchTask::new(); + store_batch_task.set_region_id(batch_region.get_id()); + store_batch_task.set_region_epoch(batch_region.get_region_epoch().clone()); + store_batch_task.set_peer(batch_leader); + store_batch_task.set_ranges(batch_key_ranges.into()); + req.tasks.push(store_batch_task); + } + req + }; let verify_response = |result: &QueryResult, resp: &Response| { let (data, details, region_err, locked, other_err) = ( resp.get_data(), diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index 96ebc071bbc8..92804860dd93 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -30,7 +30,7 @@ use uuid::Uuid; const CLEANUP_SST_MILLIS: u64 = 10; -pub fn new_cluster(cfg: TikvConfig) -> (Cluster, Context) { +pub fn new_cluster(cfg: TikvConfig) -> (Cluster>, Context) { let count = 1; let mut cluster = new_server_cluster(0, count); cluster.cfg = Config { @@ -77,7 +77,12 @@ pub fn new_cluster_v2( pub fn open_cluster_and_tikv_import_client( cfg: Option, -) -> (Cluster, Context, TikvClient, ImportSstClient) { +) -> ( + Cluster>, + Context, + TikvClient, + ImportSstClient, +) { let cfg = cfg.unwrap_or_else(|| { let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); @@ -150,14 +155,18 @@ pub fn open_cluster_and_tikv_import_client_v2( (cluster, ctx, tikv, import) } -pub fn new_cluster_and_tikv_import_client() --> (Cluster, Context, TikvClient, ImportSstClient) { +pub fn new_cluster_and_tikv_import_client() -> ( + Cluster>, + Context, + TikvClient, + ImportSstClient, +) { open_cluster_and_tikv_import_client(None) } pub fn new_cluster_and_tikv_import_client_tde() -> ( tempfile::TempDir, - Cluster, + Cluster>, Context, TikvClient, ImportSstClient, diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index bca389b26e67..74b4a73da438 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -6,6 +6,7 @@ use std::{ }; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use engine_traits::{ DbOptionsExt, Engines, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -27,7 +28,7 @@ use tikv_util::{ worker::{dummy_scheduler, Builder as WorkerBuilder, LazyWorker}, }; -fn test_bootstrap_idempotent(cluster: &mut Cluster) { +fn test_bootstrap_idempotent>(cluster: &mut Cluster) { // assume that there is a node bootstrap the cluster and add region in pd // successfully cluster.add_first_region().unwrap(); @@ -49,7 +50,8 @@ fn test_node_bootstrap_with_prepared_data() { let cfg = new_tikv_config(0); let (_, system) = fsm::create_raft_batch_system(&cfg.raft_store, &None); - let simulate_trans = SimulateTransport::new(ChannelTransport::new()); + let simulate_trans = + SimulateTransport::<_, RocksEngine>::new(ChannelTransport::::new()); let tmp_path = Builder::new().prefix("test_cluster").tempdir().unwrap(); let engine = 
engine_rocks::util::new_engine(tmp_path.path().to_str().unwrap(), ALL_CFS).unwrap(); diff --git a/tests/integrations/raftstore/test_clear_stale_data.rs b/tests/integrations/raftstore/test_clear_stale_data.rs index 8010d4c956cf..69696a191d4d 100644 --- a/tests/integrations/raftstore/test_clear_stale_data.rs +++ b/tests/integrations/raftstore/test_clear_stale_data.rs @@ -47,7 +47,7 @@ fn check_kv_in_all_cfs(db: &RocksEngine, i: u8, found: bool) { } } -fn test_clear_stale_data(cluster: &mut Cluster) { +fn test_clear_stale_data>(cluster: &mut Cluster) { // Disable compaction at level 0. cluster .cfg diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 24034c831924..1bea73d85ea3 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -6,7 +6,7 @@ use std::{ }; use collections::HashMap; -use engine_rocks::{raw::Range, util::get_cf_handle}; +use engine_rocks::{raw::Range, util::get_cf_handle, RocksEngine}; use engine_traits::{CachedTablet, MiscExt, CF_WRITE}; use keys::{data_key, DATA_MAX_KEY}; use test_raftstore::*; @@ -32,7 +32,7 @@ fn gen_delete_k(k: &[u8], commit_ts: TimeStamp) -> Vec { k.as_encoded().clone() } -fn test_compact_after_delete(cluster: &mut Cluster) { +fn test_compact_after_delete>(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index fbc7629c73fe..2f3f882927e6 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -1,17 +1,21 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -use engine_rocks::raw::DBStatisticsTickerType; +use engine_rocks::{raw::DBStatisticsTickerType, RocksEngine}; use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush>(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { +fn flush_then_check>( + cluster: &mut Cluster, + interval: u64, + written: bool, +) { flush(cluster); // Wait for compaction. sleep_ms(interval * 2); @@ -26,7 +30,7 @@ fn flush_then_check(cluster: &mut Cluster, interval: u64, writt } } -fn test_compact_lock_cf(cluster: &mut Cluster) { +fn test_compact_lock_cf>(cluster: &mut Cluster) { let interval = 500; // Set lock_cf_compact_interval. cluster.cfg.raft_store.lock_cf_compact_interval = ReadableDuration::millis(interval); diff --git a/tests/integrations/raftstore/test_compact_log.rs b/tests/integrations/raftstore/test_compact_log.rs index bc097dd27e9b..fcafec4a82ee 100644 --- a/tests/integrations/raftstore/test_compact_log.rs +++ b/tests/integrations/raftstore/test_compact_log.rs @@ -1,12 +1,13 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
use collections::HashMap; +use engine_rocks::RocksEngine; use kvproto::raft_serverpb::RaftApplyState; use raftstore::store::*; use test_raftstore::*; use tikv_util::config::*; -fn test_compact_log(cluster: &mut Cluster) { +fn test_compact_log>(cluster: &mut Cluster) { cluster.run(); let mut before_states = HashMap::default(); @@ -42,7 +43,7 @@ fn test_compact_log(cluster: &mut Cluster) { ); } -fn test_compact_count_limit(cluster: &mut Cluster) { +fn test_compact_count_limit>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_threshold = 500; cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); @@ -107,7 +108,7 @@ fn test_compact_count_limit(cluster: &mut Cluster) { ); } -fn test_compact_many_times(cluster: &mut Cluster) { +fn test_compact_many_times>(cluster: &mut Cluster) { let gc_limit: u64 = 100; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(gc_limit); cluster.cfg.raft_store.raft_log_gc_threshold = 500; @@ -176,7 +177,7 @@ fn test_node_compact_many_times() { test_compact_many_times(&mut cluster); } -fn test_compact_size_limit(cluster: &mut Cluster) { +fn test_compact_size_limit>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100000); cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(1)); cluster.run(); @@ -251,7 +252,9 @@ fn test_node_compact_size_limit() { test_compact_size_limit(&mut cluster); } -fn test_compact_reserve_max_ticks(cluster: &mut Cluster) { +fn test_compact_reserve_max_ticks>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_threshold = 500; cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index 44537e8b4095..91a63b1878c5 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -2,6 +2,7 @@ use std::time::Duration; +use engine_rocks::RocksEngine; use engine_traits::{RaftEngine, RaftEngineDebug}; use kvproto::raft_serverpb::RaftLocalState; use raft::eraftpb::MessageType; @@ -43,10 +44,14 @@ enum DataLost { AllLost, } -fn test(cluster: &mut Cluster, action: A, check: C, mode: DataLost) -where - A: FnOnce(&mut Cluster), - C: FnOnce(&mut Cluster), +fn test( + cluster: &mut Cluster>, + action: A, + check: C, + mode: DataLost, +) where + A: FnOnce(&mut Cluster>), + C: FnOnce(&mut Cluster>), { let filter = match mode { DataLost::AllLost | DataLost::LeaderCommit => RegionPacketFilter::new(1, 1) diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 5a28646db659..9ca6092e624c 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -555,9 +555,11 @@ trait ClusterI { ) -> raftstore::Result; } -impl ClusterI for Cluster { +impl ClusterI for Cluster> { fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { - Cluster::::region_local_state(self, region_id, store_id) + Cluster::>::region_local_state( + self, region_id, store_id, + ) } fn query_leader( &self, @@ -565,14 +567,16 @@ impl ClusterI for Cluster { region_id: u64, timeout: Duration, ) -> Option { - Cluster::::query_leader(self, store_id, region_id, timeout) + Cluster::>::query_leader( + self, store_id, region_id, timeout, + ) } fn call_command( &self, request: 
RaftCmdRequest, timeout: Duration, ) -> raftstore::Result { - Cluster::::call_command(self, request, timeout) + Cluster::>::call_command(self, request, timeout) } } diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 86962330f0f7..6e3c64d78515 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -62,7 +62,7 @@ fn test_proposal_prevent_sleep() { true, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); // send to peer 2 cluster .sim @@ -90,7 +90,7 @@ fn test_proposal_prevent_sleep() { let conf_change = new_change_peer_request(ConfChangeType::RemoveNode, new_peer(3, 3)); let mut admin_req = new_admin_request(1, region.get_region_epoch(), conf_change); admin_req.mut_header().set_peer(new_peer(1, 1)); - let (cb, _rx) = make_cb(&admin_req); + let (cb, _rx) = make_cb_rocks(&admin_req); cluster .sim .rl() @@ -482,7 +482,7 @@ fn test_leader_demoted_when_hibernated() { ); request.mut_header().set_peer(new_peer(3, 3)); // In case peer 3 is hibernated. - let (cb, _rx) = make_cb(&request); + let (cb, _rx) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_joint_consensus.rs b/tests/integrations/raftstore/test_joint_consensus.rs index 55def7a099b3..e682aa9a6568 100644 --- a/tests/integrations/raftstore/test_joint_consensus.rs +++ b/tests/integrations/raftstore/test_joint_consensus.rs @@ -2,6 +2,7 @@ use std::{sync::Arc, time::*}; +use engine_rocks::RocksEngine; use kvproto::{ metapb::{self, PeerRole, Region}, raft_cmdpb::{ChangePeerRequest, RaftCmdRequest, RaftCmdResponse}, @@ -473,12 +474,12 @@ fn test_leader_down_in_joint_state() { } fn call_conf_change_v2( - cluster: &mut Cluster, + cluster: &mut Cluster, region_id: u64, changes: Vec, ) -> Result where - T: Simulator, + T: Simulator, { let conf_change = new_change_peer_v2_request(changes); let epoch = cluster.pd_client.get_region_epoch(region_id); @@ -487,13 +488,13 @@ where } fn call_conf_change( - cluster: &mut Cluster, + cluster: &mut Cluster, region_id: u64, conf_change_type: ConfChangeType, peer: metapb::Peer, ) -> Result where - T: Simulator, + T: Simulator, { let conf_change = new_change_peer_request(conf_change_type, peer); let epoch = cluster.pd_client.get_region_epoch(region_id); @@ -501,9 +502,9 @@ where cluster.call_command_on_leader(admin_req, Duration::from_secs(3)) } -fn leave_joint(cluster: &mut Cluster, region_id: u64) -> Result +fn leave_joint(cluster: &mut Cluster, region_id: u64) -> Result where - T: Simulator, + T: Simulator, { call_conf_change_v2(cluster, region_id, vec![]) } diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index abf17e01e9d9..f9e6747b660a 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -427,7 +427,7 @@ fn test_node_callback_when_destroyed() { let get = new_get_cmd(b"k1"); let mut req = new_request(1, epoch, vec![get], true); req.mut_header().set_peer(leader); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster .sim .rl() @@ -648,7 +648,7 @@ fn test_not_leader_read_lease() { true, ); req.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.must_transfer_leader(region_id, 
new_peer(3, 3)); @@ -701,7 +701,7 @@ fn test_read_index_after_write() { req.mut_header() .set_peer(new_peer(1, region_on_store1.get_id())); // Don't care about the first one's read index - let (cb, _) = make_cb(&req); + let (cb, _) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.must_put(b"k2", b"v2"); @@ -715,7 +715,7 @@ fn test_read_index_after_write() { ); req.mut_header() .set_peer(new_peer(1, region_on_store1.get_id())); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.sim.wl().clear_recv_filters(2); diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 8093a30872da..f40e6695599b 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::Peekable; use kvproto::raft_cmdpb::RaftCmdResponse; use raft::eraftpb::MessageType; @@ -16,13 +17,15 @@ use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; -fn test_multi_base(cluster: &mut Cluster) { +fn test_multi_base>(cluster: &mut Cluster) { cluster.run(); test_multi_base_after_bootstrap(cluster); } -fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { +fn test_multi_base_after_bootstrap>( + cluster: &mut Cluster, +) { let (key, value) = (b"k1", b"v1"); cluster.must_put(key, value); @@ -49,7 +52,7 @@ fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { // TODO add epoch not match test cases. } -fn test_multi_leader_crash(cluster: &mut Cluster) { +fn test_multi_leader_crash>(cluster: &mut Cluster) { cluster.run(); let (key1, value1) = (b"k1", b"v1"); @@ -90,7 +93,7 @@ fn test_multi_leader_crash(cluster: &mut Cluster) { must_get_none(&cluster.engines[&last_leader.get_store_id()].kv, key1); } -fn test_multi_cluster_restart(cluster: &mut Cluster) { +fn test_multi_cluster_restart>(cluster: &mut Cluster) { cluster.run(); let (key, value) = (b"k1", b"v1"); @@ -110,7 +113,10 @@ fn test_multi_cluster_restart(cluster: &mut Cluster) { assert_eq!(cluster.get(key), Some(value.to_vec())); } -fn test_multi_lost_majority(cluster: &mut Cluster, count: usize) { +fn test_multi_lost_majority>( + cluster: &mut Cluster, + count: usize, +) { cluster.run(); let leader = cluster.leader_of_region(1); @@ -129,8 +135,8 @@ fn test_multi_lost_majority(cluster: &mut Cluster, count: usize assert!(cluster.leader_of_region(1).is_none()); } -fn test_multi_random_restart( - cluster: &mut Cluster, +fn test_multi_random_restart>( + cluster: &mut Cluster, node_count: usize, restart_count: u32, ) { @@ -173,7 +179,7 @@ fn test_multi_server_base() { test_multi_base(&mut cluster) } -fn test_multi_latency(cluster: &mut Cluster) { +fn test_multi_latency>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(DelayFilter::new(Duration::from_millis( 30, @@ -195,7 +201,7 @@ fn test_multi_server_latency() { test_multi_latency(&mut cluster); } -fn test_multi_random_latency(cluster: &mut Cluster) { +fn test_multi_random_latency>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(RandomLatencyFilter::new(50))); test_multi_base_after_bootstrap(cluster); @@ -215,7 +221,7 @@ fn test_multi_server_random_latency() { test_multi_random_latency(&mut cluster); } -fn test_multi_drop_packet(cluster: &mut Cluster) { 
+fn test_multi_drop_packet>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(DropPacketFilter::new(30))); test_multi_base_after_bootstrap(cluster); @@ -295,7 +301,9 @@ fn test_multi_server_random_restart() { test_multi_random_restart(&mut cluster, count, 10); } -fn test_leader_change_with_uncommitted_log(cluster: &mut Cluster) { +fn test_leader_change_with_uncommitted_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -485,7 +493,9 @@ fn test_node_leader_change_with_log_overlap() { panic!("callback has not been called after 5s."); } -fn test_read_leader_with_unapplied_log(cluster: &mut Cluster) { +fn test_read_leader_with_unapplied_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -574,8 +584,8 @@ fn test_server_read_leader_with_unapplied_log() { test_read_leader_with_unapplied_log(&mut cluster); } -fn get_with_timeout( - cluster: &mut Cluster, +fn get_with_timeout>( + cluster: &mut Cluster, key: &[u8], read_quorum: bool, timeout: Duration, @@ -591,7 +601,9 @@ fn get_with_timeout( cluster.call_command_on_leader(req, timeout) } -fn test_remove_leader_with_uncommitted_log(cluster: &mut Cluster) { +fn test_remove_leader_with_uncommitted_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -717,7 +729,7 @@ fn test_node_dropped_proposal() { .expect("callback should have been called with in 5s."); } -fn test_consistency_check(cluster: &mut Cluster) { +fn test_consistency_check>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -740,7 +752,7 @@ fn test_node_consistency_check() { test_consistency_check(&mut cluster); } -fn test_batch_write(cluster: &mut Cluster) { +fn test_batch_write>(cluster: &mut Cluster) { cluster.run(); let r = cluster.get_region(b""); cluster.must_split(&r, b"k3"); diff --git a/tests/integrations/raftstore/test_prevote.rs b/tests/integrations/raftstore/test_prevote.rs index c81b34f0435c..c843154b1215 100644 --- a/tests/integrations/raftstore/test_prevote.rs +++ b/tests/integrations/raftstore/test_prevote.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use raft::eraftpb::MessageType; use test_raftstore::*; use tikv_util::HandyRwLock; @@ -15,7 +16,10 @@ enum FailureType<'a> { Reboot(&'a [u64]), } -fn attach_prevote_notifiers(cluster: &Cluster, peer: u64) -> mpsc::Receiver<()> { +fn attach_prevote_notifiers>( + cluster: &Cluster, + peer: u64, +) -> mpsc::Receiver<()> { // Setup a notifier let (tx, rx) = mpsc::channel(); let response_notifier = Box::new(MessageTypeNotifier::new( @@ -37,8 +41,8 @@ fn attach_prevote_notifiers(cluster: &Cluster, peer: u64) -> mp // Validate that prevote is used in elections after partition or reboot of some // nodes. 
-fn test_prevote( - cluster: &mut Cluster, +fn test_prevote>( + cluster: &mut Cluster, failure_type: FailureType<'_>, leader_after_failure_id: impl Into>, detect_during_failure: impl Into>, @@ -219,7 +223,7 @@ fn test_prevote_reboot_minority_followers() { // Test isolating a minority of the cluster and make sure that the remove // themselves. -fn test_pair_isolated(cluster: &mut Cluster) { +fn test_pair_isolated>(cluster: &mut Cluster) { let region = 1; let pd_client = Arc::clone(&cluster.pd_client); @@ -246,7 +250,9 @@ fn test_server_pair_isolated() { test_pair_isolated(&mut cluster); } -fn test_isolated_follower_leader_does_not_change(cluster: &mut Cluster) { +fn test_isolated_follower_leader_does_not_change>( + cluster: &mut Cluster, +) { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -282,7 +288,9 @@ fn test_server_isolated_follower_leader_does_not_change() { test_isolated_follower_leader_does_not_change(&mut cluster); } -fn test_create_peer_from_pre_vote(cluster: &mut Cluster) { +fn test_create_peer_from_pre_vote>( + cluster: &mut Cluster, +) { let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_change_observer.rs b/tests/integrations/raftstore/test_region_change_observer.rs index 72bbfdc9b8f2..4b37e8aa9628 100644 --- a/tests/integrations/raftstore/test_region_change_observer.rs +++ b/tests/integrations/raftstore/test_region_change_observer.rs @@ -9,6 +9,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raft::StateRole; use raftstore::coprocessor::{ @@ -39,7 +40,7 @@ impl RegionChangeObserver for TestObserver { } } -fn test_region_change_observer_impl(mut cluster: Cluster) { +fn test_region_change_observer_impl(mut cluster: Cluster>) { let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_heartbeat.rs b/tests/integrations/raftstore/test_region_heartbeat.rs index 29f7e8b10dd6..1f9b7cb1eb8b 100644 --- a/tests/integrations/raftstore/test_region_heartbeat.rs +++ b/tests/integrations/raftstore/test_region_heartbeat.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv_util::{ @@ -91,7 +92,7 @@ fn test_server_down_peers_without_hibernate_regions() { test_down_peers!(&mut cluster); } -fn test_pending_peers(cluster: &mut Cluster) { +fn test_pending_peers>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. 
pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_info_accessor.rs b/tests/integrations/raftstore/test_region_info_accessor.rs index 24d90b66327c..6da6c062e9ea 100644 --- a/tests/integrations/raftstore/test_region_info_accessor.rs +++ b/tests/integrations/raftstore/test_region_info_accessor.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raft::StateRole; use raftstore::coprocessor::{RangeKey, RegionInfo, RegionInfoAccessor}; @@ -47,7 +48,10 @@ fn check_region_ranges(regions: &[(Region, StateRole)], ranges: &[(&[u8], &[u8]) }) } -fn test_region_info_accessor_impl(cluster: &mut Cluster, c: &RegionInfoAccessor) { +fn test_region_info_accessor_impl( + cluster: &mut Cluster>, + c: &RegionInfoAccessor, +) { for i in 0..9 { let k = format!("k{}", i).into_bytes(); let v = format!("v{}", i).into_bytes(); diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 76059fa8f876..db3731064028 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -2,13 +2,14 @@ use std::{iter::FromIterator, sync::Arc, thread, time::Duration}; +use engine_rocks::RocksEngine; use kvproto::replication_modepb::*; use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use test_raftstore::*; use tikv_util::{config::*, mpsc::future, HandyRwLock}; -fn prepare_cluster() -> Cluster { +fn prepare_cluster() -> Cluster> { let mut cluster = new_server_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.pd_client.configure_dr_auto_sync("zone"); @@ -20,7 +21,7 @@ fn prepare_cluster() -> Cluster { cluster } -fn configure_for_snapshot(cluster: &mut Cluster) { +fn configure_for_snapshot(cluster: &mut Cluster>) { // Truncate the log quickly so that we can force sending snapshot. 
cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(2); @@ -28,13 +29,13 @@ fn configure_for_snapshot(cluster: &mut Cluster) { cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); } -fn run_cluster(cluster: &mut Cluster) { +fn run_cluster(cluster: &mut Cluster>) { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v0"); } -fn prepare_labels(cluster: &mut Cluster) { +fn prepare_labels(cluster: &mut Cluster>) { cluster.add_label(1, "dc", "dc1"); cluster.add_label(2, "dc", "dc1"); cluster.add_label(3, "dc", "dc2"); @@ -61,7 +62,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -83,7 +84,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -174,7 +175,7 @@ fn test_sync_recover_after_apply_snapshot() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -321,7 +322,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -357,7 +358,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -385,7 +386,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -416,7 +417,7 @@ fn test_replication_mode_allowlist() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -504,7 +505,7 @@ fn test_migrate_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -550,7 +551,7 @@ fn test_migrate_majority_to_drautosync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index edef780ce31b..23b2a37e6c9b 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -12,6 +12,7 @@ use std::{ }; use collections::HashMap; +use engine_rocks::RocksEngine; use engine_traits::{Checkpointer, KvEngine, RaftEngineDebug}; use file_system::{IoOp, IoType}; use futures::executor::block_on; @@ -39,7 +40,10 @@ use tikv_util::{ HandyRwLock, }; -fn test_huge_snapshot(cluster: &mut Cluster, max_snapshot_file_size: u64) { +fn test_huge_snapshot>( + cluster: &mut Cluster, + max_snapshot_file_size: u64, +) { cluster.cfg.rocksdb.titan.enabled = true; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); diff --git a/tests/integrations/raftstore/test_split_region.rs 
b/tests/integrations/raftstore/test_split_region.rs index 3affbadec4bf..1dd5e7db6d03 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -7,6 +7,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; use kvproto::{ @@ -629,7 +630,10 @@ fn test_node_split_region_after_reboot_with_config_change() { } } -fn test_split_epoch_not_match(cluster: &mut Cluster, right_derive: bool) { +fn test_split_epoch_not_match>( + cluster: &mut Cluster, + right_derive: bool, +) { cluster.cfg.raft_store.right_derive_when_split = right_derive; cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index f76373756f92..5ef90e30e94d 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -4,6 +4,7 @@ use std::{sync::Arc, thread, time::*}; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RegionLocalState}; use pd_client::PdClient; @@ -30,7 +31,7 @@ use tikv_util::{config::ReadableDuration, HandyRwLock}; /// time, and it would check with pd to confirm whether it's still a member of /// the cluster. If not, it should destroy itself as a stale peer which is /// removed out already. -fn test_stale_peer_out_of_region(cluster: &mut Cluster) { +fn test_stale_peer_out_of_region>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); @@ -113,7 +114,10 @@ fn test_server_stale_peer_out_of_region() { /// time, and it's an initialized peer without any data. It would destroy itself /// as stale peer directly and should not impact other region data on the /// same store. 
-fn test_stale_peer_without_data(cluster: &mut Cluster, right_derive: bool) { +fn test_stale_peer_without_data>( + cluster: &mut Cluster, + right_derive: bool, +) { cluster.cfg.raft_store.right_derive_when_split = right_derive; let pd_client = Arc::clone(&cluster.pd_client); @@ -299,7 +303,7 @@ fn test_stale_learner_with_read_index() { ); request.mut_header().set_peer(new_peer(3, 3)); request.mut_header().set_replica_read(true); - let (cb, _) = make_cb(&request); + let (cb, _) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 60f10936f2d0..821fc19dff8e 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -7,6 +7,7 @@ use std::{ }; use api_version::{test_kv_format_impl, KvFormat}; +use engine_rocks::RocksEngine; use engine_traits::MiscExt; use futures::{executor::block_on, SinkExt, StreamExt}; use grpcio::*; @@ -17,7 +18,7 @@ use test_raftstore::*; use tikv_util::{config::*, store::QueryStats}; use txn_types::Key; -fn check_available(cluster: &mut Cluster) { +fn check_available>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); let engine = cluster.get_engine(1); @@ -43,7 +44,7 @@ fn check_available(cluster: &mut Cluster) { panic!("available not changed") } -fn test_simple_store_stats(cluster: &mut Cluster) { +fn test_simple_store_stats>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(20); @@ -142,7 +143,14 @@ fn test_store_heartbeat_report_hotspots() { fail::remove("mock_hotspot_threshold"); } -type Query = dyn Fn(Context, &Cluster, TikvClient, u64, u64, Vec); +type Query = dyn Fn( + Context, + &Cluster>, + TikvClient, + u64, + u64, + Vec, +); #[test] fn test_query_stats() { @@ -435,7 +443,7 @@ fn test_txn_query_stats_tmpl() { } fn raw_put( - _cluster: &Cluster, + _cluster: &Cluster>, client: &TikvClient, ctx: &Context, _store_id: u64, @@ -453,7 +461,7 @@ fn raw_put( } fn put( - cluster: &Cluster, + cluster: &Cluster>, client: &TikvClient, ctx: &Context, store_id: u64, @@ -674,7 +682,7 @@ fn test_txn_delete_query() { } fn check_query_num_read( - cluster: &Cluster, + cluster: &Cluster>, store_id: u64, region_id: u64, kind: QueryKind, @@ -700,7 +708,7 @@ fn check_query_num_read( } fn check_query_num_write( - cluster: &Cluster, + cluster: &Cluster>, store_id: u64, kind: QueryKind, expect: u64, @@ -720,7 +728,7 @@ fn check_query_num_write( } fn check_split_key( - cluster: &Cluster, + cluster: &Cluster>, start_key: Vec, end_key: Option>, ) -> bool { diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index c1cd0befcf1a..f5c419ac65bd 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -3,6 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use crossbeam::channel; +use engine_rocks::RocksEngine; use engine_traits::{CfNamesExt, Iterable, Peekable, RaftEngineDebug, SyncMutable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState, StoreIdent}; use protobuf::Message; @@ -10,7 +11,7 @@ use raft::eraftpb::MessageType; use test_raftstore::*; use tikv_util::{config::*, time::Instant}; -fn test_tombstone(cluster: &mut Cluster) { +fn test_tombstone>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. 
pd_client.disable_default_operator(); @@ -113,7 +114,7 @@ fn test_server_tombstone() { test_tombstone(&mut cluster); } -fn test_fast_destroy(cluster: &mut Cluster) { +fn test_fast_destroy>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. @@ -158,7 +159,7 @@ fn test_server_fast_destroy() { test_fast_destroy(&mut cluster); } -fn test_readd_peer(cluster: &mut Cluster) { +fn test_readd_peer>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_transport.rs b/tests/integrations/raftstore/test_transport.rs index 4ed3d8da1601..cb1bcefbcad6 100644 --- a/tests/integrations/raftstore/test_transport.rs +++ b/tests/integrations/raftstore/test_transport.rs @@ -1,8 +1,9 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. +use engine_rocks::RocksEngine; use test_raftstore::*; -fn test_partition_write(cluster: &mut Cluster) { +fn test_partition_write>(cluster: &mut Cluster) { cluster.run(); let (key, value) = (b"k1", b"v1"); diff --git a/tests/integrations/raftstore/test_update_region_size.rs b/tests/integrations/raftstore/test_update_region_size.rs index f2ff0d4f2176..22a5e1f4534b 100644 --- a/tests/integrations/raftstore/test_update_region_size.rs +++ b/tests/integrations/raftstore/test_update_region_size.rs @@ -2,18 +2,19 @@ use std::{sync::Arc, thread, time}; +use engine_rocks::RocksEngine; use engine_traits::MiscExt; use pd_client::PdClient; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush>(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cfs(&[], true).unwrap(); } } -fn test_update_region_size(cluster: &mut Cluster) { +fn test_update_region_size>(cluster: &mut Cluster) { cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(50); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); @@ -24,7 +25,7 @@ fn test_update_region_size(cluster: &mut Cluster) { .level0_file_num_compaction_trigger = 10; cluster.start().unwrap(); - let batch_put = |cluster: &mut Cluster, mut start, end| { + let batch_put = |cluster: &mut Cluster, mut start, end| { while start < end { let next = std::cmp::min(end, start + 50); let requests = (start..next) diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index d4332403ceaf..7879ffc49be7 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -7,6 +7,7 @@ use std::{ }; use collections::HashMap; +use engine_rocks::RocksEngine; use futures::executor::block_on; use kvproto::{ metapb, @@ -484,8 +485,8 @@ fn test_witness_replica_read() { ); } -fn must_get_error_is_witness( - cluster: &mut Cluster, +fn must_get_error_is_witness>( + cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, ) { diff --git a/tests/integrations/resource_metering/test_read_keys.rs b/tests/integrations/resource_metering/test_read_keys.rs index f7a4ef869065..64c291049d98 100644 --- a/tests/integrations/resource_metering/test_read_keys.rs +++ b/tests/integrations/resource_metering/test_read_keys.rs @@ -4,6 +4,7 @@ use std::{sync::Arc, time::Duration}; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{unbounded, Receiver, 
RecvTimeoutError, Sender}; +use engine_rocks::RocksEngine as RocksDb; use grpcio::{ChannelBuilder, Environment}; use kvproto::{coprocessor, kvrpcpb::*, resource_usage_agent::ResourceUsageRecord, tikvpb::*}; use protobuf::Message; @@ -108,7 +109,14 @@ pub fn test_read_keys() { }); } -fn new_cluster(port: u16, env: Arc) -> (Cluster, TikvClient, Context) { +fn new_cluster( + port: u16, + env: Arc, +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { cluster.cfg.resource_metering.receiver_address = format!("127.0.0.1:{}", port); cluster.cfg.resource_metering.precision = ReadableDuration::millis(100); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 24b6a87bfa5d..4e087bb07b05 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -10,6 +10,7 @@ use std::{ use api_version::{ApiV1, ApiV1Ttl, ApiV2, KvFormat}; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use engine_traits::{ MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, SyncMutable, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -1383,7 +1384,8 @@ fn test_double_run_node() { let mut sim = cluster.sim.wl(); let node = sim.get_node(id).unwrap(); let pd_worker = LazyWorker::new("test-pd-worker"); - let simulate_trans = SimulateTransport::new(ChannelTransport::new()); + let simulate_trans = + SimulateTransport::<_, RocksEngine>::new(ChannelTransport::::new()); let tmp = Builder::new().prefix("test_cluster").tempdir().unwrap(); let snap_mgr = SnapManager::new(tmp.path().to_str().unwrap()); let coprocessor_host = CoprocessorHost::new(router, raftstore::coprocessor::Config::default()); diff --git a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index 289b10303a8f..2d8b8d326e3c 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -10,6 +10,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::*, @@ -69,7 +70,10 @@ fn deadlock(client: &TikvClient, ctx: Context, key1: &[u8], ts: u64) -> bool { resp.errors[0].has_deadlock() } -fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (TikvClient, Context) { +fn build_leader_client( + cluster: &mut Cluster>, + key: &[u8], +) -> (TikvClient, Context) { let region_id = cluster.get_region_id(key); let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); @@ -88,7 +92,11 @@ fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (Tik } /// Creates a deadlock on the store containing key. -fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u64) { +fn must_detect_deadlock( + cluster: &mut Cluster>, + key: &[u8], + ts: u64, +) { // Sometimes, deadlocks can't be detected at once due to leader change, but it // will be detected. 
for _ in 0..5 { @@ -100,7 +108,10 @@ fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u6 panic!("failed to detect deadlock"); } -fn deadlock_detector_leader_must_be(cluster: &mut Cluster, store_id: u64) { +fn deadlock_detector_leader_must_be( + cluster: &mut Cluster>, + store_id: u64, +) { let leader_region = cluster.get_region(b""); assert_eq!( cluster @@ -115,7 +126,11 @@ fn deadlock_detector_leader_must_be(cluster: &mut Cluster, store_ .region_leader_must_be(leader_region.get_id(), leader_peer); } -fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], store_id: u64) { +fn must_transfer_leader( + cluster: &mut Cluster>, + region_key: &[u8], + store_id: u64, +) { let region = cluster.get_region(region_key); let target_peer = find_peer_of_store(®ion, store_id); cluster.must_transfer_leader(region.get_id(), target_peer.clone()); @@ -130,7 +145,7 @@ fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], /// REQUIRE: The source store must be the leader the region and the target store /// must not have this region. fn must_transfer_region( - cluster: &mut Cluster, + cluster: &mut Cluster>, region_key: &[u8], source_store_id: u64, target_store_id: u64, @@ -149,14 +164,18 @@ fn must_transfer_region( cluster.must_put(region_key, b"v"); } -fn must_split_region(cluster: &mut Cluster, region_key: &[u8], split_key: &[u8]) { +fn must_split_region( + cluster: &mut Cluster>, + region_key: &[u8], + split_key: &[u8], +) { let region = cluster.get_region(region_key); cluster.must_split(®ion, split_key); cluster.must_put(split_key, b"v"); } fn must_merge_region( - cluster: &mut Cluster, + cluster: &mut Cluster>, source_region_key: &[u8], target_region_key: &[u8], ) { @@ -179,7 +198,7 @@ fn find_peer_of_store(region: &Region, store_id: u64) -> Peer { /// Creates a cluster with only one region and store(1) is the leader of the /// region. -fn new_cluster_for_deadlock_test(count: usize) -> Cluster { +fn new_cluster_for_deadlock_test(count: usize) -> Cluster> { let mut cluster = new_server_cluster(0, count); cluster.cfg.pessimistic_txn.wait_for_lock_timeout = ReadableDuration::millis(500); cluster.cfg.pessimistic_txn.pipelined = false; diff --git a/tests/integrations/server_encryption.rs b/tests/integrations/server_encryption.rs index 7c88afde76a7..041b15fd953f 100644 --- a/tests/integrations/server_encryption.rs +++ b/tests/integrations/server_encryption.rs @@ -1,8 +1,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
+use engine_rocks::RocksEngine; use test_raftstore::*; -fn test_snapshot_encryption(cluster: &mut Cluster) { +fn test_snapshot_encryption>(cluster: &mut Cluster) { configure_for_encryption(cluster); cluster.pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); diff --git a/tests/integrations/storage/test_raft_storage.rs b/tests/integrations/storage/test_raft_storage.rs index 684b7a261fb2..1b3ba6dc43bf 100644 --- a/tests/integrations/storage/test_raft_storage.rs +++ b/tests/integrations/storage/test_raft_storage.rs @@ -8,6 +8,7 @@ use std::{ use api_version::{ApiV1, KvFormat}; use collections::HashMap; +use engine_rocks::RocksEngine; use error_code::{raftstore::STALE_COMMAND, ErrorCodeExt}; use kvproto::kvrpcpb::Context; use test_raftstore::*; @@ -25,8 +26,8 @@ use tikv_util::HandyRwLock; use txn_types::{Key, Mutation, TimeStamp}; fn new_raft_storage() -> ( - Cluster, - SyncTestStorageApiV1, + Cluster>, + SyncTestStorageApiV1>, Context, ) { new_raft_storage_with_store_count::(1, "") @@ -234,7 +235,7 @@ fn write_test_data( } fn check_data( - cluster: &mut Cluster, + cluster: &mut Cluster>, storages: &HashMap>, test_data: &[(Vec, Vec)], ts: impl Into, diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 1fb8075e10f6..4129d5bc721d 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -4,6 +4,7 @@ use std::{ thread, time, }; +use engine_rocks::RocksEngine as RocksDb; use engine_traits::{CfName, IterOptions, CF_DEFAULT}; use futures::executor::block_on; use kvproto::kvrpcpb::{Context, KeyRange}; @@ -323,7 +324,7 @@ fn test_invalid_read_index_when_no_leader() { true, ); request.mut_header().set_peer(follower.clone()); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); cluster .sim .rl() diff --git a/tests/integrations/storage/test_region_info_accessor.rs b/tests/integrations/storage/test_region_info_accessor.rs index 2df7238e1a9e..344f9c6607ec 100644 --- a/tests/integrations/storage/test_region_info_accessor.rs +++ b/tests/integrations/storage/test_region_info_accessor.rs @@ -3,12 +3,15 @@ use std::{sync::mpsc::channel, thread, time::Duration}; use collections::HashMap; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raftstore::coprocessor::{RegionInfoAccessor, RegionInfoProvider}; use test_raftstore::*; use tikv_util::HandyRwLock; -fn prepare_cluster(cluster: &mut Cluster) -> Vec { +fn prepare_cluster>( + cluster: &mut Cluster, +) -> Vec { for i in 0..15 { let i = i + b'0'; let key = vec![b'k', i]; From 37b1dce422fd2bd077d7cfd707a2e8ec716c2aae Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 20 Dec 2023 00:16:52 +0800 Subject: [PATCH 196/203] *: add log and metric in raft_client (#16143) ref tikv/tikv#12362 add log and metric in raft_client. 
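For context, the `tikv_server_raft_client_wait_ready_duration` histogram that the dashboard panels below query can be pictured roughly as follows. This is only a hedged sketch built on the `prometheus` crate, not the actual code added in src/server/metrics.rs or src/server/raft_client.rs; the helper name `observe_wait_ready` and the exact label handling are assumptions for illustration.

use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};
use std::time::Instant;

lazy_static! {
    // Histogram labeled by the target peer address; the dashboard panels
    // below aggregate it by the `to` label.
    static ref RAFT_CLIENT_WAIT_READY_DURATION: HistogramVec = register_histogram_vec!(
        "tikv_server_raft_client_wait_ready_duration",
        "Duration of waiting for the raft client connection to become ready",
        &["to"]
    )
    .unwrap();
}

// Hypothetical call site: time how long the raft client waits for its gRPC
// channel to become ready, then record the elapsed seconds.
fn observe_wait_ready(to_addr: &str, wait: impl FnOnce()) {
    let start = Instant::now();
    wait();
    RAFT_CLIENT_WAIT_READY_DURATION
        .with_label_values(&[to_addr])
        .observe(start.elapsed().as_secs_f64());
}

The PromQL expressions in the JSON below then compute the 99%/99.99% quantiles and the average from this histogram's `_bucket`, `_sum`, and `_count` series, grouped by `to`.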
Signed-off-by: crazycs520 --- metrics/grafana/tikv_details.dashboard.py | 11 + metrics/grafana/tikv_details.json | 1055 +++++++++++++-------- metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/metrics.rs | 7 + src/server/raft_client.rs | 21 +- 5 files changed, 719 insertions(+), 377 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 5c84152174e8..291597409678 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -1857,6 +1857,17 @@ def RaftIO() -> RowPanel: metric="tikv_raftstore_apply_log_duration_seconds", ) ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Raft Client Wait Connection Ready Duration", + heatmap_description="The time consumed for Raft Client wait connection ready", + graph_title="99% Raft Client Wait Connection Ready Duration", + graph_description="The time consumed for Raft Client wait connection ready per TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_server_raft_client_wait_ready_duration", + graph_by_labels=["to"], + ) + ) layout.row( [ graph_panel( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 8c6bea179929..88821ac75381 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -16736,6 +16736,311 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed for Raft Client wait connection ready", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 121, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Client Wait Connection Ready Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, 
+ "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed for Raft Client wait connection ready per TiKV instance", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 122, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{to}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{to}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{to}}", + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{to}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "99% Raft Client Wait Connection Ready Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, { "aliasColors": {}, "bars": false, @@ -16764,11 +17069,11 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 121, + "id": 123, "interval": null, "isNew": true, "legend": { @@ -16912,11 +17217,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 122, + "id": 124, "interval": null, "isNew": true, "legend": { @@ -17052,7 +17357,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 123, + "id": 125, "interval": null, "links": [], "maxDataPoints": 100, @@ -17091,7 +17396,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 124, + "id": 126, "interval": null, "isNew": true, "legend": { @@ -17224,7 +17529,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 125, + "id": 127, "interval": null, "isNew": true, "legend": { @@ -17357,7 +17662,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 126, + "id": 128, "interval": null, "isNew": true, "legend": { @@ -17490,7 +17795,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 127, + "id": 129, "interval": null, "isNew": true, "legend": { @@ -17630,7 +17935,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 128, + "id": 130, "interval": null, "legend": { "show": false @@ -17727,7 +18032,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 129, + "id": 131, 
"interval": null, "isNew": true, "legend": { @@ -17935,7 +18240,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 130, + "id": 132, "interval": null, "legend": { "show": false @@ -18032,7 +18337,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 131, + "id": 133, "interval": null, "isNew": true, "legend": { @@ -18240,7 +18545,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 132, + "id": 134, "interval": null, "legend": { "show": false @@ -18337,7 +18642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 133, + "id": 135, "interval": null, "isNew": true, "legend": { @@ -18545,7 +18850,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 134, + "id": 136, "interval": null, "legend": { "show": false @@ -18649,7 +18954,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 135, + "id": 137, "interval": null, "legend": { "show": false @@ -18746,7 +19051,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 136, + "id": 138, "interval": null, "isNew": true, "legend": { @@ -18879,7 +19184,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 137, + "id": 139, "interval": null, "isNew": true, "legend": { @@ -19030,7 +19335,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 138, + "id": 140, "interval": null, "links": [], "maxDataPoints": 100, @@ -19069,7 +19374,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 139, + "id": 141, "interval": null, "isNew": true, "legend": { @@ -19217,7 +19522,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 140, + "id": 142, "interval": null, "isNew": true, "legend": { @@ -19372,7 +19677,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 141, + "id": 143, "interval": null, "legend": { "show": false @@ -19476,7 +19781,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 142, + "id": 144, "interval": null, "legend": { "show": false @@ -19576,7 +19881,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 143, + "id": 145, "interval": null, "links": [], "maxDataPoints": 100, @@ -19615,7 +19920,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 144, + "id": 146, "interval": null, "isNew": true, "legend": { @@ -19748,7 +20053,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 145, + "id": 147, "interval": null, "isNew": true, "legend": { @@ -19881,7 +20186,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 146, + "id": 148, "interval": null, "isNew": true, "legend": { @@ -20014,7 +20319,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 147, + "id": 149, "interval": null, "isNew": true, "legend": { @@ -20147,7 +20452,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 148, + "id": 150, "interval": null, "isNew": true, "legend": { @@ -20280,7 +20585,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 149, + "id": 151, "interval": null, "isNew": true, "legend": { @@ -20416,7 +20721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 150, + "id": 152, "interval": null, "links": [], "maxDataPoints": 100, @@ -20455,7 +20760,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 151, + "id": 153, "interval": null, "isNew": true, "legend": { @@ -20588,7 +20893,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 152, + "id": 154, "interval": null, "isNew": true, "legend": { @@ -20721,7 +21026,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 153, + "id": 155, "interval": null, "isNew": true, "legend": { @@ -20854,7 +21159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 154, + "id": 156, "interval": null, "isNew": true, "legend": { @@ -20987,7 +21292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 155, + "id": 157, "interval": null, "isNew": true, "legend": { @@ -21120,7 +21425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 156, + "id": 158, "interval": null, "isNew": true, "legend": { @@ -21283,7 +21588,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 157, + "id": 159, "interval": null, "isNew": true, "legend": { @@ -21419,7 +21724,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 158, + "id": 160, "interval": null, "links": [], "maxDataPoints": 100, @@ -21458,7 +21763,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 159, + "id": 161, "interval": null, "isNew": true, "legend": { @@ -21606,7 +21911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 160, + "id": 162, "interval": null, "isNew": true, "legend": { @@ -21754,7 +22059,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 161, + "id": 163, "interval": null, "isNew": true, "legend": { @@ -21887,7 +22192,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 162, + "id": 164, "interval": null, "isNew": true, "legend": { @@ -22020,7 +22325,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 163, + "id": 165, "interval": null, "isNew": true, "legend": { @@ -22153,7 +22458,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 164, + "id": 166, "interval": null, "isNew": true, "legend": { @@ -22286,7 +22591,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 165, + "id": 167, "interval": null, "isNew": true, "legend": { @@ -22419,7 +22724,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 166, + "id": 168, "interval": null, "isNew": true, "legend": { @@ -22552,7 +22857,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 167, + "id": 169, "interval": null, "isNew": true, "legend": { @@ -22729,7 +23034,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 168, + "id": 170, "interval": null, "links": [], "maxDataPoints": 100, @@ -22768,7 +23073,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 169, + "id": 171, "interval": null, "isNew": true, "legend": { @@ -22944,7 +23249,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 170, + "id": 172, "interval": null, "links": [], "maxDataPoints": 100, @@ -22983,7 +23288,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 171, + "id": 173, "interval": null, "isNew": true, "legend": { @@ -23116,7 +23421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 172, + "id": 174, "interval": null, "isNew": true, "legend": { @@ -23249,7 +23554,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 173, + "id": 175, "interval": null, "isNew": true, "legend": { @@ -23389,7 +23694,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 174, + "id": 176, "interval": null, "legend": { "show": false @@ -23486,7 +23791,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 175, + "id": 177, "interval": null, "isNew": true, "legend": { @@ -23687,7 +23992,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 176, + "id": 178, "interval": null, "isNew": true, "legend": { @@ -23888,7 +24193,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 177, + "id": 179, "interval": null, "isNew": true, "legend": { @@ -24092,7 +24397,7 @@ }, "height": 
null, "hideTimeOverride": false, - "id": 178, + "id": 180, "interval": null, "links": [], "maxDataPoints": 100, @@ -24131,7 +24436,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 179, + "id": 181, "interval": null, "isNew": true, "legend": { @@ -24264,7 +24569,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 180, + "id": 182, "interval": null, "isNew": true, "legend": { @@ -24404,7 +24709,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 181, + "id": 183, "interval": null, "legend": { "show": false @@ -24501,7 +24806,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 184, "interval": null, "isNew": true, "legend": { @@ -24709,7 +25014,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 183, + "id": 185, "interval": null, "legend": { "show": false @@ -24806,7 +25111,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 184, + "id": 186, "interval": null, "isNew": true, "legend": { @@ -25014,7 +25319,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 185, + "id": 187, "interval": null, "legend": { "show": false @@ -25111,7 +25416,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 186, + "id": 188, "interval": null, "isNew": true, "legend": { @@ -25319,7 +25624,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 187, + "id": 189, "interval": null, "legend": { "show": false @@ -25416,7 +25721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 188, + "id": 190, "interval": null, "isNew": true, "legend": { @@ -25624,7 +25929,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 189, + "id": 191, "interval": null, "legend": { "show": false @@ -25721,7 +26026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 190, + "id": 192, "interval": null, "isNew": true, "legend": { @@ -25922,7 +26227,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 191, + "id": 193, "interval": null, "isNew": true, "legend": { @@ -26055,7 +26360,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 192, + "id": 194, "interval": null, "isNew": true, "legend": { @@ -26256,7 +26561,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 193, + "id": 195, "interval": null, "isNew": true, "legend": { @@ -26457,7 +26762,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 194, + "id": 196, "interval": null, "isNew": true, "legend": { @@ -26661,7 +26966,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 195, + "id": 197, "interval": null, "links": [], "maxDataPoints": 100, @@ -26700,7 +27005,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 196, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -26848,7 +27153,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 197, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -26988,7 +27293,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 198, + "id": 200, "interval": null, "legend": { "show": false @@ -27085,7 +27390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -27218,7 +27523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 200, + "id": 202, "interval": null, "isNew": true, "legend": { @@ -27351,7 +27656,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -27529,7 
+27834,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -27692,7 +27997,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -27840,7 +28145,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 206, "interval": null, "isNew": true, "legend": { @@ -27973,7 +28278,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 205, + "id": 207, "interval": null, "isNew": true, "legend": { @@ -28109,7 +28414,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 208, "interval": null, "links": [], "maxDataPoints": 100, @@ -28148,7 +28453,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 209, "interval": null, "isNew": true, "legend": { @@ -28296,7 +28601,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 208, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -28497,7 +28802,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -28698,7 +29003,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -28899,7 +29204,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -29100,7 +29405,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -29233,7 +29538,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 215, "interval": null, "isNew": true, "legend": { @@ -29366,7 +29671,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 216, "interval": null, "isNew": true, "legend": { @@ -29499,7 +29804,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -29632,7 +29937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -29840,7 +30145,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 217, + "id": 219, "interval": null, "legend": { "show": false @@ -29940,7 +30245,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 220, "interval": null, "links": [], "maxDataPoints": 100, @@ -29979,7 +30284,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 219, + "id": 221, "interval": null, "isNew": true, "legend": { @@ -30127,7 +30432,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 220, + "id": 222, "interval": null, "isNew": true, "legend": { @@ -30260,7 +30565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -30393,7 +30698,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 222, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -30533,7 +30838,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 223, + "id": 225, "interval": null, "legend": { "show": false @@ -30633,7 +30938,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 226, "interval": null, "links": [], "maxDataPoints": 100, @@ -30672,7 +30977,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 225, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -30850,7 +31155,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 226, + "id": 228, 
"interval": null, "isNew": true, "legend": { @@ -31051,7 +31356,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 227, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -31184,7 +31489,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 228, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -31317,7 +31622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -31450,7 +31755,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -31583,7 +31888,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 231, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -31716,7 +32021,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -31845,7 +32150,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 235, "interval": null, "links": [], "maxDataPoints": 100, @@ -31920,7 +32225,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 236, "interval": null, "links": [], "maxDataPoints": 100, @@ -31999,7 +32304,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 235, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -32252,7 +32557,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 236, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -32385,7 +32690,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -32521,7 +32826,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 240, "interval": null, "links": [], "maxDataPoints": 100, @@ -32560,7 +32865,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 241, "interval": null, "isNew": true, "legend": { @@ -32693,7 +32998,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 240, + "id": 242, "interval": null, "isNew": true, "legend": { @@ -32826,7 +33131,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 241, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -32959,7 +33264,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 242, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -33122,7 +33427,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 245, "interval": null, "isNew": true, "legend": { @@ -33255,7 +33560,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 246, "interval": null, "isNew": true, "legend": { @@ -33388,7 +33693,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 247, "interval": null, "isNew": true, "legend": { @@ -33536,7 +33841,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -33687,7 +33992,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 249, "interval": null, "links": [], "maxDataPoints": 100, @@ -33726,7 +34031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 250, "interval": null, "isNew": true, "legend": { @@ -33859,7 +34164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 249, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -33992,7 +34297,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 250, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -34125,7 +34430,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 
253, "interval": null, "isNew": true, "legend": { @@ -34261,7 +34566,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 254, "interval": null, "links": [], "maxDataPoints": 100, @@ -34307,7 +34612,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 253, + "id": 255, "interval": null, "legend": { "show": false @@ -34404,7 +34709,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 254, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -34605,7 +34910,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 255, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -34738,7 +35043,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 256, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -34871,7 +35176,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -35004,7 +35309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 258, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -35205,7 +35510,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 261, "interval": null, "isNew": true, "legend": { @@ -35338,7 +35643,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -35474,7 +35779,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 263, "interval": null, "links": [], "maxDataPoints": 100, @@ -35513,7 +35818,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -35714,7 +36019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 263, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -35915,7 +36220,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 266, "interval": null, "isNew": true, "legend": { @@ -36116,7 +36421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 267, "interval": null, "isNew": true, "legend": { @@ -36317,7 +36622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -36450,7 +36755,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -36583,7 +36888,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 270, "interval": null, "isNew": true, "legend": { @@ -36716,7 +37021,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 269, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -36849,7 +37154,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 270, + "id": 272, "interval": null, "isNew": true, "legend": { @@ -36982,7 +37287,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 271, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -37122,7 +37427,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 272, + "id": 274, "interval": null, "legend": { "show": false @@ -37219,7 +37524,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 273, + "id": 275, "interval": null, "isNew": true, "legend": { @@ -37423,7 +37728,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 274, + "id": 276, "interval": null, "links": [], "maxDataPoints": 100, @@ -37462,7 +37767,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 275, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -37610,7 +37915,7 @@ }, "height": 
null, "hideTimeOverride": false, - "id": 276, + "id": 278, "interval": null, "isNew": true, "legend": { @@ -37743,7 +38048,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 277, + "id": 279, "interval": null, "isNew": true, "legend": { @@ -37876,7 +38181,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 280, "interval": null, "isNew": true, "legend": { @@ -38012,7 +38317,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 281, "interval": null, "links": [], "maxDataPoints": 100, @@ -38051,7 +38356,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 280, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -38244,7 +38549,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 281, + "id": 283, "interval": null, "isNew": true, "legend": { @@ -38422,7 +38727,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 282, + "id": 284, "interval": null, "isNew": true, "legend": { @@ -38630,7 +38935,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 283, + "id": 285, "interval": null, "isNew": true, "legend": { @@ -38808,7 +39113,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 284, + "id": 286, "interval": null, "isNew": true, "legend": { @@ -38971,7 +39276,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 285, + "id": 287, "interval": null, "isNew": true, "legend": { @@ -39149,7 +39454,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 288, "interval": null, "isNew": true, "legend": { @@ -39282,7 +39587,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 287, + "id": 289, "interval": null, "isNew": true, "legend": { @@ -39460,7 +39765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 288, + "id": 290, "interval": null, "isNew": true, "legend": { @@ -39593,7 +39898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 289, + "id": 291, "interval": null, "isNew": true, "legend": { @@ -39771,7 +40076,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 290, + "id": 292, "interval": null, "isNew": true, "legend": { @@ -39904,7 +40209,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 291, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -40082,7 +40387,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -40260,7 +40565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 295, "interval": null, "isNew": true, "legend": { @@ -40393,7 +40698,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 296, "interval": null, "isNew": true, "legend": { @@ -40526,7 +40831,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -40659,7 +40964,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 298, "interval": null, "isNew": true, "legend": { @@ -40882,7 +41187,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 299, "interval": null, "isNew": true, "legend": { @@ -41075,7 +41380,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 298, + "id": 300, "interval": null, "isNew": true, "legend": { @@ -41238,7 +41543,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 299, + "id": 301, "interval": null, "isNew": true, "legend": { @@ -41431,7 +41736,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 302, "interval": null, "isNew": true, "legend": { @@ -41579,7 +41884,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 301, + "id": 303, "interval": null, "isNew": true, "legend": { @@ -41712,7 +42017,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 302, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -41860,7 +42165,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 303, + "id": 305, "interval": null, "isNew": true, "legend": { @@ -42038,7 +42343,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 306, "interval": null, "isNew": true, "legend": { @@ -42201,7 +42506,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -42379,7 +42684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -42512,7 +42817,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 309, "interval": null, "isNew": true, "legend": { @@ -42645,7 +42950,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 308, + "id": 310, "interval": null, "isNew": true, "legend": { @@ -42778,7 +43083,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 311, "interval": null, "isNew": true, "legend": { @@ -42911,7 +43216,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -43044,7 +43349,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 313, "interval": null, "isNew": true, "legend": { @@ -43177,7 +43482,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 312, + "id": 314, "interval": null, "isNew": true, "legend": { @@ -43310,7 +43615,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 313, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -43511,7 +43816,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 316, "interval": null, "isNew": true, "legend": { @@ -43644,7 +43949,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 317, "interval": null, "isNew": true, "legend": { @@ -43829,7 +44134,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 316, + "id": 318, "interval": null, "legend": { "show": false @@ -43926,7 +44231,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 317, + "id": 319, "interval": null, "isNew": true, "legend": { @@ -44062,7 +44367,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 318, + "id": 320, "interval": null, "links": [], "maxDataPoints": 100, @@ -44101,7 +44406,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 321, "interval": null, "isNew": true, "legend": { @@ -44264,7 +44569,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 320, + "id": 322, "interval": null, "isNew": true, "legend": { @@ -44465,7 +44770,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 323, "interval": null, "isNew": true, "legend": { @@ -44613,7 +44918,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 322, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -44776,7 +45081,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 323, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -44977,7 +45282,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 326, "interval": null, "isNew": true, "legend": { @@ -45155,7 +45460,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 325, + "id": 327, "interval": null, "isNew": true, "legend": { @@ -45318,7 +45623,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 326, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -45481,7 +45786,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 327, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -45617,7 +45922,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 330, "interval": null, "links": [], "maxDataPoints": 100, @@ -45656,7 +45961,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -45804,7 +46109,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -45952,7 +46257,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 331, + "id": 333, "interval": null, "isNew": true, "legend": { @@ -46085,7 +46390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 334, "interval": null, "isNew": true, "legend": { @@ -46218,7 +46523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 333, + "id": 335, "interval": null, "isNew": true, "legend": { @@ -46396,7 +46701,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 336, "interval": null, "isNew": true, "legend": { @@ -46529,7 +46834,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 335, + "id": 337, "interval": null, "isNew": true, "legend": { @@ -46707,7 +47012,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 338, "interval": null, "isNew": true, "legend": { @@ -46885,7 +47190,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 337, + "id": 339, "interval": null, "isNew": true, "legend": { @@ -47018,7 +47323,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 340, "interval": null, "isNew": true, "legend": { @@ -47196,7 +47501,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 339, + "id": 341, "interval": null, "isNew": true, "legend": { @@ -47329,7 +47634,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 340, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -47492,7 +47797,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 341, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -47670,7 +47975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 344, "interval": null, "isNew": true, "legend": { @@ -47848,7 +48153,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -48026,7 +48331,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -48159,7 +48464,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 345, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -48337,7 +48642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 348, "interval": null, "isNew": true, "legend": { @@ -48470,7 +48775,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 349, "interval": null, "isNew": true, "legend": { @@ -48648,7 +48953,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -48781,7 +49086,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 351, "interval": null, "isNew": true, "legend": { @@ -48914,7 +49219,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 352, "interval": null, "isNew": true, "legend": { @@ -49092,7 +49397,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 351, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -49270,7 +49575,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -49403,7 +49708,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 355, "interval": null, "isNew": true, "legend": { @@ -49581,7 +49886,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 354, + "id": 356, "interval": null, "isNew": true, "legend": { @@ -49714,7 +50019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 355, + "id": 357, "interval": null, "isNew": true, "legend": { @@ -49892,7 +50197,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -50028,7 +50333,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 359, "interval": null, "links": [], "maxDataPoints": 100, @@ -50067,7 +50372,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 360, "interval": null, "isNew": true, "legend": { @@ -50215,7 +50520,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 359, + "id": 361, "interval": null, "isNew": true, "legend": { @@ -50348,7 +50653,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 360, + "id": 362, "interval": null, "isNew": true, "legend": { @@ -50549,7 +50854,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 361, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -50697,7 +51002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -50898,7 +51203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 365, "interval": null, "isNew": true, "legend": { @@ -51031,7 +51336,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 366, "interval": null, "isNew": true, "legend": { @@ -51164,7 +51469,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 365, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51297,7 +51602,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -51430,7 +51735,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 369, "interval": null, "isNew": true, "legend": { @@ -51570,7 +51875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 368, + "id": 370, "interval": null, "legend": { "show": false @@ -51670,7 +51975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 371, "interval": null, "links": [], "maxDataPoints": 100, @@ -51709,7 +52014,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 370, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -51842,7 +52147,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52005,7 +52310,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52153,7 +52458,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -52293,7 +52598,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 374, + "id": 376, "interval": null, "legend": { "show": false @@ -52397,7 +52702,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 375, + "id": 377, "interval": 
null, "legend": { "show": false @@ -52501,7 +52806,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 376, + "id": 378, "interval": null, "legend": { "show": false @@ -52598,7 +52903,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 377, + "id": 379, "interval": null, "isNew": true, "legend": { @@ -52738,7 +53043,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 378, + "id": 380, "interval": null, "legend": { "show": false @@ -52842,7 +53147,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 379, + "id": 381, "interval": null, "legend": { "show": false @@ -52946,7 +53251,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 380, + "id": 382, "interval": null, "legend": { "show": false @@ -53043,7 +53348,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 381, + "id": 383, "interval": null, "isNew": true, "legend": { @@ -53176,7 +53481,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 382, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53309,7 +53614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53449,7 +53754,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 384, + "id": 386, "interval": null, "legend": { "show": false @@ -53546,7 +53851,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -53682,7 +53987,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 388, "interval": null, "links": [], "maxDataPoints": 100, @@ -53721,7 +54026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 389, "interval": null, "isNew": true, "legend": { @@ -53854,7 +54159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -53987,7 +54292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54120,7 +54425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -54253,7 +54558,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -54386,7 +54691,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -54519,7 +54824,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -54659,7 +54964,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 394, + "id": 396, "interval": null, "legend": { "show": false @@ -54756,7 +55061,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 397, "interval": null, "isNew": true, "legend": { @@ -54889,7 +55194,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 396, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55029,7 +55334,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 397, + "id": 399, "interval": null, "legend": { "show": false @@ -55126,7 +55431,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -55259,7 +55564,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 399, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55392,7 +55697,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55525,7 +55830,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -55673,7 +55978,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -55821,7 +56126,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -55957,7 +56262,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 406, "interval": null, "links": [], "maxDataPoints": 100, @@ -55996,7 +56301,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 407, "interval": null, "isNew": true, "legend": { @@ -56129,7 +56434,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56262,7 +56567,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -56395,7 +56700,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -56531,7 +56836,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 411, "interval": null, "links": [], "maxDataPoints": 100, @@ -56570,7 +56875,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 412, "interval": null, "isNew": true, "legend": { @@ -56733,7 +57038,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -56866,7 +57171,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57006,7 +57311,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 413, + "id": 415, "interval": null, "legend": { "show": false @@ -57110,7 +57415,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 414, + "id": 416, "interval": null, "legend": { "show": false @@ -57207,7 +57512,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 417, "interval": null, "isNew": true, "legend": { @@ -57362,7 +57667,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 416, + "id": 418, "interval": null, "legend": { "show": false @@ -57466,7 +57771,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 417, + "id": 419, "interval": null, "legend": { "show": false @@ -57570,7 +57875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 418, + "id": 420, "interval": null, "legend": { "show": false @@ -57667,7 +57972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 419, + "id": 421, "interval": null, "isNew": true, "legend": { @@ -57837,7 +58142,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 420, + "id": 422, "interval": null, "legend": { "show": false @@ -57934,7 +58239,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 423, "interval": null, "isNew": true, "legend": { @@ -58135,7 +58440,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 422, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58336,7 +58641,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 423, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -58469,7 +58774,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -58632,7 +58937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -58765,7 +59070,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -58898,7 +59203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59099,7 +59404,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59239,7 +59544,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 429, + "id": 431, "interval": null, "legend": { "show": false @@ -59343,7 +59648,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 430, + "id": 432, "interval": null, "legend": { "show": false @@ -59447,7 +59752,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 431, + "id": 433, "interval": null, "legend": { "show": false @@ -59551,7 +59856,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 432, + "id": 434, "interval": null, "legend": { "show": false @@ -59655,7 +59960,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 433, + "id": 435, "interval": null, "legend": { "show": false @@ -59759,7 +60064,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 434, + "id": 436, "interval": null, "legend": { "show": false @@ -59863,7 +60168,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 435, + "id": 437, "interval": null, "legend": { "show": false @@ -59960,7 +60265,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 436, + "id": 438, "interval": null, "isNew": true, "legend": { @@ -60108,7 +60413,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 437, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60241,7 +60546,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -60374,7 +60679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60522,7 +60827,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60658,7 +60963,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 443, "interval": null, "links": [], "maxDataPoints": 100, @@ -60697,7 +61002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 444, "interval": null, "isNew": true, "legend": { @@ -60830,7 +61135,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -60963,7 +61268,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61096,7 +61401,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -61229,7 +61534,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 446, + "id": 448, "interval": null, "isNew": true, "legend": { @@ 
-61377,7 +61682,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -61581,7 +61886,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 448, + "id": 450, "interval": null, "links": [], "maxDataPoints": 100, @@ -61632,7 +61937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 451, "interval": null, "links": [], "maxDataPoints": 100, @@ -61728,7 +62033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 450, + "id": 452, "interval": null, "links": [], "maxDataPoints": 100, @@ -61803,7 +62108,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 451, + "id": 453, "interval": null, "links": [], "maxDataPoints": 100, @@ -61878,7 +62183,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 452, + "id": 454, "interval": null, "links": [], "maxDataPoints": 100, @@ -61953,7 +62258,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 455, "interval": null, "links": [], "maxDataPoints": 100, @@ -62028,7 +62333,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 456, "interval": null, "links": [], "maxDataPoints": 100, @@ -62103,7 +62408,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 455, + "id": 457, "interval": null, "links": [], "maxDataPoints": 100, @@ -62178,7 +62483,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 456, + "id": 458, "interval": null, "links": [], "maxDataPoints": 100, @@ -62257,7 +62562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -62390,7 +62695,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62523,7 +62828,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -62656,7 +62961,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -62789,7 +63094,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 463, "interval": null, "isNew": true, "legend": { @@ -62922,7 +63227,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63070,7 +63375,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -63203,7 +63508,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 466, "interval": null, "isNew": true, "legend": { @@ -63336,7 +63641,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 467, "interval": null, "isNew": true, "legend": { @@ -63502,7 +63807,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 466, + "id": 468, "interval": null, "legend": { "show": false @@ -63606,7 +63911,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 467, + "id": 469, "interval": null, "legend": { "show": false @@ -63710,7 +64015,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 468, + "id": 470, "interval": null, "legend": { "show": false @@ -63814,7 +64119,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 471, "interval": null, "legend": { "show": false @@ -63918,7 +64223,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 472, 
"interval": null, "legend": { "show": false @@ -64022,7 +64327,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 473, "interval": null, "legend": { "show": false @@ -64126,7 +64431,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 472, + "id": 474, "interval": null, "legend": { "show": false @@ -64230,7 +64535,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 475, "interval": null, "legend": { "show": false @@ -64327,7 +64632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 474, + "id": 476, "interval": null, "isNew": true, "legend": { @@ -64460,7 +64765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 475, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -64593,7 +64898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -64726,7 +65031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -64859,7 +65164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -64992,7 +65297,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -65125,7 +65430,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -65265,7 +65570,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 481, + "id": 483, "interval": null, "legend": { "show": false @@ -65369,7 +65674,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 482, + "id": 484, "interval": null, "legend": { "show": false @@ -65466,7 +65771,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 483, + "id": 485, "interval": null, "isNew": true, "legend": { @@ -65599,7 +65904,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 484, + "id": 486, "interval": null, "isNew": true, "legend": { @@ -65732,7 +66037,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 485, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -65865,7 +66170,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -65998,7 +66303,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66131,7 +66436,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66267,7 +66572,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 491, "interval": null, "links": [], "maxDataPoints": 100, @@ -66306,7 +66611,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 492, "interval": null, "isNew": true, "legend": { @@ -66439,7 +66744,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -66572,7 +66877,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -66705,7 +67010,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 495, "interval": null, "isNew": true, "legend": { diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 
330822d3c7bc..9fc86c869868 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -dbcc3ef2b588c133dbe4b56196abb366da5b25631f6d42bbc6ae1811b21bbec5 ./metrics/grafana/tikv_details.json +78b891e1edbbaa68d2c0638cd258ff0d80315e66f412225905434e63b6a14692 ./metrics/grafana/tikv_details.json diff --git a/src/server/metrics.rs b/src/server/metrics.rs index cef725c3f28b..3ad9c5bdde0f 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -409,6 +409,13 @@ lazy_static! { &["type", "store_id"] ) .unwrap(); + pub static ref RAFT_CLIENT_WAIT_CONN_READY_DURATION_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( + "tikv_server_raft_client_wait_ready_duration", + "Duration of wait raft client connection ready", + &["to"], + exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s + ) + .unwrap(); pub static ref RAFT_MESSAGE_FLUSH_COUNTER: RaftMessageFlushCounterVec = register_static_int_counter_vec!( RaftMessageFlushCounterVec, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index b120011c490e..700d409c1297 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -40,6 +40,7 @@ use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, lru::LruCache, + time::duration_to_sec, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, }; @@ -814,7 +815,13 @@ async fn start( let mut last_wake_time = None; let backoff_duration = back_end.builder.cfg.value().raft_client_max_backoff.0; let mut addr_channel = None; + let mut begin = None; + let mut try_count = 0; loop { + if begin.is_none() { + begin = Some(Instant::now()); + } + try_count += 1; maybe_backoff(backoff_duration, &mut last_wake_time).await; let f = back_end.resolve(); let addr = match f.await { @@ -862,7 +869,19 @@ async fn start( .report_store_unreachable(back_end.store_id); continue; } else { - debug!("connection established"; "store_id" => back_end.store_id, "addr" => %addr); + let wait_conn_duration = begin.unwrap_or_else(Instant::now).elapsed(); + info!("connection established"; + "store_id" => back_end.store_id, + "addr" => %addr, + "cost" => ?wait_conn_duration, + "msg_count" => ?back_end.queue.len(), + "try_count" => try_count, + ); + RAFT_CLIENT_WAIT_CONN_READY_DURATION_HISTOGRAM_VEC + .with_label_values(&[addr.as_str()]) + .observe(duration_to_sec(wait_conn_duration)); + begin = None; + try_count = 0; } let client = TikvClient::new(channel); From 1f384cfce5b41f49f19a06de33cf32d2f2a2eaf4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 20 Dec 2023 14:19:52 +0800 Subject: [PATCH 197/203] *: do not suffix "fips" for tikv version ouput (#16201) close tikv/tikv#16200 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6d1cc5159074..acccb2f55e57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,11 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if crypto::fips::can_enable() { - format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") - } else { - env!("CARGO_PKG_VERSION").to_owned() - } + env!("CARGO_PKG_VERSION").to_owned() } /// Prints the tikv version information to the standard output. 
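The raft_client.rs change above times how long a connection takes to become ready and records it in the new tikv_server_raft_client_wait_ready_duration histogram, whose buckets come from exponential_buckets(5e-5, 2.0, 22). As a rough check of the "50us ~ 104s" comment (an illustrative sketch only, assuming the prometheus crate's semantics where the call yields 22 upper bounds 5e-5 * 2^0 through 5e-5 * 2^21):

    // Standalone check of the bucket range used above (illustrative only; needs the
    // `prometheus` crate as a dependency).
    fn main() {
        let buckets = prometheus::exponential_buckets(5e-5, 2.0, 22).unwrap();
        assert_eq!(buckets.len(), 22);
        // First bound is 50us; last bound is 5e-5 * 2^21 = 104.8576s, matching the comment.
        assert!((buckets[0] - 5e-5).abs() < 1e-12);
        assert!((buckets[21] - 104.8576).abs() < 1e-6);
        println!("buckets span {:e}s to {:.4}s", buckets[0], buckets[21]);
    }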
From d7959b8194b4607175050289e7c093b3d5caad80 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 20 Dec 2023 15:52:53 +0800 Subject: [PATCH 198/203] txn: change memory pessimistic lock to btree map and support scan (#16180) ref tikv/tikv#15066 Change in-memory pessimistic locks from hash map to btree map, support collecting pessimistic locks for scan lock command. Then: 1. GC could collect expired pessimistic locks. 2. Pessimistic rollback could use read scan first and then clean up expired pessimistic locks at one time. Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/txn_ext.rs | 264 +++++++++++++++++++--- src/storage/mod.rs | 144 +++++++++++++- src/storage/mvcc/metrics.rs | 17 ++ src/storage/mvcc/reader/reader.rs | 112 +++++---- tests/integrations/server/kv_service.rs | 163 +++++++++++++ 5 files changed, 624 insertions(+), 76 deletions(-) diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 0091fd4e7bb8..ae352ea08abc 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -1,16 +1,16 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + collections::{BTreeMap, Bound}, fmt, sync::atomic::{AtomicU64, Ordering}, }; -use collections::HashMap; use kvproto::metapb; use lazy_static::lazy_static; use parking_lot::RwLock; use prometheus::{register_int_gauge, IntGauge}; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, Lock, PessimisticLock}; /// Transaction extensions related to a peer. #[derive(Default)] pub struct TxnExt { @@ -106,7 +106,7 @@ pub struct PeerPessimisticLocks { /// skipped because of version mismatch. So, no lock should be deleted. /// It's correct that we include the locks that are marked deleted in the /// commit merge request. - map: HashMap<Key, (PessimisticLock, bool)>, + map: BTreeMap<Key, (PessimisticLock, bool)>, /// Status of the pessimistic lock map. /// The map is writable only in the Normal state. pub status: LocksStatus, @@ -143,7 +143,7 @@ impl fmt::Debug for PeerPessimisticLocks { impl Default for PeerPessimisticLocks { fn default() -> Self { PeerPessimisticLocks { - map: HashMap::default(), + map: BTreeMap::default(), status: LocksStatus::Normal, term: 0, version: 0, @@ -192,7 +192,7 @@ impl PeerPessimisticLocks { } pub fn clear(&mut self) { - self.map = HashMap::default(); + self.map = BTreeMap::default(); GLOBAL_MEM_SIZE.sub(self.memory_size as i64); self.memory_size = 0; } @@ -244,12 +244,20 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. // Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.drain_filter(|key, _| { - let key = &**key.as_encoded(); + // There is no drain_filter for BTreeMap, so extra clones are needed.
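    // Illustrative sketch, not part of the patch above: the commit message describes switching
    // the in-memory pessimistic lock table to a BTreeMap so that a key range can be scanned
    // with a limit. The standalone function below shows the same range/Bound/"has more"
    // pattern on a plain std BTreeMap; all names here are invented for the example.
    use std::collections::BTreeMap;
    use std::ops::Bound;

    fn scan_range(
        map: &BTreeMap<String, u64>,
        start: Option<&str>,
        end: Option<&str>,
        limit: usize,
    ) -> (Vec<(String, u64)>, bool) {
        if let (Some(s), Some(e)) = (start, end) {
            // Mirror the patch's assertion that the range is well formed.
            assert!(e >= s);
        }
        let mut iter = map.range::<str, _>((
            start.map_or(Bound::Unbounded, Bound::Included),
            end.map_or(Bound::Unbounded, Bound::Excluded),
        ));
        let mut out = Vec::new();
        while let Some((key, value)) = iter.next() {
            out.push((key.clone(), *value));
            if limit > 0 && out.len() >= limit {
                // Report whether anything remains past the limit, like `scan_locks` does.
                return (out, iter.next().is_some());
            }
        }
        (out, false)
    }

    fn main() {
        let mut map = BTreeMap::new();
        for key in ["a", "b", "c", "d"] {
            map.insert(key.to_string(), 1);
        }
        let (hits, has_more) = scan_range(&map, Some("b"), Some("d"), 1);
        assert_eq!(hits.len(), 1); // only "b" is returned under the limit
        assert!(has_more); // "c" is still inside ["b", "d")
    }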
+ let mut removed_locks = Vec::new(); + self.map.retain(|key, value| { + let key_ref = key.as_encoded().as_slice(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); - key < start_key || (!end_key.is_empty() && key >= end_key) + if key_ref < start_key || (!end_key.is_empty() && key_ref >= end_key) { + removed_locks.push((key.clone(), value.clone())); + false + } else { + true + } }); - for (key, (lock, _)) in removed_locks { + + for (key, (lock, _)) in removed_locks.into_iter() { let idx = match regions .binary_search_by_key(&&**key.as_encoded(), |region| region.get_start_key()) { @@ -264,6 +272,37 @@ impl PeerPessimisticLocks { res } + /// Scan and return locks in the current pessimistic lock map, the map + /// should be locked first before calling this method. + pub fn scan_locks( + &self, + start: Option<&Key>, + end: Option<&Key>, + filter: F, + limit: usize, + ) -> (Vec<(Key, Lock)>, bool) + where + F: Fn(&Key, &PessimisticLock) -> bool, + { + if let (Some(start_key), Some(end_key)) = (start, end) { + assert!(end_key >= start_key); + } + let mut locks = Vec::with_capacity(limit); + let mut iter = self.map.range(( + start.map_or(Bound::Unbounded, |k| Bound::Included(k)), + end.map_or(Bound::Unbounded, |k| Bound::Excluded(k)), + )); + while let Some((key, (lock, _))) = iter.next() { + if filter(key, lock) { + locks.push((key.clone(), lock.clone().into_lock())); + } + if limit > 0 && locks.len() >= limit { + return (locks, iter.next().is_some()); + } + } + (locks, false) + } + #[cfg(test)] fn from_locks(locks: impl IntoIterator) -> Self { let mut res = PeerPessimisticLocks::default(); @@ -277,7 +316,7 @@ impl PeerPessimisticLocks { impl<'a> IntoIterator for &'a PeerPessimisticLocks { type Item = (&'a Key, &'a (PessimisticLock, bool)); - type IntoIter = std::collections::hash_map::Iter<'a, Key, (PessimisticLock, bool)>; + type IntoIter = std::collections::btree_map::Iter<'a, Key, (PessimisticLock, bool)>; fn into_iter(self) -> Self::IntoIter { self.map.iter() @@ -331,6 +370,24 @@ mod tests { } } + fn lock_with_key(key: &[u8], deleted: bool) -> (Key, (PessimisticLock, bool)) { + ( + Key::from_raw(key), + ( + PessimisticLock { + primary: key.to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 20.into(), + last_change: LastChange::make_exist(5.into(), 2), + is_locked_with_conflict: false, + }, + deleted, + ), + ) + } + #[test] fn test_memory_size() { let _guard = TEST_MUTEX.lock().unwrap(); @@ -418,23 +475,6 @@ mod tests { #[test] fn test_group_locks_by_regions() { - fn lock(key: &[u8], deleted: bool) -> (Key, (PessimisticLock, bool)) { - ( - Key::from_raw(key), - ( - PessimisticLock { - primary: key.to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - for_update_ts: 10.into(), - min_commit_ts: 20.into(), - last_change: LastChange::make_exist(5.into(), 2), - is_locked_with_conflict: false, - }, - deleted, - ), - ) - } fn region(start_key: &[u8], end_key: &[u8]) -> metapb::Region { let mut region = metapb::Region::default(); region.set_start_key(start_key.to_vec()); @@ -445,11 +485,11 @@ mod tests { defer!(GLOBAL_MEM_SIZE.set(0)); let mut original = PeerPessimisticLocks::from_locks(vec![ - lock(b"a", true), - lock(b"c", false), - lock(b"e", true), - lock(b"g", false), - lock(b"i", false), + lock_with_key(b"a", true), + lock_with_key(b"c", false), + lock_with_key(b"e", true), + lock_with_key(b"g", false), + lock_with_key(b"i", false), ]); let regions = vec![ region(b"", b"b"), // test leftmost 
region @@ -460,10 +500,10 @@ mod tests { ]; let output = original.group_by_regions(®ions, ®ions[4]); let expected: Vec<_> = vec![ - vec![lock(b"a", false)], + vec![lock_with_key(b"a", false)], vec![], - vec![lock(b"c", false)], - vec![lock(b"e", false), lock(b"g", false)], + vec![lock_with_key(b"c", false)], + vec![lock_with_key(b"e", false), lock_with_key(b"g", false)], vec![], // the position of the derived region is empty ] .into_iter() @@ -473,7 +513,159 @@ mod tests { // The lock that belongs to the derived region is kept in the original map. assert_eq!( original, - PeerPessimisticLocks::from_locks(vec![lock(b"i", false)]) + PeerPessimisticLocks::from_locks(vec![lock_with_key(b"i", false)]) ); } + + #[test] + fn test_scan_memory_lock() { + // Create a sample PeerPessimisticLocks instance with some locks. + let peer_locks = PeerPessimisticLocks::from_locks(vec![ + lock_with_key(b"key1", false), + lock_with_key(b"key2", false), + lock_with_key(b"key3", false), + ]); + + fn txn_lock(key: &[u8], deleted: bool) -> Lock { + let (_, (pessimistic_lock, _)) = lock_with_key(key, deleted); + pessimistic_lock.into_lock() + } + + let filter_pass_all = |_key: &Key, _lock: &PessimisticLock| true; + let filter_pass_key2 = + |key: &Key, _lock: &PessimisticLock| key.as_encoded().starts_with(b"key2"); + + // Case parameter: start_key, end_key, filter, limit, expected results, expected + // has more. + type LockFilter = fn(&Key, &PessimisticLock) -> bool; + let cases: [( + Option, + Option, + LockFilter, + usize, + Vec<(Key, Lock)>, + bool, + ); 12] = [ + ( + None, + None, + filter_pass_all, + 1, + vec![(Key::from_raw(b"key1"), txn_lock(b"key1", false))], + true, + ), + ( + None, + None, + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key0")), + Some(Key::from_raw(b"key1")), + filter_pass_all, + 10, + vec![], + false, + ), + ( + Some(Key::from_raw(b"key0")), + Some(Key::from_raw(b"key2")), + filter_pass_all, + 10, + vec![(Key::from_raw(b"key1"), txn_lock(b"key1", false))], + false, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key3")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 2, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + ], + true, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key2")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key4")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![], + false, + ), + ( + None, + None, + filter_pass_key2, + 10, + vec![(Key::from_raw(b"key2"), txn_lock(b"key2", false))], + false, + ), + ( + Some(Key::from_raw(b"key2")), + None, + filter_pass_key2, + 1, + vec![(Key::from_raw(b"key2"), txn_lock(b"key2", false))], + true, + ), + ( + None, 
+ Some(Key::from_raw(b"key2")), + filter_pass_key2, + 1, + vec![], + false, + ), + ]; + + for (start_key, end_key, filter, limit, expected_locks, expected_has_more) in cases { + let (locks, has_more) = + peer_locks.scan_locks(start_key.as_ref(), end_key.as_ref(), filter, limit); + assert_eq!(locks, expected_locks); + assert_eq!(has_more, expected_has_more); + } + } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2bdc07625ee0..13d868849f48 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -80,6 +80,7 @@ use engine_traits::{ raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use futures::{future::Either, prelude::*}; +use itertools::Itertools; use kvproto::{ kvrpcpb::{ ApiVersion, ChecksumAlgorithm, CommandPri, Context, GetRequest, IsolationLevel, KeyRange, @@ -1458,6 +1459,15 @@ impl Storage { Some(ScanMode::Forward), !ctx.get_not_fill_cache(), ); + let memory_locks = reader + .load_in_memory_pessimisitic_lock_range( + start_key.as_ref(), + end_key.as_ref(), + |_, lock| lock.start_ts <= max_ts, + limit, + ) + .map_err(txn::Error::from); + let (memory_lock_kv_pairs, _) = memory_locks?; let result = reader .scan_locks( start_key.as_ref(), @@ -1468,8 +1478,18 @@ impl Storage { .map_err(txn::Error::from); statistics.add(&reader.statistics); let (kv_pairs, _) = result?; - let mut locks = Vec::with_capacity(kv_pairs.len()); - for (key, lock) in kv_pairs { + + // Merge the results from in-memory pessimistic locks and the lock cf. + // The result order is decided by the key. + let memory_lock_iter = memory_lock_kv_pairs.into_iter(); + let lock_iter = kv_pairs.into_iter(); + let merged_iter = memory_lock_iter + .merge_by(lock_iter, |(memory_key, _), (key, _)| memory_key <= key); + let mut locks = Vec::with_capacity(limit); + for (key, lock) in merged_iter { + if limit > 0 && locks.len() >= limit { + break; + } let lock_info = lock.into_lock_info(key.into_raw().map_err(txn::Error::from)?); locks.push(lock_info); @@ -7396,6 +7416,126 @@ mod tests { ); } + #[test] + fn test_scan_lock_with_memory_lock() { + for in_memory_pessimistic_lock_enabled in [false, true] { + let txn_ext = Arc::new(TxnExt::default()); + let lock_mgr = MockLockManager::new(); + let storage = TestStorageBuilderApiV1::new(lock_mgr.clone()) + .pipelined_pessimistic_lock(in_memory_pessimistic_lock_enabled) + .in_memory_pessimistic_lock(in_memory_pessimistic_lock_enabled) + .build_for_txn(txn_ext.clone()) + .unwrap(); + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::AcquirePessimisticLock::new( + vec![(Key::from_raw(b"a"), false), (Key::from_raw(b"b"), false)], + b"a".to_vec(), + 20.into(), + 3000, + true, + 20.into(), + Some(WaitTimeout::Millis(1000)), + false, + 21.into(), + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + if in_memory_pessimistic_lock_enabled { + // Check if the lock exists in the memory buffer. 
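    // Illustrative sketch, not part of the patch above: the scan_lock change in
    // src/storage/mod.rs merges two individually key-sorted sources (in-memory pessimistic
    // locks and locks read from the lock CF) with itertools::merge_by, so the combined result
    // stays ordered by key. A minimal standalone version of that merge step, assuming the
    // `itertools` crate is available; the data here is invented for the example.
    use itertools::Itertools;

    fn main() {
        let memory_locks = vec![(b"a".to_vec(), "pessimistic"), (b"c".to_vec(), "pessimistic")];
        let cf_locks = vec![(b"b".to_vec(), "prewrite"), (b"d".to_vec(), "prewrite")];
        let merged: Vec<_> = memory_locks
            .into_iter()
            .merge_by(cf_locks, |(memory_key, _), (key, _)| memory_key <= key)
            .collect();
        let keys: Vec<_> = merged.iter().map(|(key, _)| key.clone()).collect();
        assert_eq!(
            keys,
            vec![b"a".to_vec(), b"b".to_vec(), b"c".to_vec(), b"d".to_vec()]
        );
    }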
+ let pessimistic_locks = txn_ext.pessimistic_locks.read(); + let lock = pessimistic_locks.get(&Key::from_raw(b"a")).unwrap(); + assert_eq!( + lock, + &( + PessimisticLock { + primary: Box::new(*b"a"), + start_ts: 20.into(), + ttl: 3000, + for_update_ts: 20.into(), + min_commit_ts: 21.into(), + last_change: LastChange::NotExist, + is_locked_with_conflict: false, + }, + false + ) + ); + } + + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![ + Mutation::make_put(Key::from_raw(b"x"), b"foo".to_vec()), + Mutation::make_put(Key::from_raw(b"y"), b"foo".to_vec()), + Mutation::make_put(Key::from_raw(b"z"), b"foo".to_vec()), + ], + b"x".to_vec(), + 10.into(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + + let (lock_a, lock_b, lock_x, lock_y, lock_z) = ( + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"a".to_vec()); + lock.set_lock_version(20); + lock.set_lock_for_update_ts(20); + lock.set_key(b"a".to_vec()); + lock.set_min_commit_ts(21); + lock.set_lock_type(Op::PessimisticLock); + lock.set_lock_ttl(3000); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"a".to_vec()); + lock.set_lock_version(20); + lock.set_lock_for_update_ts(20); + lock.set_key(b"b".to_vec()); + lock.set_min_commit_ts(21); + lock.set_lock_type(Op::PessimisticLock); + lock.set_lock_ttl(3000); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"x".to_vec()); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"y".to_vec()); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"z".to_vec()); + lock + }, + ); + let res = block_on(storage.scan_lock(Context::default(), 101.into(), None, None, 10)) + .unwrap(); + assert_eq!(res, vec![lock_a, lock_b, lock_x, lock_y, lock_z,]); + } + } + #[test] fn test_scan_lock() { let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index 22d2760a7692..eaef1134d815 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -36,6 +36,11 @@ make_static_metric! { write_not_loaded_skip } + pub label_enum ScanLockReadTimeSource { + resolve_lock, + pessimistic_rollback, + } + pub struct MvccConflictCounterVec: IntCounter { "type" => MvccConflictKind, } @@ -58,6 +63,10 @@ make_static_metric! { retry_req, }, } + + pub struct ScanLockReadTimeVec: Histogram { + "type" => ScanLockReadTimeSource, + } } lazy_static! { @@ -120,4 +129,12 @@ lazy_static! 
{ ) .unwrap() }; + pub static ref SCAN_LOCK_READ_TIME_VEC: ScanLockReadTimeVec = register_static_histogram_vec!( + ScanLockReadTimeVec, + "tikv_storage_mvcc_scan_lock_read_duration_seconds", + "Bucketed histogram of memory lock read lock hold for scan lock", + &["type"], + exponential_buckets(0.00001, 2.0, 20).unwrap() + ) + .unwrap(); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 48158eda946a..257789b4765a 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -8,9 +8,12 @@ use kvproto::{ errorpb::{self, EpochNotMatch, FlashbackInProgress, StaleCommand}, kvrpcpb::Context, }; -use raftstore::store::LocksStatus; +use raftstore::store::{LocksStatus, PeerPessimisticLocks}; use tikv_kv::{SnapshotExt, SEEK_BOUND}; -use txn_types::{Key, LastChange, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; +use tikv_util::time::Instant; +use txn_types::{ + Key, LastChange, Lock, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteRef, WriteType, +}; use crate::storage::{ kv::{ @@ -18,6 +21,7 @@ use crate::storage::{ }, mvcc::{ default_not_found_error, + metrics::SCAN_LOCK_READ_TIME_VEC, reader::{OverlappedWrite, TxnCommitRecord}, Result, }, @@ -251,44 +255,76 @@ impl MvccReader { Ok(res) } - fn load_in_memory_pessimistic_lock(&self, key: &Key) -> Result> { - self.snapshot - .ext() - .get_txn_ext() - .and_then(|txn_ext| { - // If the term or region version has changed, do not read the lock table. - // Instead, just return a StaleCommand or EpochNotMatch error, so the - // client will not receive a false error because the lock table has been - // cleared. - let locks = txn_ext.pessimistic_locks.read(); - if self.term != 0 && locks.term != self.term { - let mut err = errorpb::Error::default(); - err.set_stale_command(StaleCommand::default()); - return Some(Err(KvError::from(err).into())); - } - if self.version != 0 && locks.version != self.version { - let mut err = errorpb::Error::default(); - // We don't know the current regions. Just return an empty EpochNotMatch error. - err.set_epoch_not_match(EpochNotMatch::default()); - return Some(Err(KvError::from(err).into())); - } - // If the region is in the flashback state, it should not be allowed to read the - // locks. - if locks.status == LocksStatus::IsInFlashback && !self.allow_in_flashback { - let mut err = errorpb::Error::default(); - err.set_flashback_in_progress(FlashbackInProgress::default()); - return Some(Err(KvError::from(err).into())); + fn check_term_version_status(&self, locks: &PeerPessimisticLocks) -> Result<()> { + // If the term or region version has changed, do not read the lock table. + // Instead, just return a StaleCommand or EpochNotMatch error, so the + // client will not receive a false error because the lock table has been + // cleared. 
+ if self.term != 0 && locks.term != self.term { + let mut err = errorpb::Error::default(); + err.set_stale_command(StaleCommand::default()); + return Err(KvError::from(err).into()); + } + if self.version != 0 && locks.version != self.version { + let mut err = errorpb::Error::default(); + err.set_epoch_not_match(EpochNotMatch::default()); + return Err(KvError::from(err).into()); + } + if locks.status == LocksStatus::IsInFlashback && !self.allow_in_flashback { + let mut err = errorpb::Error::default(); + err.set_flashback_in_progress(FlashbackInProgress::default()); + return Err(KvError::from(err).into()); + } + Ok(()) + } + + pub fn load_in_memory_pessimisitic_lock_range( + &self, + start_key: Option<&Key>, + end_key: Option<&Key>, + filter: F, + scan_limit: usize, + ) -> Result<(Vec<(Key, Lock)>, bool)> + where + F: Fn(&Key, &PessimisticLock) -> bool, + { + if let Some(txn_ext) = self.snapshot.ext().get_txn_ext() { + let begin_instant = Instant::now(); + let res = match self.check_term_version_status(&txn_ext.pessimistic_locks.read()) { + Ok(_) => { + // Scan locks within the specified range and filter by max_ts. + Ok(txn_ext + .pessimistic_locks + .read() + .scan_locks(start_key, end_key, filter, scan_limit)) } + Err(e) => Err(e), + }; + let elapsed = begin_instant.saturating_elapsed(); + SCAN_LOCK_READ_TIME_VEC + .resolve_lock + .observe(elapsed.as_secs_f64()); - locks.get(key).map(|(lock, _)| { - // For write commands that are executed in serial, it should be impossible - // to read a deleted lock. - // For read commands in the scheduler, it should read the lock marked deleted - // because the lock is not actually deleted from the underlying storage. - Ok(lock.to_lock()) - }) - }) - .transpose() + res + } else { + Ok((vec![], false)) + } + } + + fn load_in_memory_pessimistic_lock(&self, key: &Key) -> Result> { + if let Some(txn_ext) = self.snapshot.ext().get_txn_ext() { + let locks = txn_ext.pessimistic_locks.read(); + self.check_term_version_status(&locks)?; + Ok(locks.get(key).map(|(lock, _)| { + // For write commands that are executed in serial, it should be impossible + // to read a deleted lock. + // For read commands in the scheduler, it should read the lock marked deleted + // because the lock is not actually deleted from the underlying storage. + lock.to_lock() + })) + } else { + Ok(None) + } } fn get_scan_mode(&self, allow_backward: bool) -> ScanMode { diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 4e087bb07b05..845ae2bc9699 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2759,3 +2759,166 @@ fn test_pessimistic_lock_execution_tracking() { handle.join().unwrap(); } + +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] +fn test_mvcc_scan_memory_and_cf_locks() { + let (cluster, client, ctx) = new_cluster(); + + // Create both pessimistic and prewrite locks. + // The peer in memory limit is 512KiB, generate 1KiB key for pessimistic lock. + // So Writing 512 pessimistic locks may exceed the memory limit and later + // pessimistic locks would be written to the lock cf. 
+ let byte_slice: &[u8] = &[b'k'; 512]; + let start_ts = 11; + let prewrite_start_ts = start_ts - 1; + let num_keys = 1040; + let prewrite_primary_key = b"prewrite_primary"; + let val = b"value"; + let format_key = |i| format!("{:?}{:04}", byte_slice, i).as_bytes().to_vec(); + for i in 0..num_keys { + let key = format_key(i); + if i % 2 == 0 { + must_kv_pessimistic_lock(&client, ctx.clone(), key, start_ts); + } else { + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(key); + mutation.set_value(val.to_vec()); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![], + prewrite_primary_key.to_vec(), + start_ts - 1, + 0, + false, + false, + ); + } + } + // Ensure the pessimistic locks are written to the memory. The first key should + // be written into the memory and the last key should be put to lock cf as + // memory limit is exceeded. + let engine = cluster.get_engine(1); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(format_key(0).as_slice()).as_encoded()).as_slice(), + ) + .unwrap(); + assert!(cf_res.is_none()); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(format_key(num_keys - 2).as_slice()).as_encoded()) + .as_slice(), + ) + .unwrap(); + assert!(cf_res.is_some()); + + // Scan lock, the pessimistic and prewrite results are returned. + // When limit is 0 or it's larger than num_keys, all keys should be returned. + // When limit is less than 512, in-memory pessimistic locks and prewrite locks + // should be returned. + // When limit is larger than 512, in-memory and lock cf pessimistic locks and + // prewrite locks should be returned. + for scan_limit in [0, 128, 256, 512, num_keys, num_keys * 2] { + let scan_ts = 20; + let scan_lock_max_version = scan_ts; + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = scan_lock_max_version; + scan_lock_req.limit = scan_limit as u32; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + let expected_key_num = if scan_limit == 0 || scan_limit >= num_keys { + num_keys + } else { + scan_limit + }; + assert_eq!(scan_lock_resp.locks.len(), expected_key_num); + + for (i, lock_info) in (0..expected_key_num).zip(scan_lock_resp.locks.iter()) { + let key = format_key(i); + if i % 2 == 0 { + assert_eq!(lock_info.lock_type, Op::PessimisticLock); + assert_eq!(lock_info.lock_version, start_ts); + assert_eq!(lock_info.key, key); + } else { + assert_eq!( + lock_info.lock_type, + Op::Put, + "i={:?} lock_info={:?} expected_key_num={:?}, scan_limit={:?}", + i, + lock_info, + expected_key_num, + scan_limit + ); + assert_eq!(lock_info.primary_lock, prewrite_primary_key); + assert_eq!(lock_info.lock_version, prewrite_start_ts); + assert_eq!(lock_info.key, key); + } + } + } + + // Scan with smaller ts returns empty result. + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = prewrite_start_ts - 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), 0); + + // Roll back the prewrite locks. 
+ let rollback_start_version = prewrite_start_ts; + let mut rollback_req = BatchRollbackRequest::default(); + rollback_req.set_context(ctx.clone()); + rollback_req.start_version = rollback_start_version; + let keys = (0..num_keys) + .filter(|i| i % 2 != 0) + .map(|i| format_key(i)) + .collect(); + rollback_req.set_keys(keys); + let rollback_resp = client.kv_batch_rollback(&rollback_req).unwrap(); + assert!(!rollback_resp.has_region_error()); + assert!(!rollback_resp.has_error()); + + // Scan lock again after removing prewrite locks. + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = start_ts + 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), num_keys / 2); + for (i, lock_info) in (0..num_keys / 2).zip(scan_lock_resp.locks.iter()) { + let key = format_key(i * 2); + assert_eq!(lock_info.lock_version, start_ts); + assert_eq!(lock_info.key, key); + assert_eq!(lock_info.lock_type, Op::PessimisticLock); + } + + // Pessimistic rollback all the locks. Scan lock should return empty result. + let mut pessimsitic_rollback_req = PessimisticRollbackRequest::default(); + pessimsitic_rollback_req.start_version = start_ts; + pessimsitic_rollback_req.for_update_ts = start_ts; + pessimsitic_rollback_req.set_context(ctx.clone()); + let keys = (0..num_keys) + .filter(|i| i % 2 == 0) + .map(|i| format_key(i)) + .collect(); + pessimsitic_rollback_req.set_keys(keys); + let pessimistic_rollback_resp = client + .kv_pessimistic_rollback(&pessimsitic_rollback_req) + .unwrap(); + assert!(!pessimistic_rollback_resp.has_region_error()); + + // Scan lock again after all the cleanup. + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx); + scan_lock_req.max_version = start_ts + 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), 0); +} From 75cd21cbaf008fcab937de70fa731703d999aedf Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 20 Dec 2023 19:45:22 +0800 Subject: [PATCH 199/203] server: add metrics for status server API (#16198) close tikv/tikv#16197 This commit adds histograms for status server APIs, so that we can inspect the duration for each API request. It helps us to identify any correlations between specific API requests and potential incidents.
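As a rough illustration of the approach described above (a sketch, not the code added by this patch), a status-server handler can time each request and observe the elapsed seconds into a histogram labeled by request path. The metric name, bucket layout, and handler shape below are assumptions made for the example, and the sketch relies on the prometheus and lazy_static crates; the metric this patch actually adds is tikv_status_server_request_duration_seconds, declared in the new src/server/status_server/metrics.rs further down.

    use std::time::Instant;

    use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec};

    lazy_static::lazy_static! {
        // Illustrative metric; the real one lives in src/server/status_server/metrics.rs.
        static ref STATUS_REQUEST_DURATION: HistogramVec = register_histogram_vec!(
            "status_server_request_duration_seconds_example",
            "Duration of status server API requests (illustrative)",
            &["path"],
            exponential_buckets(1e-4, 2.0, 20).unwrap()
        )
        .unwrap();
    }

    fn handle_status_request(path: &str) {
        let start = Instant::now();
        // ... dispatch to the real handler for `path` here ...
        STATUS_REQUEST_DURATION
            .with_label_values(&[path])
            .observe(start.elapsed().as_secs_f64());
    }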
Signed-off-by: Neil Shen --- metrics/grafana/common.py | 2 + metrics/grafana/tikv_details.dashboard.py | 32 ++ metrics/grafana/tikv_details.json | 376 ++++++++++++++++++++++ metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/status_server/metrics.rs | 13 + src/server/status_server/mod.rs | 24 +- 6 files changed, 445 insertions(+), 4 deletions(-) create mode 100644 src/server/status_server/metrics.rs diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py index 2c2ed7570ed2..7f15c06998f8 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -981,6 +981,7 @@ def graph_panel_histogram_quantiles( metric: str, label_selectors: list[str] = [], by_labels: list[str] = [], + hide_p9999=False, hide_avg=False, hide_count=False, ) -> Panel: @@ -1013,6 +1014,7 @@ def legend(prefix, labels): by_labels=by_labels, ), legend_format=legend("99.99%", by_labels), + hide=hide_p9999, ), target( expr=expr_histogram_quantile( diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 291597409678..966346f741ec 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -8568,6 +8568,37 @@ def SlowTrendStatistics() -> RowPanel: return layout.row_panel +def StatusServer() -> RowPanel: + layout = Layout(title="Status Server") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Status API Request Duration", + description="The 99 quantile durtion of status server API requests", + metric="tikv_status_server_request_duration_seconds", + yaxes=yaxes(left_format=UNITS.SECONDS), + by_labels=["path"], + hide_p9999=True, + hide_count=True, + hide_avg=True, + ), + graph_panel( + title="Status API Request (op/s)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_status_server_request_duration_seconds_count", + by_labels=["path"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + #### Metrics Definition End #### @@ -8619,6 +8650,7 @@ def SlowTrendStatistics() -> RowPanel: Encryption(), BackupLog(), SlowTrendStatistics(), + StatusServer(), ], # Set 14 or larger to support shared crosshair or shared tooltip. 
# See https://github.com/grafana/grafana/blob/v10.2.2/public/app/features/dashboard/state/DashboardMigrator.ts#L443-L445 diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 88821ac75381..3af1480137c4 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -67122,6 +67122,382 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 496, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The 99 quantile durtion of status server API requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 497, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{path}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{path}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{path}}", + "metric": "", + "query": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{path}}", + "metric": "", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status API Request Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 498, + "interval": null, + "isNew": 
true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "metric": "", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status API Request (op/s)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Status Server", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "1m", diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 9fc86c869868..d715ccca3519 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -78b891e1edbbaa68d2c0638cd258ff0d80315e66f412225905434e63b6a14692 ./metrics/grafana/tikv_details.json +ac3bba8b714ed9cad64ece88ff1f7b4bb075ca178f270e7e1f41461d5ac37bbd ./metrics/grafana/tikv_details.json diff --git a/src/server/status_server/metrics.rs b/src/server/status_server/metrics.rs new file mode 100644 index 000000000000..9786ebd0a109 --- /dev/null +++ b/src/server/status_server/metrics.rs @@ -0,0 +1,13 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec}; + +lazy_static::lazy_static! 
{ + pub static ref STATUS_REQUEST_DURATION: HistogramVec = register_histogram_vec!( + "tikv_status_server_request_duration_seconds", + "Bucketed histogram of TiKV status server request duration", + &["method", "path"], + exponential_buckets(0.0001, 2.0, 24).unwrap() // 0.1ms ~ 1677.7s + ) + .unwrap(); +} diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index ff8909fa8521..90c966d13e24 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -1,7 +1,9 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. +mod metrics; /// Provides profilers for TiKV. mod profile; + use std::{ env::args, error::Error as StdError, @@ -33,6 +35,7 @@ use hyper::{ Body, Method, Request, Response, Server, StatusCode, }; use kvproto::resource_manager::ResourceGroup; +use metrics::STATUS_REQUEST_DURATION; use online_config::OnlineConfig; use openssl::{ ssl::{Ssl, SslAcceptor, SslContext, SslFiletype, SslMethod, SslVerifyMode}, @@ -645,7 +648,9 @@ where )); } - match (method, path.as_ref()) { + let mut is_unknown_path = false; + let start = Instant::now(); + let res = match (method.clone(), path.as_ref()) { (Method::GET, "/metrics") => { Self::handle_get_metrics(req, &cfg_controller) } @@ -717,8 +722,21 @@ where (Method::PUT, "/resume_grpc") => { Self::handle_resume_grpc(grpc_service_mgr).await } - _ => Ok(make_response(StatusCode::NOT_FOUND, "path not found")), - } + _ => { + is_unknown_path = true; + Ok(make_response(StatusCode::NOT_FOUND, "path not found")) + }, + }; + // Using "unknown" for unknown paths to void creating high cardinality. + let path_label = if is_unknown_path { + "unknown".to_owned() + } else { + path + }; + STATUS_REQUEST_DURATION + .with_label_values(&[method.as_str(), &path_label]) + .observe(start.elapsed().as_secs_f64()); + res } })) } From d9b70f7f3a3332aa4ad9946325d1877fa4f33da2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:58:53 +0800 Subject: [PATCH 200/203] In-memory Engine: implement read flow -- hybrid engine part (#16181) ref tikv/tikv#16141 implement read flow of in-memory engine -- hybrid engine part Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 + components/engine_panic/src/engine.rs | 6 +- components/engine_rocks/src/engine.rs | 4 +- components/engine_traits/src/engine.rs | 5 +- components/engine_traits/src/memory_engine.rs | 4 +- components/hybrid_engine/Cargo.toml | 5 + components/hybrid_engine/src/engine.rs | 82 ++++- .../hybrid_engine/src/engine_iterator.rs | 67 ++++- components/hybrid_engine/src/iterable.rs | 6 +- components/hybrid_engine/src/lib.rs | 1 + components/hybrid_engine/src/snapshot.rs | 33 ++- components/raftstore/Cargo.toml | 2 + components/raftstore/src/router.rs | 6 +- components/raftstore/src/store/peer.rs | 23 +- .../raftstore/src/store/region_snapshot.rs | 5 + components/raftstore/src/store/worker/read.rs | 279 ++++++++++++++++-- .../region_cache_memory_engine/Cargo.toml | 2 +- .../region_cache_memory_engine/src/engine.rs | 61 ++-- components/test_raftstore/src/cluster.rs | 17 +- components/test_raftstore/src/node.rs | 5 +- components/test_raftstore/src/server.rs | 7 +- .../test_raftstore/src/transport_simulate.rs | 5 +- components/test_raftstore/src/util.rs | 16 +- src/server/raftkv/mod.rs | 9 +- tests/benches/misc/raftkv/mod.rs | 3 +- tests/failpoints/cases/test_witness.rs | 8 +- tests/integrations/raftstore/mod.rs | 1 + .../raftstore/test_region_cache.rs | 17 ++ tests/integrations/raftstore/test_witness.rs | 2 +- 
29 files changed, 561 insertions(+), 124 deletions(-) create mode 100644 tests/integrations/raftstore/test_region_cache.rs diff --git a/Cargo.lock b/Cargo.lock index eb1fe04b4244..3f19e8d287fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2449,6 +2449,8 @@ version = "0.0.1" dependencies = [ "engine_rocks", "engine_traits", + "region_cache_memory_engine", + "tempfile", "tikv_util", "txn_types", ] @@ -4137,6 +4139,7 @@ dependencies = [ "futures-util", "getset", "grpcio-health", + "hybrid_engine", "into_other", "itertools", "keys", @@ -4157,6 +4160,7 @@ dependencies = [ "raft", "raft-proto", "rand 0.8.5", + "region_cache_memory_engine", "resource_control", "resource_metering", "serde", diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 3f75d64f78d7..7b8546af111f 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -1,8 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, - WriteOptions, + IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapshotContext, + SyncMutable, WriteOptions, }; use crate::{db_vector::PanicDbVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; @@ -13,7 +13,7 @@ pub struct PanicEngine; impl KvEngine for PanicEngine { type Snapshot = PanicSnapshot; - fn snapshot(&self, _: Option) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { panic!() } fn sync(&self) -> Result<()> { diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index e0eed00ad53d..7de0ffd0dbe0 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -3,7 +3,7 @@ use std::{any::Any, sync::Arc}; use engine_traits::{ - IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, + IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapshotContext, SyncMutable, }; use rocksdb::{DBIterator, Writable, DB}; @@ -184,7 +184,7 @@ impl RocksEngine { impl KvEngine for RocksEngine { type Snapshot = RocksSnapshot; - fn snapshot(&self, _: Option) -> RocksSnapshot { + fn snapshot(&self, _: Option) -> RocksSnapshot { RocksSnapshot::new(self.db.clone()) } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 44539f194195..83f05180820d 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -42,7 +42,7 @@ pub trait KvEngine: /// /// SnapCtx will only be used by some type of trait implementors (ex: /// HybridEngine) - fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; + fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; /// Syncs any writes to disk fn sync(&self) -> Result<()>; @@ -82,7 +82,8 @@ pub trait KvEngine: fn inner_refcount(&self) -> usize; } -pub struct SnapCtx { +#[derive(Debug, Clone)] +pub struct SnapshotContext { pub region_id: u64, pub read_ts: u64, } diff --git a/components/engine_traits/src/memory_engine.rs b/components/engine_traits/src/memory_engine.rs index 189c3bc0c28c..9babc8580fc0 100644 --- a/components/engine_traits/src/memory_engine.rs +++ b/components/engine_traits/src/memory_engine.rs @@ -13,5 +13,7 @@ pub trait RegionCacheEngine: // If None is returned, the RegionCacheEngine is currently not readable for this // region or read_ts. 
- fn snapshot(&self, region_id: u64, read_ts: u64) -> Option; + // Sequence number is shared between RegionCacheEngine and disk KvEnigne to + // provide atomic write + fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option; } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index e0be90b179e9..0ae04b1dc3ed 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -12,3 +12,8 @@ engine_traits = { workspace = true } txn_types = { workspace = true } tikv_util = { workspace = true } engine_rocks = { workspace = true } + +[dev-dependencies] +engine_rocks = { workspace = true } +region_cache_memory_engine = { workspace = true } +tempfile = "3.0" diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index 6ccb223baf12..b76b999f1c37 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,7 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapCtx, SyncMutable, + KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapshotContext, SnapshotMiscExt, + SyncMutable, }; use crate::snapshot::HybridEngineSnapshot; @@ -65,21 +66,31 @@ where { type Snapshot = HybridEngineSnapshot; - fn snapshot(&self, _: Option) -> Self::Snapshot { - unimplemented!() + fn snapshot(&self, ctx: Option) -> Self::Snapshot { + let disk_snap = self.disk_engine.snapshot(ctx.clone()); + let region_cache_snap = if let Some(ctx) = ctx { + self.region_cache_engine.snapshot( + ctx.region_id, + ctx.read_ts, + disk_snap.sequence_number(), + ) + } else { + None + }; + HybridEngineSnapshot::new(disk_snap, region_cache_snap) } fn sync(&self) -> engine_traits::Result<()> { - unimplemented!() + self.disk_engine.sync() } fn bad_downcast(&self) -> &T { - unimplemented!() + self.disk_engine.bad_downcast() } #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize { - unimplemented!() + self.disk_engine.inner_refcount() } } @@ -90,17 +101,19 @@ where { type DbVector = EK::DbVector; + // region cache engine only supports peekable trait in the snapshot of it fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { - unimplemented!() + self.disk_engine.get_value_opt(opts, key) } + // region cache engine only supports peekable trait in the snapshot of it fn get_value_cf_opt( &self, opts: &ReadOptions, cf: &str, key: &[u8], ) -> Result> { - unimplemented!() + self.disk_engine.get_value_cf_opt(opts, cf, key) } } @@ -133,3 +146,56 @@ where unimplemented!() } } + +#[cfg(test)] +mod tests { + use engine_rocks::util::new_engine; + use engine_traits::{KvEngine, SnapshotContext, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use region_cache_memory_engine::RegionCacheMemoryEngine; + use tempfile::Builder; + + use crate::HybridEngine; + + #[test] + fn test_engine() { + let path = Builder::new().prefix("temp").tempdir().unwrap(); + let disk_engine = new_engine( + path.path().to_str().unwrap(), + &[CF_DEFAULT, CF_LOCK, CF_WRITE], + ) + .unwrap(); + let memory_engine = RegionCacheMemoryEngine::default(); + memory_engine.new_region(1); + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_region_meta(1).unwrap().set_safe_ts(10); + } + + let hybrid_engine = HybridEngine::new(disk_engine, memory_engine.clone()); + let s = hybrid_engine.snapshot(None); + assert!(!s.region_cache_snapshot_available()); 
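+        // A region cache snapshot is only handed out when the region is registered in
+        // the memory engine, is marked readable (`can_read`), and the requested
+        // `read_ts` is newer than the region's `safe_ts`; in every other case the
+        // hybrid snapshot silently falls back to the disk snapshot alone, which is
+        // what the remaining assertions exercise.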
+ + let mut snap_ctx = SnapshotContext { + read_ts: 15, + region_id: 1, + }; + let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); + assert!(s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(false); + } + let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); + assert!(!s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + } + snap_ctx.read_ts = 5; + let s = hybrid_engine.snapshot(Some(snap_ctx)); + assert!(!s.region_cache_snapshot_available()); + } +} diff --git a/components/hybrid_engine/src/engine_iterator.rs b/components/hybrid_engine/src/engine_iterator.rs index 642aac82b605..7349240f2a97 100644 --- a/components/hybrid_engine/src/engine_iterator.rs +++ b/components/hybrid_engine/src/engine_iterator.rs @@ -11,44 +11,89 @@ where iter: Either, } +impl HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn disk_engine_iterator(iter: EK::Iterator) -> Self { + Self { + iter: Either::Left(iter), + } + } + + pub fn region_cache_engine_iterator(iter: EC::Iterator) -> Self { + Self { + iter: Either::Right(iter), + } + } +} + impl Iterator for HybridEngineIterator where EK: KvEngine, EC: RegionCacheEngine, { - fn seek(&mut self, _key: &[u8]) -> Result { - unimplemented!() + fn seek(&mut self, key: &[u8]) -> Result { + match self.iter { + Either::Left(ref mut iter) => iter.seek(key), + Either::Right(ref mut iter) => iter.seek(key), + } } - fn seek_for_prev(&mut self, _key: &[u8]) -> Result { - unimplemented!() + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + match self.iter { + Either::Left(ref mut iter) => iter.seek_for_prev(key), + Either::Right(ref mut iter) => iter.seek_for_prev(key), + } } fn seek_to_first(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.seek_to_first(), + Either::Right(ref mut iter) => iter.seek_to_first(), + } } fn seek_to_last(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.seek_to_last(), + Either::Right(ref mut iter) => iter.seek_to_last(), + } } fn prev(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.prev(), + Either::Right(ref mut iter) => iter.prev(), + } } fn next(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.next(), + Either::Right(ref mut iter) => iter.next(), + } } fn key(&self) -> &[u8] { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.key(), + Either::Right(ref iter) => iter.key(), + } } fn value(&self) -> &[u8] { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.value(), + Either::Right(ref iter) => iter.value(), + } } fn valid(&self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.valid(), + Either::Right(ref iter) => iter.valid(), + } } } diff --git a/components/hybrid_engine/src/iterable.rs b/components/hybrid_engine/src/iterable.rs index 96933641b068..27a38570f016 100644 --- a/components/hybrid_engine/src/iterable.rs +++ b/components/hybrid_engine/src/iterable.rs @@ -12,6 +12,10 @@ where type Iterator = HybridEngineIterator; fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { - unimplemented!() + // Iterator of region cache engine should only be created from the + // snapshot of it + self.disk_engine() + .iterator_opt(cf, opts) + 
.map(|iter| HybridEngineIterator::disk_engine_iterator(iter)) } } diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs index 367d985b094a..0778412a2c98 100644 --- a/components/hybrid_engine/src/lib.rs +++ b/components/hybrid_engine/src/lib.rs @@ -24,3 +24,4 @@ mod ttl_properties; mod write_batch; pub use engine::HybridEngine; +pub use snapshot::HybridEngineSnapshot; diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs index 4ada590c3d61..3c7ab875a210 100644 --- a/components/hybrid_engine/src/snapshot.rs +++ b/components/hybrid_engine/src/snapshot.rs @@ -1,9 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - fmt::{self, Debug, Formatter}, - marker::PhantomData, -}; +use std::fmt::{self, Debug, Formatter}; use engine_traits::{ CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, @@ -18,34 +15,40 @@ where EC: RegionCacheEngine, { disk_snap: EK::Snapshot, - - phantom: PhantomData, + region_cache_snap: Option, } -impl Snapshot for HybridEngineSnapshot +impl HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { + pub fn new(disk_snap: EK::Snapshot, region_cache_snap: Option) -> Self { + HybridEngineSnapshot { + disk_snap, + region_cache_snap, + } + } + + pub fn region_cache_snapshot_available(&self) -> bool { + self.region_cache_snap.is_some() + } } -impl Debug for HybridEngineSnapshot +impl Snapshot for HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { - fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { - write!(fmt, "Hybrid Engine Snapshot Impl") - } } -impl Drop for HybridEngineSnapshot +impl Debug for HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { - fn drop(&mut self) { - unimplemented!() + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "Hybrid Engine Snapshot Impl") } } @@ -98,6 +101,6 @@ where EC: RegionCacheEngine, { fn sequence_number(&self) -> u64 { - unimplemented!() + self.disk_snap.sequence_number() } } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 81e809a02059..cde5c961f3f7 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -95,5 +95,7 @@ yatp = { workspace = true } encryption_export = { workspace = true } engine_panic = { workspace = true } engine_rocks = { workspace = true } +hybrid_engine = { workspace = true } panic_hook = { workspace = true } +region_cache_memory_engine = { workspace = true } test_sst_importer = { workspace = true } diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index fd50357fa38d..452616caf7e6 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -7,7 +7,7 @@ use std::{ // #[PerformanceCriticalPath] use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine, Snapshot}; +use engine_traits::{KvEngine, RaftEngine, Snapshot, SnapshotContext}; use error_code::ErrorCodeExt; use kvproto::{metapb, raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::SnapshotStatus; @@ -121,6 +121,7 @@ where { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, @@ -251,11 +252,12 @@ impl RaftStoreRouter for ServerRaftStoreRouter impl LocalReadRouter for ServerRaftStoreRouter { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()> { - self.local_reader.read(read_id, req, 
cb); + self.local_reader.read(snap_ctx, read_id, req, cb); Ok(()) } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 382b9e53b4b1..904d35fec2f7 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -19,8 +19,8 @@ use bytes::Bytes; use collections::{HashMap, HashSet}; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_traits::{ - Engines, KvEngine, PerfContext, RaftEngine, Snapshot, WriteBatch, WriteOptions, CF_DEFAULT, - CF_LOCK, CF_WRITE, + Engines, KvEngine, PerfContext, RaftEngine, Snapshot, SnapshotContext, WriteBatch, + WriteOptions, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use fail::fail_point; @@ -4829,7 +4829,16 @@ where } } - let mut resp = reader.execute(&req, &Arc::new(region), read_index, None); + let snap_ctx = if let Ok(read_ts) = decode_u64(&mut req.get_header().get_flag_data()) { + Some(SnapshotContext { + region_id: self.region_id, + read_ts, + }) + } else { + None + }; + + let mut resp = reader.execute(&req, &Arc::new(region), read_index, snap_ctx, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); snap.bucket_meta = self @@ -5735,8 +5744,12 @@ where &self.engines.kv } - fn get_snapshot(&mut self, _: &Option>) -> Arc { - Arc::new(self.engines.kv.snapshot(None)) + fn get_snapshot( + &mut self, + snap_ctx: Option, + _: &Option>, + ) -> Arc { + Arc::new(self.engines.kv.snapshot(snap_ctx)) } } diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 1c2c6251eba8..5232675f14a0 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -175,6 +175,11 @@ where pub fn get_end_key(&self) -> &[u8] { self.region.get_end_key() } + + #[cfg(test)] + pub fn snap(&self) -> Arc { + self.snap.clone() + } } impl Clone for RegionSnapshot diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index dbec805fe971..778f4ce45f01 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -12,7 +12,7 @@ use std::{ }; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; -use engine_traits::{KvEngine, Peekable, RaftEngine}; +use engine_traits::{KvEngine, Peekable, RaftEngine, SnapshotContext}; use fail::fail_point; use kvproto::{ errorpb, @@ -57,6 +57,7 @@ pub trait ReadExecutor { /// Currently, only multi-rocksdb version may return `None`. 
fn get_snapshot( &mut self, + snap_ctx: Option, read_context: &Option>, ) -> Arc<::Snapshot>; @@ -64,6 +65,7 @@ pub trait ReadExecutor { &mut self, req: &Request, region: &metapb::Region, + snap_ctx: Option, read_context: &Option>, ) -> Result { let key = req.get_get().get_key(); @@ -71,7 +73,7 @@ pub trait ReadExecutor { util::check_key_in_region(key, region)?; let mut resp = Response::default(); - let snapshot = self.get_snapshot(read_context); + let snapshot = self.get_snapshot(snap_ctx, read_context); let res = if !req.get_get().get_cf().is_empty() { let cf = req.get_get().get_cf(); snapshot @@ -109,6 +111,7 @@ pub trait ReadExecutor { msg: &RaftCmdRequest, region: &Arc, read_index: Option, + snap_ctx: Option, local_read_ctx: Option>, ) -> ReadResponse<::Snapshot> { let requests = msg.get_requests(); @@ -121,20 +124,22 @@ pub trait ReadExecutor { for req in requests { let cmd_type = req.get_cmd_type(); let mut resp = match cmd_type { - CmdType::Get => match self.get_value(req, region.as_ref(), &local_read_ctx) { - Ok(resp) => resp, - Err(e) => { - error!(?e; - "failed to execute get command"; - "region_id" => region.get_id(), - ); - response.response = cmd_resp::new_error(e); - return response; + CmdType::Get => { + match self.get_value(req, region.as_ref(), snap_ctx.clone(), &local_read_ctx) { + Ok(resp) => resp, + Err(e) => { + error!(?e; + "failed to execute get command"; + "region_id" => region.get_id(), + ); + response.response = cmd_resp::new_error(e); + return response; + } } - }, + } CmdType::Snap => { let snapshot = RegionSnapshot::from_snapshot( - self.get_snapshot(&local_read_ctx), + self.get_snapshot(snap_ctx.clone(), &local_read_ctx), region.clone(), ); response.snapshot = Some(snapshot); @@ -226,9 +231,16 @@ where } } - /// Update the snapshot in the `snap_cache` if the read_id is None or does - /// not match. - fn maybe_update_snapshot(&mut self, engine: &E, delegate_last_valid_ts: Timespec) -> bool { + // Update the snapshot in the `snap_cache` if the read_id is None or does + // not match. 
+ // snap_ctx is used (if not None) to acquire the snapshot of the relevant region + // from region cache engine + fn maybe_update_snapshot( + &mut self, + engine: &E, + snap_ctx: Option, + delegate_last_valid_ts: Timespec, + ) -> bool { // When the read_id is None, it means the `snap_cache` has been cleared // before and the `cached_read_id` of it is None because only a consecutive // requests will have the same cache and the cache will be cleared after the @@ -242,7 +254,7 @@ where } self.snap_cache.cached_read_id = self.read_id.clone(); - self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(None))); + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(snap_ctx))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -250,7 +262,7 @@ where } else { // read_id being None means the snapshot acquired will only be used in this // request - self.snapshot = Some(Arc::new(engine.snapshot(None))); + self.snapshot = Some(Arc::new(engine.snapshot(snap_ctx))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -984,14 +996,18 @@ where &mut self, req: &RaftCmdRequest, delegate: &mut CachedReadDelegate, + snap_ctx: Option, read_id: Option, snap_updated: &mut bool, last_valid_ts: Timespec, ) -> Option> { let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, read_id); - (*snap_updated) = - local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + (*snap_updated) = local_read_ctx.maybe_update_snapshot( + delegate.get_tablet(), + snap_ctx.clone(), + last_valid_ts, + ); let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); if !delegate.is_in_leader_lease(snapshot_ts) { @@ -999,7 +1015,7 @@ where } let region = Arc::clone(&delegate.region); - let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + let mut response = delegate.execute(req, ®ion, None, snap_ctx, Some(local_read_ctx)); if let Some(snap) = response.snapshot.as_mut() { snap.bucket_meta = delegate.bucket_meta.clone(); } @@ -1024,11 +1040,11 @@ where // Stale read does not use cache, so we pass None for read_id let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); (*snap_updated) = - local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), None, last_valid_ts); let region = Arc::clone(&delegate.region); // Getting the snapshot - let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + let mut response = delegate.execute(req, ®ion, None, None, Some(local_read_ctx)); if let Some(snap) = response.snapshot.as_mut() { snap.bucket_meta = delegate.bucket_meta.clone(); } @@ -1042,6 +1058,7 @@ where pub fn propose_raft_command( &mut self, + snap_ctx: Option, read_id: Option, mut req: RaftCmdRequest, cb: Callback, @@ -1056,6 +1073,7 @@ where if let Some(read_resp) = self.try_local_leader_read( &req, &mut delegate, + snap_ctx, read_id, &mut snap_updated, last_valid_ts, @@ -1103,6 +1121,7 @@ where &req, &mut delegate, None, + None, &mut snap_updated, last_valid_ts, ) { @@ -1180,11 +1199,12 @@ where #[inline] pub fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) { - self.propose_raft_command(read_id, req, cb); + self.propose_raft_command(snap_ctx, read_id, req, cb); maybe_tls_local_read_metrics_flush(); } @@ -1218,7 +1238,11 @@ where &self.kv_engine } - fn get_snapshot(&mut self, read_context: &Option>) -> Arc { + fn 
get_snapshot( + &mut self, + _: Option, + read_context: &Option>, + ) -> Arc { read_context.as_ref().unwrap().snapshot().unwrap() } } @@ -1266,7 +1290,9 @@ mod tests { use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; + use hybrid_engine::{HybridEngine, HybridEngineSnapshot}; use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; + use region_cache_memory_engine::RegionCacheMemoryEngine; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; use time::Duration; @@ -1351,6 +1377,7 @@ mod tests { cmd: RaftCmdRequest, ) { reader.propose_raft_command( + None, None, cmd.clone(), Callback::read(Box::new(|resp| { @@ -1379,7 +1406,7 @@ mod tests { task: RaftCommand, read_id: Option, ) { - reader.propose_raft_command(read_id, task.request, task.callback); + reader.propose_raft_command(None, read_id, task.request, task.callback); assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); } @@ -1512,6 +1539,7 @@ mod tests { .mut_peer() .set_store_id(store_id + 1); reader.propose_raft_command( + None, None, cmd_store_id, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1536,6 +1564,7 @@ mod tests { .mut_peer() .set_id(leader2.get_id() + 1); reader.propose_raft_command( + None, None, cmd_peer_id, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1561,6 +1590,7 @@ mod tests { let mut cmd_term = cmd.clone(); cmd_term.mut_header().set_term(term6 - 2); reader.propose_raft_command( + None, None, cmd_term, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1597,8 +1627,9 @@ mod tests { ); // Channel full. - reader.propose_raft_command(None, cmd.clone(), Callback::None); + reader.propose_raft_command(None, None, cmd.clone(), Callback::None); reader.propose_raft_command( + None, None, cmd.clone(), Callback::read(Box::new(move |resp: ReadResponse| { @@ -1631,6 +1662,7 @@ mod tests { .update(Progress::applied_term(term6 + 3)); } reader.propose_raft_command( + None, None, cmd9.clone(), Callback::read(Box::new(|resp| { @@ -2007,7 +2039,7 @@ mod tests { let compare_ts = monotonic_raw_now(); // Case 1: snap_cache_context.read_id is None - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); assert_eq!( read_context @@ -2022,7 +2054,7 @@ mod tests { // snap_cache_context is *not* created with read_id, so calling // `maybe_update_snapshot` again will update the snapshot let compare_ts = monotonic_raw_now(); - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); let read_id = ThreadReadId::new(); @@ -2032,7 +2064,7 @@ mod tests { let compare_ts = monotonic_raw_now(); // Case 2: snap_cache_context.read_id is not None but not equals to the // snap_cache.cached_read_id - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); let snap_ts = read_context.snapshot_ts().unwrap(); assert_eq!( @@ -2050,7 +2082,7 @@ mod tests { // `maybe_update_snapshot` again will *not* update the snapshot // Case 3: snap_cache_context.read_id is not None and equals to the // snap_cache.cached_read_id - 
assert!(!read_context.maybe_update_snapshot(&db2, Timespec::new(0, 0))); + assert!(!read_context.maybe_update_snapshot(&db2, None, Timespec::new(0, 0))); assert_eq!(read_context.snapshot_ts().unwrap(), snap_ts); assert_eq!( read_context @@ -2065,7 +2097,7 @@ mod tests { // Case 4: delegate.last_valid_ts is larger than create_time of read_id let mut last_valid_ts = read_id_clone.create_time; last_valid_ts = last_valid_ts.add(Duration::nanoseconds(1)); - assert!(read_context.maybe_update_snapshot(&db2, last_valid_ts)); + assert!(read_context.maybe_update_snapshot(&db2, None, last_valid_ts)); assert!(read_context.snapshot_ts().unwrap() > snap_ts); assert!( read_context @@ -2385,4 +2417,187 @@ mod tests { .has_data_is_not_ready() ); } + + type HybridTestEnigne = HybridEngine; + type HybridEngineTestSnapshot = HybridEngineSnapshot; + + struct HybridEngineMockRouter { + p_router: SyncSender>, + c_router: SyncSender<(u64, CasualMessage)>, + } + + impl HybridEngineMockRouter { + #[allow(clippy::type_complexity)] + fn new() -> ( + HybridEngineMockRouter, + Receiver>, + Receiver<(u64, CasualMessage)>, + ) { + let (p_ch, p_rx) = sync_channel(1); + let (c_ch, c_rx) = sync_channel(1); + ( + HybridEngineMockRouter { + p_router: p_ch, + c_router: c_ch, + }, + p_rx, + c_rx, + ) + } + } + + impl ProposalRouter for HybridEngineMockRouter { + fn send( + &self, + cmd: RaftCommand, + ) -> std::result::Result<(), TrySendError>> { + ProposalRouter::send(&self.p_router, cmd) + } + } + + impl CasualRouter for HybridEngineMockRouter { + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::send(&self.c_router, region_id, msg) + } + } + + #[allow(clippy::type_complexity)] + fn new_hybrid_engine_reader( + path: &str, + store_id: u64, + store_meta: Arc>, + ) -> ( + TempDir, + LocalReader, + Receiver>, + RegionCacheMemoryEngine, + ) { + let path = Builder::new().prefix(path).tempdir().unwrap(); + let disk_engine = + engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); + let (ch, rx, _) = HybridEngineMockRouter::new(); + let memory_engine = RegionCacheMemoryEngine::default(); + let engine = HybridEngine::new(disk_engine, memory_engine.clone()); + let mut reader = LocalReader::new( + engine.clone(), + StoreMetaDelegate::new(store_meta, engine), + ch, + ); + reader.local_reader.store_id = Cell::new(Some(store_id)); + (path, reader, rx, memory_engine) + } + + fn get_snapshot( + snap_ctx: Option, + reader: &mut LocalReader, + request: RaftCmdRequest, + rx: &Receiver>, + ) -> Arc { + let (sender, receiver) = channel(); + reader.propose_raft_command( + snap_ctx, + None, + request, + Callback::read(Box::new(move |snap| { + sender.send(snap).unwrap(); + })), + ); + // no direct is expected + assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); + receiver.recv().unwrap().snapshot.unwrap().snap() + } + + #[test] + fn test_hybrid_engine_read() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx, memory_engine) = new_hybrid_engine_reader( + "test-local-hybrid-engine-reader", + store_id, + store_meta.clone(), + ); + + // set up region so we can acquire snapshot from local reader + let mut region1 = metapb::Region::default(); + region1.set_id(1); + let prs = new_peers(store_id, vec![2, 3, 4]); + region1.set_peers(prs.clone().into()); + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let leader2 = prs[0].clone(); + 
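+        // Register a read delegate that holds a valid leader lease so the local
+        // reader can serve the Snap request by itself; nothing should be forwarded
+        // to the raftstore, which is why the proposal receiver `rx` is expected to
+        // stay empty in `get_snapshot`.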
region1.set_region_epoch(epoch13.clone()); + let term6 = 6; + let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. + let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 1)); + + lease.renew(monotonic_raw_now()); + let remote = lease.maybe_new_remote_lease(term6).unwrap(); + { + let mut meta = store_meta.lock().unwrap(); + let read_delegate = ReadDelegate { + tag: String::new(), + region: Arc::new(region1.clone()), + peer_id: leader2.get_id(), + term: term6, + applied_term: term6, + leader_lease: Some(remote), + last_valid_ts: Timespec::new(0, 0), + txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), + txn_ext: Arc::new(TxnExt::default()), + read_progress, + pending_remove: false, + wait_data: false, + track_ver: TrackVer::new(), + bucket_meta: None, + }; + meta.readers.insert(1, read_delegate); + } + + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader2); + header.set_region_epoch(epoch13); + header.set_term(term6); + cmd.set_header(header); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + let s = get_snapshot(None, &mut reader, cmd.clone(), &rx); + assert!(!s.region_cache_snapshot_available()); + + memory_engine.new_region(1); + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_region_meta(1).unwrap().set_safe_ts(10); + } + + let mut snap_ctx = SnapshotContext { + read_ts: 15, + region_id: 1, + }; + + let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); + assert!(s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(false); + } + let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); + assert!(!s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + } + snap_ctx.read_ts = 5; + assert!(!s.region_cache_snapshot_available()); + } } diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index c529698fa143..949b2596f461 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -12,4 +12,4 @@ engine_traits = { workspace = true } collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } bytes = "1.0" -tikv_util = { workspace = true } \ No newline at end of file +tikv_util = { workspace = true } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 93e4c1a6d74d..a8ee66a5b232 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -107,12 +107,28 @@ pub struct RegionMemoryMeta { safe_ts: u64, } +impl RegionMemoryMeta { + pub fn set_can_read(&mut self, can_read: bool) { + self.can_read = can_read; + } + + pub fn set_safe_ts(&mut self, safe_ts: u64) { + self.safe_ts = safe_ts; + } +} + #[derive(Default)] pub struct RegionCacheMemoryEngineCore { engine: HashMap, region_metas: HashMap, } +impl RegionCacheMemoryEngineCore { + pub fn mut_region_meta(&mut self, region_id: u64) -> Option<&mut RegionMemoryMeta> { + self.region_metas.get_mut(®ion_id) + } +} + 
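+// `core()` together with `mut_region_meta()` lets callers (the tests in this patch)
+// mark a cached region as readable and advance its `safe_ts`, which are the
+// preconditions for creating a region cache snapshot.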
/// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in /// the leaders' store. Incoming writes that are written to disk engine (now, /// RocksDB) are also written to the RegionCacheMemoryEngine, leading to a @@ -135,6 +151,12 @@ pub struct RegionCacheMemoryEngine { core: Arc>, } +impl RegionCacheMemoryEngine { + pub fn core(&self) -> &Arc> { + &self.core + } +} + impl Debug for RegionCacheMemoryEngine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Region Cache Memory Engine") @@ -157,8 +179,8 @@ impl RegionCacheEngine for RegionCacheMemoryEngine { type Snapshot = RegionCacheSnapshot; // todo(SpadeA): add sequence number logic - fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { - RegionCacheSnapshot::new(self.clone(), region_id, read_ts) + fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option { + RegionCacheSnapshot::new(self.clone(), region_id, read_ts, seq_num) } } @@ -193,10 +215,6 @@ pub struct RegionCacheIterator { impl Iterable for RegionCacheMemoryEngine { type Iterator = RegionCacheIterator; - fn iterator(&self, cf: &str) -> Result { - unimplemented!() - } - fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { unimplemented!() } @@ -357,12 +375,20 @@ impl Mutable for RegionCacheWriteBatch { pub struct RegionCacheSnapshot { region_id: u64, snapshot_ts: u64, + // Sequence number is shared between RegionCacheEngine and disk KvEnigne to + // provide atomic write + sequence_number: u64, region_memory_engine: RegionMemoryEngine, engine: RegionCacheMemoryEngine, } impl RegionCacheSnapshot { - pub fn new(engine: RegionCacheMemoryEngine, region_id: u64, read_ts: u64) -> Option { + pub fn new( + engine: RegionCacheMemoryEngine, + region_id: u64, + read_ts: u64, + seq_num: u64, + ) -> Option { let mut core = engine.core.lock().unwrap(); let region_meta = core.region_metas.get_mut(®ion_id)?; if !region_meta.can_read { @@ -379,6 +405,7 @@ impl RegionCacheSnapshot { Some(RegionCacheSnapshot { region_id, snapshot_ts: read_ts, + sequence_number: seq_num, region_memory_engine: core.engine.get(®ion_id).unwrap().clone(), engine: engine.clone(), }) @@ -446,7 +473,7 @@ impl CfNamesExt for RegionCacheSnapshot { impl SnapshotMiscExt for RegionCacheSnapshot { fn sequence_number(&self) -> u64 { - self.snapshot_ts + self.sequence_number } } @@ -515,31 +542,31 @@ mod tests { } }; - assert!(engine.snapshot(1, 5).is_none()); + assert!(engine.snapshot(1, 5, u64::MAX).is_none()); { let mut core = engine.core.lock().unwrap(); core.region_metas.get_mut(&1).unwrap().can_read = true; } - let s1 = engine.snapshot(1, 5).unwrap(); + let s1 = engine.snapshot(1, 5, u64::MAX).unwrap(); { let mut core = engine.core.lock().unwrap(); core.region_metas.get_mut(&1).unwrap().safe_ts = 5; } - assert!(engine.snapshot(1, 5).is_none()); - let s2 = engine.snapshot(1, 10).unwrap(); + assert!(engine.snapshot(1, 5, u64::MAX).is_none()); + let s2 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(5, 1); verify_snapshot_count(10, 1); - let s3 = engine.snapshot(1, 10).unwrap(); + let s3 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s1); verify_snapshot_count(5, 0); drop(s2); verify_snapshot_count(10, 1); - let s4 = engine.snapshot(1, 10).unwrap(); + let s4 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s4); verify_snapshot_count(10, 1); @@ -609,7 +636,7 @@ mod tests { fill_data_in_skiplist(sl, (1..100).step_by(1)); } - let snapshot = engine.snapshot(1, 10).unwrap(); + 
let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); let opts = ReadOptions::default(); for i in 1..100 { let k = construct_key(i); @@ -644,7 +671,7 @@ mod tests { } let mut iter_opt = IterOptions::default(); - let snapshot = engine.snapshot(1, 10).unwrap(); + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); // boundaries are not set assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); @@ -719,7 +746,7 @@ mod tests { iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); - let snapshot = engine.snapshot(1, 10).unwrap(); + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); verify_key_values(&mut iter, step, 99, i32::MIN); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 1e0c57c3706f..2521fccb694a 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,8 +19,8 @@ use encryption_export::DataKeyManager; use engine_rocks::{RocksCompactedEvent, RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, WriteBatch, - CF_DEFAULT, CF_RAFT, + Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SnapshotContext, + SyncMutable, WriteBatch, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; @@ -123,19 +123,21 @@ pub trait Simulator { fn read( &mut self, + snap_ctx: Option, batch_id: Option, request: RaftCmdRequest, timeout: Duration, ) -> Result { let node_id = request.get_header().get_peer().get_store_id(); let (cb, mut rx) = make_cb::(&request); - self.async_read(node_id, batch_id, request, cb); + self.async_read(snap_ctx, node_id, batch_id, request, cb); rx.recv_timeout(timeout) .map_err(|_| Error::Timeout(format!("request timeout for {:?}", timeout))) } fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -450,11 +452,16 @@ where pub fn read( &self, + snap_ctx: Option, batch_id: Option, request: RaftCmdRequest, timeout: Duration, ) -> Result { - match self.sim.wl().read(batch_id, request.clone(), timeout) { + match self + .sim + .wl() + .read(snap_ctx, batch_id, request.clone(), timeout) + { Err(e) => { warn!("failed to read {:?}: {:?}", request, e); Err(e) @@ -478,7 +485,7 @@ where } } let ret = if is_read { - self.sim.wl().read(None, request.clone(), timeout) + self.sim.wl().read(None, None, request.clone(), timeout) } else { self.sim.rl().call_command(request.clone(), timeout) }; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 7564da0e27e1..5fdd4f24822f 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -10,7 +10,7 @@ use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, KvEngine}; +use engine_traits::{Engines, KvEngine, SnapshotContext}; use kvproto::{ kvrpcpb::ApiVersion, metapb, @@ -459,6 +459,7 @@ impl Simulator for NodeCluster { fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -480,7 +481,7 @@ impl Simulator for NodeCluster { } let mut guard 
= self.trans.core.lock().unwrap(); let router = guard.routers.get_mut(&node_id).unwrap(); - router.read(batch_id, request, cb).unwrap(); + router.read(snap_ctx, batch_id, request, cb).unwrap(); } fn send_raft_msg(&mut self, msg: raft_serverpb::RaftMessage) -> Result<()> { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index bbc4ee2cf497..883a38edb239 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -15,7 +15,7 @@ use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, KvEngine}; +use engine_traits::{Engines, KvEngine, SnapshotContext}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; @@ -737,6 +737,7 @@ impl Simulator for ServerCluster { fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -750,7 +751,9 @@ impl Simulator for ServerCluster { cb.invoke_with_response(resp); } Some(meta) => { - meta.sim_router.read(batch_id, request, cb).unwrap(); + meta.sim_router + .read(snap_ctx, batch_id, request, cb) + .unwrap(); } }; } diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 4c21552cee51..3824e0dbe753 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -11,7 +11,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; -use engine_traits::KvEngine; +use engine_traits::{KvEngine, SnapshotContext}; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::eraftpb::MessageType; use raftstore::{ @@ -257,11 +257,12 @@ impl> RaftStoreRouter for SimulateTrans impl> LocalReadRouter for SimulateTransport { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()> { - self.ch.read(read_id, req, cb) + self.ch.read(snap_ctx, read_id, req, cb) } fn release_snapshot_cache(&mut self) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 8933f4dca743..019a7416a7a0 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -459,7 +459,7 @@ pub fn read_on_peer>( read_quorum, ); request.mut_header().set_peer(peer); - cluster.read(None, request, timeout) + cluster.read(None, None, request, timeout) } pub fn async_read_on_peer>( @@ -481,7 +481,10 @@ pub fn async_read_on_peer>( request.mut_header().set_replica_read(replica_read); let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); - cluster.sim.wl().async_read(node_id, None, request, cb); + cluster + .sim + .wl() + .async_read(None, node_id, None, request, cb); Box::pin(async move { let fut = rx.next(); fut.await.unwrap() @@ -512,7 +515,7 @@ pub fn batch_read_on_peer>( cluster .sim .wl() - .async_read(node_id, batch_id.clone(), request, cb); + .async_read(None, node_id, batch_id.clone(), request, cb); len += 1; } while results.len() < len { @@ -536,7 +539,7 @@ pub fn read_index_on_peer>( read_quorum, ); request.mut_header().set_peer(peer); - cluster.read(None, request, timeout) + cluster.read(None, None, request, timeout) } pub fn 
async_read_index_on_peer>( @@ -561,7 +564,10 @@ pub fn async_read_index_on_peer>( request.mut_header().set_peer(peer); let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); - cluster.sim.wl().async_read(node_id, None, request, cb); + cluster + .sim + .wl() + .async_read(None, node_id, None, request, cb); Box::pin(async move { let fut = rx.next(); fut.await.unwrap() diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 82563666f048..9f42925b6d46 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -22,7 +22,7 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; -use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; +use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot, SnapshotContext}; use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt, TryFutureExt}; use kvproto::{ errorpb, @@ -644,10 +644,15 @@ where })); let tracker = store_cb.read_tracker().unwrap(); + let snap_ctx = ctx.start_ts.map(|ts| SnapshotContext { + read_ts: ts.into_inner(), + region_id: ctx.pb_ctx.get_region_id(), + }); + if res.is_ok() { res = self .router - .read(ctx.read_id, cmd, store_cb) + .read(snap_ctx, ctx.read_id, cmd, store_cb) .map_err(kv::Error::from); } async move { diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index eab0f38d749e..2650434c80f2 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, RwLock}; use collections::HashSet; use crossbeam::channel::TrySendError; use engine_rocks::{RocksEngine, RocksSnapshot}; -use engine_traits::{KvEngine, ALL_CFS, CF_DEFAULT}; +use engine_traits::{KvEngine, SnapshotContext, ALL_CFS, CF_DEFAULT}; use futures::future::FutureExt; use kvproto::{ kvrpcpb::{Context, ExtraOp as TxnExtraOp}, @@ -121,6 +121,7 @@ impl RaftStoreRouter for SyncBenchRouter { impl LocalReadRouter for SyncBenchRouter { fn read( &mut self, + _: Option, _: Option, req: RaftCmdRequest, cb: Callback, diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index e207525bcea6..f6fec8b35dee 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -53,7 +53,7 @@ fn test_witness_update_region_in_local_reader() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request.clone(), Duration::from_millis(100)) + .read(None, None, request.clone(), Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), @@ -105,7 +105,7 @@ fn test_witness_not_reported_while_disabled() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request.clone(), Duration::from_millis(100)) + .read(None, None, request.clone(), Duration::from_millis(100)) .unwrap(); assert!(resp.get_header().has_error()); assert!(!resp.get_header().get_error().has_is_witness()); @@ -492,7 +492,7 @@ fn test_non_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), @@ -517,7 +517,7 @@ fn test_non_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, 
Duration::from_millis(100)) .unwrap(); assert_eq!(resp.get_header().has_error(), false); } diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index 3bb93f6809b4..998269afb98a 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -15,6 +15,7 @@ mod test_life; mod test_merge; mod test_multi; mod test_prevote; +mod test_region_cache; mod test_region_change_observer; mod test_region_heartbeat; mod test_region_info_accessor; diff --git a/tests/integrations/raftstore/test_region_cache.rs b/tests/integrations/raftstore/test_region_cache.rs new file mode 100644 index 000000000000..4d95ff6701ce --- /dev/null +++ b/tests/integrations/raftstore/test_region_cache.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use test_raftstore::new_node_cluster_with_hybrid_engine; + +#[test] +fn test_basic_read() { + let _cluster = new_node_cluster_with_hybrid_engine(1, 3); + // todo(SpadeA): add test logic +} + +#[test] +fn test_read_index() { + let _cluster = new_node_cluster_with_hybrid_engine(1, 3); + // todo(SpadeA): add test logic +} + +// todo(SpadeA): more tests when other relevant modules are ready. diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index 7879ffc49be7..e42ac75598ec 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -474,7 +474,7 @@ fn test_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), From fe80806fcab1832c3cc3684dcd50b67f388fb5c1 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 21 Dec 2023 13:09:23 +0800 Subject: [PATCH 201/203] metrics: change resource group label name (#16192) close tikv/tikv#16191 change metrics label name for resource-group-name from "name" to "resource_group". To be backward compatible with old grafana panel, we add a new label name and keep the old one. We are going to deprecate the old label in v8.0. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/metrics.rs | 6 +++--- src/server/metrics.rs | 3 ++- src/server/service/kv.rs | 25 +++++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs index c94040925011..457230634923 100644 --- a/components/resource_control/src/metrics.rs +++ b/components/resource_control/src/metrics.rs @@ -7,19 +7,19 @@ lazy_static! 
{ pub static ref BACKGROUND_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resource_control_background_quota_limiter", "The quota limiter of background resource groups per resource type", - &["name", "type"] + &["resource_group", "type"] ) .unwrap(); pub static ref BACKGROUND_RESOURCE_CONSUMPTION: IntCounterVec = register_int_counter_vec!( "tikv_resource_control_background_resource_consumption", "Total resource consumed of background resource groups per resource type", - &["name", "type"] + &["resource_group", "type"] ) .unwrap(); pub static ref BACKGROUND_TASKS_WAIT_DURATION: IntCounterVec = register_int_counter_vec!( "tikv_resource_control_background_task_wait_duration", "Total wait duration of background tasks per resource group", - &["name"] + &["resource_group"] ) .unwrap(); pub static ref PRIORITY_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 3ad9c5bdde0f..c55a0c0ae8a5 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -217,10 +217,11 @@ lazy_static! { &["type"] ) .unwrap(); + // TODO: deprecate the "name" label in v8.0. pub static ref GRPC_RESOURCE_GROUP_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_grpc_resource_group_total", "Total number of handle grpc message for each resource group", - &["name"] + &["name", "resource_group"] ) .unwrap(); pub static ref GRPC_PROXY_MSG_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 01aae59fe183..02bfca0473ea 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -204,7 +204,7 @@ macro_rules! handle_request { resource_group_priority= ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let resp = $future_name(&self.storage, req); let task = async move { @@ -493,7 +493,10 @@ impl Tikv for Service { } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let begin_instant = Instant::now(); @@ -535,7 +538,10 @@ impl Tikv for Service { ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let begin_instant = Instant::now(); @@ -629,7 +635,10 @@ impl Tikv for Service { ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let mut stream = self @@ -1185,7 +1194,7 @@ fn handle_batch_commands_request( } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[ resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) @@ -1208,7 
+1217,7 @@ fn handle_batch_commands_request( resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_raw_get(&req) @@ -1231,7 +1240,7 @@ fn handle_batch_commands_request( resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority ); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); @@ -1268,7 +1277,7 @@ fn handle_batch_commands_request( resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); From 01498b0519d89fd9f6b72546312eded1134a3a2b Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 21 Dec 2023 15:36:23 +0800 Subject: [PATCH 202/203] metric: add read scan duration panel (#16205) ref tikv/tikv#15066 Add scan lock read duration panel to check read lock holding time. Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 11 +- metrics/grafana/tikv_details.json | 459 ++++++++++++++++------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 341 insertions(+), 131 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 966346f741ec..1ed32eb6fe59 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -6852,7 +6852,16 @@ def PessimisticLocking() -> RowPanel: description="The length includes the entering transaction itself", yaxis=yaxis(format=UNITS.SHORT), metric="tikv_lock_wait_queue_length_bucket", - ) + ), + graph_panel_histogram_quantiles( + title="In-memory scan lock read duration", + description="The duration scan in-memory pessimistic locks with read lock", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_storage_mvcc_scan_lock_read_duration_seconds", + by_labels=["type"], + hide_count=True, + hide_avg=True, + ), ] ) return layout.row_panel diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3af1480137c4..5dd36b73dfb5 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -51866,7 +51866,7 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, "y": 35 }, @@ -51939,6 +51939,207 @@ "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration scan in-memory pessimistic locks with read lock", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + 
"fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 371, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{type}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "(sum(rate(\n 
tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "In-memory scan lock read duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } } ], "repeat": null, @@ -51975,7 +52176,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 372, "interval": null, "links": [], "maxDataPoints": 100, @@ -52014,7 +52215,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52147,7 +52348,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52310,7 +52511,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -52458,7 +52659,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 376, "interval": null, "isNew": true, "legend": { @@ -52598,7 +52799,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 376, + "id": 377, "interval": null, "legend": { "show": false @@ -52702,7 +52903,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 377, + "id": 378, "interval": null, "legend": { "show": false @@ -52806,7 +53007,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 378, + "id": 379, "interval": null, "legend": { "show": false @@ -52903,7 +53104,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 379, + "id": 380, "interval": null, "isNew": true, "legend": { @@ -53043,7 +53244,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 380, + "id": 381, "interval": null, "legend": { "show": false @@ -53147,7 +53348,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 381, + "id": 382, "interval": null, "legend": { "show": false @@ -53251,7 +53452,7 @@ "hideTimeOverride": false, 
"hideZeroBuckets": true, "highlightCards": true, - "id": 382, + "id": 383, "interval": null, "legend": { "show": false @@ -53348,7 +53549,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53481,7 +53682,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53614,7 +53815,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -53754,7 +53955,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 386, + "id": 387, "interval": null, "legend": { "show": false @@ -53851,7 +54052,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 388, "interval": null, "isNew": true, "legend": { @@ -53987,7 +54188,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 389, "interval": null, "links": [], "maxDataPoints": 100, @@ -54026,7 +54227,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -54159,7 +54360,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54292,7 +54493,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -54425,7 +54626,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -54558,7 +54759,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -54691,7 +54892,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 394, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -54824,7 +55025,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 396, "interval": null, "isNew": true, "legend": { @@ -54964,7 +55165,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 396, + "id": 397, "interval": null, "legend": { "show": false @@ -55061,7 +55262,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55194,7 +55395,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -55334,7 +55535,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 399, + "id": 400, "interval": null, "legend": { "show": false @@ -55431,7 +55632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55564,7 +55765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55697,7 +55898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -55830,7 +56031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -55978,7 +56179,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -56126,7 +56327,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 406, "interval": null, "isNew": true, "legend": { @@ -56262,7 +56463,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 407, 
"interval": null, "links": [], "maxDataPoints": 100, @@ -56301,7 +56502,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56434,7 +56635,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -56567,7 +56768,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -56700,7 +56901,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 411, "interval": null, "isNew": true, "legend": { @@ -56836,7 +57037,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 412, "interval": null, "links": [], "maxDataPoints": 100, @@ -56875,7 +57076,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -57038,7 +57239,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57171,7 +57372,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 415, "interval": null, "isNew": true, "legend": { @@ -57311,7 +57512,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 415, + "id": 416, "interval": null, "legend": { "show": false @@ -57415,7 +57616,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 416, + "id": 417, "interval": null, "legend": { "show": false @@ -57512,7 +57713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 417, + "id": 418, "interval": null, "isNew": true, "legend": { @@ -57667,7 +57868,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 418, + "id": 419, "interval": null, "legend": { "show": false @@ -57771,7 +57972,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 419, + "id": 420, "interval": null, "legend": { "show": false @@ -57875,7 +58076,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 420, + "id": 421, "interval": null, "legend": { "show": false @@ -57972,7 +58173,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 422, "interval": null, "isNew": true, "legend": { @@ -58142,7 +58343,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 422, + "id": 423, "interval": null, "legend": { "show": false @@ -58239,7 +58440,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 423, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58440,7 +58641,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -58641,7 +58842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -58774,7 +58975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -58937,7 +59138,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -59070,7 +59271,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59203,7 +59404,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 429, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59404,7 +59605,7 @@ }, "height": null, "hideTimeOverride": false, - 
"id": 430, + "id": 431, "interval": null, "isNew": true, "legend": { @@ -59544,7 +59745,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 431, + "id": 432, "interval": null, "legend": { "show": false @@ -59648,7 +59849,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 432, + "id": 433, "interval": null, "legend": { "show": false @@ -59752,7 +59953,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 433, + "id": 434, "interval": null, "legend": { "show": false @@ -59856,7 +60057,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 434, + "id": 435, "interval": null, "legend": { "show": false @@ -59960,7 +60161,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 435, + "id": 436, "interval": null, "legend": { "show": false @@ -60064,7 +60265,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 436, + "id": 437, "interval": null, "legend": { "show": false @@ -60168,7 +60369,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 437, + "id": 438, "interval": null, "legend": { "show": false @@ -60265,7 +60466,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60413,7 +60614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -60546,7 +60747,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60679,7 +60880,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60827,7 +61028,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -60963,7 +61164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 444, "interval": null, "links": [], "maxDataPoints": 100, @@ -61002,7 +61203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -61135,7 +61336,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61268,7 +61469,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 446, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -61401,7 +61602,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 448, "interval": null, "isNew": true, "legend": { @@ -61534,7 +61735,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 448, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -61682,7 +61883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 450, "interval": null, "isNew": true, "legend": { @@ -61886,7 +62087,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 450, + "id": 451, "interval": null, "links": [], "maxDataPoints": 100, @@ -61937,7 +62138,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 451, + "id": 452, "interval": null, "links": [], "maxDataPoints": 100, @@ -62033,7 +62234,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 452, + "id": 453, "interval": null, "links": [], "maxDataPoints": 100, @@ -62108,7 +62309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 454, "interval": null, "links": [], "maxDataPoints": 100, 
@@ -62183,7 +62384,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 455, "interval": null, "links": [], "maxDataPoints": 100, @@ -62258,7 +62459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 455, + "id": 456, "interval": null, "links": [], "maxDataPoints": 100, @@ -62333,7 +62534,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 456, + "id": 457, "interval": null, "links": [], "maxDataPoints": 100, @@ -62408,7 +62609,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 458, "interval": null, "links": [], "maxDataPoints": 100, @@ -62483,7 +62684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 459, "interval": null, "links": [], "maxDataPoints": 100, @@ -62562,7 +62763,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62695,7 +62896,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -62828,7 +63029,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -62961,7 +63162,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 463, "interval": null, "isNew": true, "legend": { @@ -63094,7 +63295,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63227,7 +63428,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -63375,7 +63576,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 466, "interval": null, "isNew": true, "legend": { @@ -63508,7 +63709,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 466, + "id": 467, "interval": null, "isNew": true, "legend": { @@ -63641,7 +63842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 467, + "id": 468, "interval": null, "isNew": true, "legend": { @@ -63807,7 +64008,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 468, + "id": 469, "interval": null, "legend": { "show": false @@ -63911,7 +64112,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 470, "interval": null, "legend": { "show": false @@ -64015,7 +64216,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 471, "interval": null, "legend": { "show": false @@ -64119,7 +64320,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 472, "interval": null, "legend": { "show": false @@ -64223,7 +64424,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 472, + "id": 473, "interval": null, "legend": { "show": false @@ -64327,7 +64528,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 474, "interval": null, "legend": { "show": false @@ -64431,7 +64632,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 474, + "id": 475, "interval": null, "legend": { "show": false @@ -64535,7 +64736,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 475, + "id": 476, "interval": null, "legend": { "show": false @@ -64632,7 +64833,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -64765,7 +64966,7 @@ }, "height": 
null, "hideTimeOverride": false, - "id": 477, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -64898,7 +65099,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -65031,7 +65232,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -65164,7 +65365,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -65297,7 +65498,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 481, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -65430,7 +65631,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 482, + "id": 483, "interval": null, "isNew": true, "legend": { @@ -65570,7 +65771,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 483, + "id": 484, "interval": null, "legend": { "show": false @@ -65674,7 +65875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 484, + "id": 485, "interval": null, "legend": { "show": false @@ -65771,7 +65972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 485, + "id": 486, "interval": null, "isNew": true, "legend": { @@ -65904,7 +66105,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -66037,7 +66238,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -66170,7 +66371,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66303,7 +66504,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66436,7 +66637,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 491, "interval": null, "isNew": true, "legend": { @@ -66572,7 +66773,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 492, "interval": null, "links": [], "maxDataPoints": 100, @@ -66611,7 +66812,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -66744,7 +66945,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -66877,7 +67078,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 494, + "id": 495, "interval": null, "isNew": true, "legend": { @@ -67010,7 +67211,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 495, + "id": 496, "interval": null, "isNew": true, "legend": { @@ -67146,7 +67347,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 496, + "id": 497, "interval": null, "links": [], "maxDataPoints": 100, @@ -67185,7 +67386,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 497, + "id": 498, "interval": null, "isNew": true, "legend": { @@ -67386,7 +67587,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 499, "interval": null, "isNew": true, "legend": { diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index d715ccca3519..cc9c77697556 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -ac3bba8b714ed9cad64ece88ff1f7b4bb075ca178f270e7e1f41461d5ac37bbd ./metrics/grafana/tikv_details.json +75c3d3d71080a5e3bd40273bc2250797ab929e6c6ab46df89cad79d837531a2d 
./metrics/grafana/tikv_details.json From a0e8a7a163302bc9a7be5fd5a903b6a156797eb8 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 21 Dec 2023 16:34:53 +0800 Subject: [PATCH 203/203] Revert "*: make unified-pool use FuturePool (#15925)" (#16050) close tikv/tikv#16015 Revert "*: make unified-pool use FuturePool (#15925)" - revert due to performance regression Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/yatp_pool/mod.rs | 15 +-- src/read_pool.rs | 155 +++++++++++++--------- 2 files changed, 97 insertions(+), 73 deletions(-) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 0b4cffbdc146..cfdfc540b306 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -391,7 +391,7 @@ impl YatpPoolBuilder { FuturePool::from_pool(pool, &name, size, task) } - fn build_single_level_pool(self) -> ThreadPool { + pub fn build_single_level_pool(self) -> ThreadPool { let (builder, runner) = self.create_builder(); builder.build_with_queue_and_runner( yatp::queue::QueueType::SingleLevel, @@ -399,18 +399,7 @@ impl YatpPoolBuilder { ) } - pub fn build_multi_level_future_pool(self) -> FuturePool { - let name = self - .name_prefix - .clone() - .unwrap_or_else(|| "yatp_pool".to_string()); - let size = self.core_thread_count; - let task = self.max_tasks; - let pool = self.build_multi_level_pool(); - FuturePool::from_pool(pool, &name, size, task) - } - - fn build_multi_level_pool(self) -> ThreadPool { + pub fn build_multi_level_pool(self) -> ThreadPool { let name = self .name_prefix .clone() diff --git a/src/read_pool.rs b/src/read_pool.rs index 111d3f0ce8ae..2ea6c7e36b24 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -27,7 +27,10 @@ use tikv_util::{ worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; -use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; +use tracker::TrackedFuture; +use yatp::{ + metrics::MULTILEVEL_LEVEL_ELAPSED, pool::Remote, queue::Extras, task::future::TaskCell, +}; use self::metrics::*; use crate::{ @@ -53,9 +56,11 @@ pub enum ReadPool { read_pool_low: FuturePool, }, Yatp { - pool: FuturePool, - // deprecated. will remove in the v8.x. + pool: yatp::ThreadPool, running_tasks: IntGauge, + running_threads: IntGauge, + max_tasks: usize, + pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -76,11 +81,17 @@ impl ReadPool { ReadPool::Yatp { pool, running_tasks, + running_threads, + max_tasks, + pool_size, resource_ctl, time_slice_inspector, } => ReadPoolHandle::Yatp { - remote: pool.clone(), + remote: pool.remote().clone(), running_tasks: running_tasks.clone(), + running_threads: running_threads.clone(), + max_tasks: *max_tasks, + pool_size: *pool_size, resource_ctl: resource_ctl.clone(), time_slice_inspector: time_slice_inspector.clone(), }, @@ -96,8 +107,11 @@ pub enum ReadPoolHandle { read_pool_low: FuturePool, }, Yatp { - remote: FuturePool, + remote: Remote, running_tasks: IntGauge, + running_threads: IntGauge, + max_tasks: usize, + pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -132,10 +146,19 @@ impl ReadPoolHandle { ReadPoolHandle::Yatp { remote, running_tasks, + max_tasks, resource_ctl, .. } => { let running_tasks = running_tasks.clone(); + // Note that the running task number limit is not strict. 
+ // If several tasks are spawned at the same time while the running task number + // is close to the limit, they may all pass this check and the number of running + // tasks may exceed the limit. + if running_tasks.get() as usize >= *max_tasks { + return Err(ReadPoolError::UnifiedReadPoolFull); + } + running_tasks.inc(); let fixed_level = match priority { CommandPri::High => Some(0), @@ -145,33 +168,31 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); - let running_tasks1 = running_tasks.clone(); - if let Some(resource_ctl) = resource_ctl { - let fut = with_resource_limiter( - ControlledFuture::new( - async move { - f.await; - running_tasks.dec(); - }, - resource_ctl.clone(), - group_name, - ), - resource_limiter, - ); - remote.spawn_with_extras(fut, extras).map_err(|e| { - running_tasks1.dec(); - e - })?; + let task_cell = if let Some(resource_ctl) = resource_ctl { + TaskCell::new( + TrackedFuture::new(with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_name, + ), + resource_limiter, + )), + extras, + ) } else { - let fut = async move { - f.await; - running_tasks.dec(); - }; - remote.spawn_with_extras(fut, extras).map_err(|e| { - running_tasks1.dec(); - e - })?; - } + TaskCell::new( + TrackedFuture::new(async move { + f.await; + running_tasks.dec(); + }), + extras, + ) + }; + remote.spawn(task_cell); } } Ok(()) @@ -211,7 +232,7 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { read_pool_normal, .. } => read_pool_normal.get_pool_size(), - ReadPoolHandle::Yatp { remote, .. } => remote.get_pool_size(), + ReadPoolHandle::Yatp { pool_size, .. } => *pool_size, } } @@ -221,10 +242,10 @@ impl ReadPoolHandle { read_pool_normal, .. } => read_pool_normal.get_running_task_count() / read_pool_normal.get_pool_size(), ReadPoolHandle::Yatp { - remote, running_tasks, + pool_size, .. - } => running_tasks.get() as usize / remote.get_pool_size(), + } => running_tasks.get() as usize / *pool_size, } } @@ -233,19 +254,34 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { remote, .. } => { - remote.scale_pool_size(max_thread_count); + ReadPoolHandle::Yatp { + remote, + running_threads, + max_tasks, + pool_size, + .. + } => { + remote.scale_workers(max_thread_count); + *max_tasks = max_tasks + .saturating_div(*pool_size) + .saturating_mul(max_thread_count); + running_threads.set(max_thread_count as i64); + *pool_size = max_thread_count; } } } - pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + pub fn set_max_tasks_per_worker(&mut self, tasks_per_thread: usize) { match self { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { remote, .. } => { - remote.set_max_tasks_per_worker(tasks_per_thread); + ReadPoolHandle::Yatp { + max_tasks, + pool_size, + .. 
+ } => { + *max_tasks = tasks_per_thread.saturating_mul(*pool_size); } } } @@ -452,11 +488,6 @@ pub fn build_yatp_read_pool_with_name( config.max_thread_count, ), ) - .max_tasks( - config - .max_tasks_per_worker - .saturating_mul(config.max_thread_count), - ) .after_start(move || { let engine = raftkv.lock().unwrap().clone(); set_tls_engine(engine); @@ -468,15 +499,21 @@ pub fn build_yatp_read_pool_with_name( .enable_task_wait_metrics(enable_task_wait_metrics); let pool = if let Some(ref r) = resource_ctl { - builder.build_priority_future_pool(r.clone()) + builder.build_priority_pool(r.clone()) } else { - builder.build_multi_level_future_pool() + builder.build_multi_level_pool() }; let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS .with_label_values(&[&unified_read_pool_name]), + running_threads: UNIFIED_READ_POOL_RUNNING_THREADS + .with_label_values(&[&unified_read_pool_name]), + max_tasks: config + .max_tasks_per_worker + .saturating_mul(config.max_thread_count), + pool_size: config.max_thread_count, resource_ctl, time_slice_inspector, } @@ -754,6 +791,12 @@ mod metrics { &["name"] ) .unwrap(); + pub static ref UNIFIED_READ_POOL_RUNNING_THREADS: IntGaugeVec = register_int_gauge_vec!( + "tikv_unified_read_pool_thread_count", + "The number of running threads in the unified read pool", + &["name"] + ) + .unwrap(); } } @@ -762,8 +805,6 @@ mod tests { use std::{thread, time::Duration}; use futures::channel::oneshot; - use futures_executor::block_on; - use kvproto::kvrpcpb::ResourceControlContext; use raftstore::store::{ReadStats, WriteStats}; use resource_control::ResourceGroupManager; @@ -823,7 +864,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } tx1.send(()).unwrap(); @@ -884,7 +925,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -897,7 +938,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } } @@ -946,18 +987,12 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } + // TODO: move running task by priority to read_pool. // spawn a high-priority task, should not return Full error. 
- let (task_high, tx_h) = gen_task(); - let mut ctx = ResourceControlContext::default(); - ctx.override_priority = 16; // high priority - let metadata = TaskMetadata::from_ctx(&ctx); - let f = handle.spawn_handle(task_high, CommandPri::Normal, 6, metadata, None); - tx_h.send(()).unwrap(); - block_on(f).unwrap(); tx1.send(()).unwrap(); tx2.send(()).unwrap(); @@ -972,7 +1007,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } }