From 2f2fae900787ca9d663380b05bb25b52d7e07f50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Tue, 26 Nov 2024 14:41:40 +0100 Subject: [PATCH 1/2] chore(dre): Updating the list of cordoned DCs with FM1, SJ1, AT1, and TP1 (#1117) --- cordoned_features.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cordoned_features.yaml b/cordoned_features.yaml index 5f949a129..23338c3d3 100644 --- a/cordoned_features.yaml +++ b/cordoned_features.yaml @@ -9,3 +9,16 @@ features: # https://dfinity.slack.com/archives/C05LD0CEAHY/p1730385735765399 - feature: data_center value: dl1 + explanation: "offboarding DL1 DC after 48 months" + - feature: data_center + value: fm1 + explanation: "offboarding FM1 DC after 48 months" + - feature: data_center + value: sj1 + explanation: "offboarding SJ1 DC after 48 months" + - feature: data_center + value: at1 + explanation: "offboarding AT1 DC after 48 months" + - feature: data_center + value: tp1 + explanation: "offboarding TP1 DC after 48 months" From ba28578444280ff4658f6d24a7014665da4e4dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Tomi=C4=87?= Date: Tue, 26 Nov 2024 15:01:05 +0100 Subject: [PATCH 2/2] feat: update cordoned features handling and support for local file (#1119) --- BUILD.bazel | 1 + rs/cli/BUILD.bazel | 7 +++- rs/cli/src/commands/mod.rs | 4 +- rs/cli/src/cordoned_feature_fetcher.rs | 42 +++++++++++-------- rs/cli/src/ctx.rs | 2 +- rs/cli/src/runner.rs | 4 +- rs/cli/src/store.rs | 30 ++++++++----- .../unit_tests/cordoned_feature_fetcher.rs | 16 +++---- rs/cli/src/unit_tests/replace.rs | 7 ++-- rs/decentralization/src/network.rs | 15 +++---- 10 files changed, 75 insertions(+), 53 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 5039c9edb..eb9642db3 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -4,6 +4,7 @@ exports_files([ "archive_canister.wasm.gz", "clippy.toml", "rustfmt.toml", + "cordoned_features.yaml", "WORKSPACE.bazel", ]) diff --git a/rs/cli/BUILD.bazel b/rs/cli/BUILD.bazel index 9ed9be89d..de19a42c3 100644 --- a/rs/cli/BUILD.bazel +++ b/rs/cli/BUILD.bazel @@ -1,6 +1,6 @@ load("@crate_index_dre//:defs.bzl", "aliases", "all_crate_deps") load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_test", "rust_library") -load("@rules_rust//cargo:defs.bzl", "cargo_build_script", "cargo_dep_env") +load("@rules_rust//cargo:defs.bzl", "cargo_build_script") DEPS = [ "//rs/ic-canisters", @@ -34,7 +34,10 @@ rust_binary( ), deps = all_crate_deps( normal = True, - ) + DEPS + ["//rs/cli:dre-lib", ":build_script"] + ) + DEPS + ["//rs/cli:dre-lib", ":build_script"], + data = [ + "//:cordoned_features.yaml", + ] ) rust_library( diff --git a/rs/cli/src/commands/mod.rs b/rs/cli/src/commands/mod.rs index 658f723a3..2eb3a6288 100644 --- a/rs/cli/src/commands/mod.rs +++ b/rs/cli/src/commands/mod.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use crate::commands::subnet::Subnet; use api_boundary_nodes::ApiBoundaryNodes; use clap::Args as ClapArgs; @@ -212,7 +210,7 @@ The argument is mandatory for testnets, and is optional for mainnet and staging" /// Path to file which contains cordoned features #[clap(long, global = true, visible_aliases = &["cf-file", "cfff"])] - pub cordon_feature_fallback_file: Option, + pub cordoned_features_file: Option, } // Do not use outside of DRE CLI. diff --git a/rs/cli/src/cordoned_feature_fetcher.rs b/rs/cli/src/cordoned_feature_fetcher.rs index 60c78d12b..2306dc30f 100644 --- a/rs/cli/src/cordoned_feature_fetcher.rs +++ b/rs/cli/src/cordoned_feature_fetcher.rs @@ -1,6 +1,6 @@ use std::{path::PathBuf, time::Duration}; -use decentralization::network::NodeFeaturePair; +use decentralization::network::CordonedFeature; use futures::future::BoxFuture; use ic_management_types::NodeFeature; use itertools::Itertools; @@ -11,10 +11,10 @@ use strum::VariantNames; #[automock] pub trait CordonedFeatureFetcher: Sync + Send { - fn fetch(&self) -> BoxFuture<'_, anyhow::Result>>; + fn fetch(&self) -> BoxFuture<'_, anyhow::Result>>; #[cfg(test)] - fn parse_outer(&self, contents: &[u8]) -> anyhow::Result>; + fn parse_outer(&self, contents: &[u8]) -> anyhow::Result>; } pub struct CordonedFeatureFetcherImpl { @@ -24,19 +24,23 @@ pub struct CordonedFeatureFetcherImpl { // fetch from github. If github is // unreachable, this cache will be used local_copy: PathBuf, - offline: bool, + use_local_file: bool, // By default, cordoned features are fetched from github } const CORDONED_FEATURES_FILE_URL: &str = "https://raw.githubusercontent.com/dfinity/dre/refs/heads/main/cordoned_features.yaml"; impl CordonedFeatureFetcherImpl { - pub fn new(local_copy: PathBuf, offline: bool) -> anyhow::Result { + pub fn new(local_copy: PathBuf, use_local_file: bool) -> anyhow::Result { let client = ClientBuilder::new().timeout(Duration::from_secs(10)).build()?; - Ok(Self { client, local_copy, offline }) + Ok(Self { + client, + local_copy, + use_local_file, + }) } - async fn fetch_from_git(&self) -> anyhow::Result> { + async fn fetch_from_git(&self) -> anyhow::Result> { let bytes = self .client .get(CORDONED_FEATURES_FILE_URL) @@ -58,13 +62,13 @@ impl CordonedFeatureFetcherImpl { self.parse(&bytes) } - fn fetch_from_file(&self) -> anyhow::Result> { + fn fetch_from_file(&self) -> anyhow::Result> { let contents = std::fs::read(&self.local_copy)?; self.parse(&contents) } - fn parse(&self, contents: &[u8]) -> anyhow::Result> { + fn parse(&self, contents: &[u8]) -> anyhow::Result> { let valid_yaml = serde_yaml::from_slice::(contents)?; let features = match valid_yaml.get("features") { @@ -78,7 +82,7 @@ impl CordonedFeatureFetcherImpl { let mut valid_features = vec![]; for feature in features { - valid_features.push(NodeFeaturePair { + valid_features.push(CordonedFeature { feature: feature .get("feature") .map(|value| { @@ -90,7 +94,7 @@ impl CordonedFeatureFetcherImpl { ) }) }) - .ok_or(anyhow::anyhow!("Expected `feature` key to be present. Got: \n{:?}", feature))??, + .ok_or(anyhow::anyhow!("Expected `feature` field to be present. Got: \n{:?}", feature))??, value: feature .get("value") .map(|value| { @@ -102,7 +106,8 @@ impl CordonedFeatureFetcherImpl { )) .map(|s| s.to_string()) }) - .ok_or(anyhow::anyhow!("Expected `value` key to be present. Got: \n{:?}", feature))??, + .ok_or(anyhow::anyhow!("Expected `value` field to be present. Got: \n{:?}", feature))??, + explanation: feature.get("explanation").and_then(|value| value.as_str().map(|s| s.to_string())), }); } @@ -111,12 +116,13 @@ impl CordonedFeatureFetcherImpl { } impl CordonedFeatureFetcher for CordonedFeatureFetcherImpl { - fn fetch(&self) -> BoxFuture<'_, anyhow::Result>> { + fn fetch(&self) -> BoxFuture<'_, anyhow::Result>> { Box::pin(async { - if self.offline { - // Offline mode specified, use cache - info!("In offline mode, cordoned features will be loaded from cache"); - info!("Cache path for cordoned features: {}", self.local_copy.display()); + if self.use_local_file { + info!( + "Received request to load cordoned features from local cache path: {}", + self.local_copy.display() + ); self.fetch_from_file() } else { self.fetch_from_git().await @@ -125,7 +131,7 @@ impl CordonedFeatureFetcher for CordonedFeatureFetcherImpl { } #[cfg(test)] - fn parse_outer(&self, contents: &[u8]) -> anyhow::Result> { + fn parse_outer(&self, contents: &[u8]) -> anyhow::Result> { self.parse(contents) } } diff --git a/rs/cli/src/ctx.rs b/rs/cli/src/ctx.rs index 331b3bc10..c1cbca63a 100644 --- a/rs/cli/src/ctx.rs +++ b/rs/cli/src/ctx.rs @@ -116,7 +116,7 @@ impl DreContext { args.subcommands.require_auth(), args.forum_post_link.clone(), args.ic_admin_version.clone(), - store.cordoned_features_fetcher()?, + store.cordoned_features_fetcher(args.cordoned_features_file.clone())?, store.health_client(&network)?, store, args.discourse_opts.clone(), diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index 2fb464e7f..5b6458ae5 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -3,9 +3,9 @@ use std::collections::HashMap; use std::sync::Arc; use ahash::AHashMap; +use decentralization::network::CordonedFeature; use decentralization::network::DecentralizedSubnet; use decentralization::network::NetworkHealRequest; -use decentralization::network::NodeFeaturePair; use decentralization::network::SubnetChange; use decentralization::network::SubnetChangeRequest; use decentralization::network::SubnetQueryBy; @@ -648,7 +648,7 @@ impl Runner { health_of_nodes: &IndexMap, node: &Node, ensure_assigned: bool, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], ) -> Option { let mut best_change: Option = None; diff --git a/rs/cli/src/store.rs b/rs/cli/src/store.rs index acfb8c1fc..b2d96d8cd 100644 --- a/rs/cli/src/store.rs +++ b/rs/cli/src/store.rs @@ -284,23 +284,33 @@ impl Store { #[cfg(test)] pub fn cordoned_features_file_outer(&self) -> anyhow::Result { - self.cordoned_features_file() + self.cordoned_features_file(None) } - fn cordoned_features_file(&self) -> anyhow::Result { - let file = self.path().join("cordoned_features.yaml"); + fn cordoned_features_file(&self, file_path: Option) -> anyhow::Result { + let file = match file_path { + Some(path) => std::path::PathBuf::from(path).canonicalize()?, + None => { + let file = self.path().join("cordoned_features.yaml"); - if !file.exists() { - info!("Cordoned features file was missing. Creating on path `{}`...", file.display()); - fs_err::write(&file, "")?; - } + if !file.exists() { + info!("Cordoned features file was missing. Creating on path `{}`...", file.display()); + fs_err::write(&file, "")?; + } + + file + } + }; Ok(file) } - pub fn cordoned_features_fetcher(&self) -> anyhow::Result> { - let file = self.cordoned_features_file()?; - Ok(Arc::new(CordonedFeatureFetcherImpl::new(file, self.is_offline())?)) + pub fn cordoned_features_fetcher(&self, local_file_path: Option) -> anyhow::Result> { + let file = self.cordoned_features_file(local_file_path.clone())?; + Ok(Arc::new(CordonedFeatureFetcherImpl::new( + file, + self.is_offline() || local_file_path.is_some(), + )?)) } #[cfg(test)] diff --git a/rs/cli/src/unit_tests/cordoned_feature_fetcher.rs b/rs/cli/src/unit_tests/cordoned_feature_fetcher.rs index 62cb6d204..748286b73 100644 --- a/rs/cli/src/unit_tests/cordoned_feature_fetcher.rs +++ b/rs/cli/src/unit_tests/cordoned_feature_fetcher.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use decentralization::network::NodeFeaturePair; +use decentralization::network::CordonedFeature; use ic_management_types::NodeFeature; use itertools::Itertools; use serde_yaml::Mapping; @@ -11,7 +11,7 @@ fn ensure_empty(file: PathBuf) { std::fs::write(file, "").unwrap(); } -fn write_to_cache(contents: &[NodeFeaturePair], path: PathBuf) { +fn write_to_cache(contents: &[CordonedFeature], path: PathBuf) { let mut mapping = Mapping::new(); mapping.insert("features".into(), serde_yaml::to_value(contents).unwrap()); let root = serde_yaml::Value::Mapping(mapping); @@ -23,7 +23,7 @@ fn write_to_cache(contents: &[NodeFeaturePair], path: PathBuf) { struct TestScenario { name: String, offline: bool, - cache_contents: Option>, + cache_contents: Option>, should_succeed: bool, } @@ -52,7 +52,7 @@ impl TestScenario { } } - fn with_cache(self, pairs: &[NodeFeaturePair]) -> Self { + fn with_cache(self, pairs: &[CordonedFeature]) -> Self { Self { cache_contents: Some(pairs.to_vec()), ..self @@ -82,16 +82,18 @@ fn cordoned_feature_fetcher_tests() { TestScenario::new("[Offline] No cache").no_cache().offline().should_fail(), TestScenario::new("[Offline] Fetch from cache") .offline() - .with_cache(&[NodeFeaturePair { + .with_cache(&[CordonedFeature { feature: NodeFeature::NodeProvider, value: "some-np".to_string(), + explanation: None, }]) .should_succeed(), TestScenario::new("[Online] Stale cache") .online() - .with_cache(&[NodeFeaturePair { + .with_cache(&[CordonedFeature { feature: NodeFeature::NodeProvider, value: "some-np".to_string(), + explanation: None, }]) .should_succeed(), ]; @@ -107,7 +109,7 @@ fn cordoned_feature_fetcher_tests() { None => ensure_empty(store.cordoned_features_file_outer().unwrap()), } - let cordoned_feature_fetcher = store.cordoned_features_fetcher().unwrap(); + let cordoned_feature_fetcher = store.cordoned_features_fetcher(None).unwrap(); let maybe_cordoned_features = runtime.block_on(cordoned_feature_fetcher.fetch()); diff --git a/rs/cli/src/unit_tests/replace.rs b/rs/cli/src/unit_tests/replace.rs index 36a860fa6..64d1c11a7 100644 --- a/rs/cli/src/unit_tests/replace.rs +++ b/rs/cli/src/unit_tests/replace.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use decentralization::{ - network::{DecentralizedSubnet, NodeFeaturePair}, + network::{CordonedFeature, DecentralizedSubnet}, SubnetChangeResponse, }; use ic_management_backend::{health::MockHealthStatusQuerier, lazy_registry::MockLazyRegistry}; @@ -54,10 +54,11 @@ fn subnet(id: u64, nodes: &[Node]) -> DecentralizedSubnet { } } -fn cordoned_feature(feature: NodeFeature, value: &str) -> NodeFeaturePair { - NodeFeaturePair { +fn cordoned_feature(feature: NodeFeature, value: &str) -> CordonedFeature { + CordonedFeature { feature, value: value.to_string(), + explanation: None, } } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 311ce74d2..7ab8f44bc 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -824,7 +824,7 @@ pub trait TopologyManager: SubnetQuerier + AvailableNodesQuerier + Sync { exclude_nodes: Vec, only_nodes: Vec, health_of_nodes: &'a IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &'a [Node], ) -> BoxFuture<'a, Result> { Box::pin(async move { @@ -883,9 +883,10 @@ impl> MatchAnyNode for std::slice::Iter<'_, T> { } #[derive(Debug, Clone, PartialEq, Eq, Serialize)] -pub struct NodeFeaturePair { +pub struct CordonedFeature { pub feature: NodeFeature, pub value: String, + pub explanation: Option, } #[derive(Default, Clone, Debug)] @@ -980,7 +981,7 @@ impl SubnetChangeRequest { optimize_count: usize, replacements_unhealthy: &[Node], health_of_nodes: &IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], ) -> Result { let old_nodes = self.subnet.nodes.clone(); @@ -999,7 +1000,7 @@ impl SubnetChangeRequest { pub fn rescue( mut self, health_of_nodes: &IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], ) -> Result { let old_nodes = self.subnet.nodes.clone(); @@ -1031,7 +1032,7 @@ impl SubnetChangeRequest { how_many_nodes_to_remove: usize, how_many_nodes_unhealthy: usize, health_of_nodes: &IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], ) -> Result { let old_nodes = self.subnet.nodes.clone(); @@ -1133,7 +1134,7 @@ impl SubnetChangeRequest { pub fn evaluate( self, health_of_nodes: &IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], ) -> Result { self.resize(0, 0, 0, health_of_nodes, cordoned_features, all_nodes) @@ -1254,7 +1255,7 @@ impl NetworkHealRequest { &self, mut available_nodes: Vec, health_of_nodes: &IndexMap, - cordoned_features: Vec, + cordoned_features: Vec, all_nodes: &[Node], optimize_for_business_rules_compliance: bool, ) -> Result, NetworkError> {