From 0d34331bb8e6894a27482fff93ff4d40e337917e Mon Sep 17 00:00:00 2001
From: Conor Schaefer
Date: Tue, 19 Mar 2024 15:50:25 -0700
Subject: [PATCH] feat(pd): support archives for migrate and join

Enables opt-in archive generation when performing:

* pd export
* pd migrate

The goal is to provide a standardized bottling-up of pd state,
specifically the rocksdb directory. In the context of upgrades, the
"pd migrate" change is the one we care about: we want the archived
directory to contain both the rocksdb data and the modified genesis
file.

Accordingly, `pd testnet join` is modified to support an optional
archive URL. If set, the remote tar.gz archive will be downloaded and
extracted, clobbering the cometbft config. A remote bootstrap node is
still contacted to learn about peers; otherwise the newly created node
wouldn't be able to talk to the network.

(cherry picked from commit 27e18e34ce5ddddce57a94c705b2dcc30fe2172e)
---
 Cargo.lock                        | 51 +++++++++++++++++
 crates/bin/pd/Cargo.toml          |  4 +-
 crates/bin/pd/src/cli.rs          | 26 +++++++--
 crates/bin/pd/src/main.rs         | 91 ++++++++++++++++++++++---------
 crates/bin/pd/src/migrate.rs      | 48 +++++++++++++---
 crates/bin/pd/src/testnet/join.rs | 79 +++++++++++++++++++++++++++
 6 files changed, 261 insertions(+), 38 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bb1bae0c6f..a0ffd224fe 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2363,6 +2363,18 @@ dependencies = [
  "subtle",
 ]
 
+[[package]]
+name = "filetime"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "fixed-hash"
 version = "0.8.0"
@@ -4548,6 +4560,7 @@ dependencies = [
  "decaf377-rdsa",
  "directories",
  "ed25519-consensus",
+ "flate2",
  "fs_extra",
  "futures",
  "hex",
@@ -4596,6 +4609,7 @@ dependencies = [
  "serde_json",
  "serde_with",
  "sha2 0.10.8",
+ "tar",
  "tempfile",
  "tendermint",
  "tendermint-config",
@@ -6650,10 +6664,12 @@ dependencies = [
  "tokio",
  "tokio-native-tls",
  "tokio-rustls 0.24.1",
+ "tokio-util 0.7.10",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
+ "wasm-streams",
  "web-sys",
  "winreg",
 ]
@@ -7706,6 +7722,17 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 
+[[package]]
+name = "tar"
+version = "0.4.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb"
+dependencies = [
+ "filetime",
+ "libc",
+ "xattr",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.10.0"
@@ -8671,6 +8698,19 @@ version = "0.2.91"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838"
 
+[[package]]
+name = "wasm-streams"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
 [[package]]
 name = "web-sys"
 version = "0.3.68"
@@ -8946,6 +8986,17 @@ dependencies = [
  "time 0.3.34",
 ]
 
+[[package]]
+name = "xattr"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
+dependencies = [
+ "libc",
+ "linux-raw-sys 0.4.13",
+ "rustix 0.38.31",
+]
+
 [[package]]
 name = "yasna"
 version = "0.5.2"
diff --git a/crates/bin/pd/Cargo.toml b/crates/bin/pd/Cargo.toml
index 1ff147f4cb..9951853bb1 100644
--- a/crates/bin/pd/Cargo.toml
+++ b/crates/bin/pd/Cargo.toml
@@ -50,6 +50,7 @@ decaf377 = { workspace = true, features = ["parallel"],
 decaf377-rdsa = { workspace = true }
 directories = { workspace = true }
 ed25519-consensus = { workspace = true }
+flate2 = "1.0.28"
 fs_extra = "1.3.0"
 futures = { workspace = true }
 hex = { workspace = true }
@@ -91,12 +92,13 @@ rand = { workspace = true }
 rand_chacha = { workspace = true }
 rand_core = { workspace = true, features = ["getrandom"] }
 regex = { workspace = true }
-reqwest = { version = "0.11", features = ["json"] }
+reqwest = { version = "0.11", features = ["json", "stream"] }
 rocksdb = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 serde_with = { workspace = true, features = ["hex"] }
 sha2 = { workspace = true }
+tar = "0.4.40"
 tempfile = { workspace = true }
 tendermint = { workspace = true }
 tendermint-config = { workspace = true }
diff --git a/crates/bin/pd/src/cli.rs b/crates/bin/pd/src/cli.rs
index 3cb1f54fbf..72977b9542 100644
--- a/crates/bin/pd/src/cli.rs
+++ b/crates/bin/pd/src/cli.rs
@@ -113,9 +113,13 @@ pub enum RootCommand {
         /// The home directory of the full node.
         #[clap(long, env = "PENUMBRA_PD_HOME", display_order = 100)]
         home: PathBuf,
-        /// The directory that the exported state will be written to.
+        /// The directory where the exported node state will be written.
+        #[clap(long, display_order = 200, alias = "export-path")]
+        export_directory: PathBuf,
+        /// An optional filepath for a compressed archive containing the exported
+        /// node state, e.g. ~/pd-backup.tar.gz.
         #[clap(long, display_order = 200)]
-        export_path: PathBuf,
+        export_archive: Option<PathBuf>,
         /// Whether to prune the JMT tree.
         #[clap(long, display_order = 300)]
         prune: bool,
@@ -123,13 +127,18 @@ pub enum RootCommand {
     /// Run a migration on the exported storage state of the full node,
     /// and create a genesis file.
     Migrate {
-        /// The directory containing exported state to which the upgrade will be applied.
-        #[clap(long, display_order = 200)]
-        target_dir: PathBuf,
+        /// The directory containing exported state, created via `pd export`, to be modified
+        /// in-place. This should be a pd home directory, with a subdirectory called "rocksdb".
+        #[clap(long, display_order = 200, alias = "target-dir")]
+        target_directory: PathBuf,
         #[clap(long, display_order = 300)]
         /// Timestamp of the genesis file in RFC3339 format. If unset, defaults to the current time,
         /// unless the migration logic overrides it.
         genesis_start: Option<tendermint::time::Time>,
+        /// An optional filepath for a compressed archive containing the migrated node state,
+        /// e.g. ~/pd-state-post-upgrade.tar.gz.
+        #[clap(long, display_order = 400)]
+        migrate_archive: Option<PathBuf>,
     },
 }
@@ -197,6 +206,13 @@ pub enum TestnetCommand {
             default_value = "https://rpc.testnet.penumbra.zone"
         )]
         node: Url,
+
+        /// Optional URL of archived node state in .tar.gz format. The archive will be
+        /// downloaded and extracted locally, allowing the node to join a network at a
+        /// block height higher than 0.
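+        ///
+        /// A minimal usage sketch (the URL is a hypothetical placeholder, not a real endpoint):
+        ///
+        ///   pd testnet join --archive-url "https://example.com/pd-node-state.tar.gz"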
+        #[clap(long)]
+        archive_url: Option<Url>,
+
         /// Human-readable name to identify node on network
         // Default: 'node-#'
         #[clap(long, env = "PENUMBRA_PD_TM_MONIKER")]
diff --git a/crates/bin/pd/src/main.rs b/crates/bin/pd/src/main.rs
index 29d2ecbe7e..b4bd928966 100644
--- a/crates/bin/pd/src/main.rs
+++ b/crates/bin/pd/src/main.rs
@@ -255,6 +255,7 @@ async fn main() -> anyhow::Result<()> {
             tn_cmd:
                 TestnetCommand::Join {
                     node,
+                    archive_url,
                     moniker,
                     external_address,
                     tendermint_rpc_bind,
@@ -290,7 +291,7 @@ async fn main() -> anyhow::Result<()> {
             // Join the target testnet, looking up network info and writing
             // local configs for pd and tendermint.
             testnet_join(
-                output_dir,
+                output_dir.clone(),
                 node,
                 &node_name,
                 external_address,
@@ -298,6 +299,11 @@ async fn main() -> anyhow::Result<()> {
                 tendermint_p2p_bind,
             )
             .await?;
+
+            // Download and extract the archive, if a URL was set.
+            if let Some(archive_url) = archive_url {
+                pd::testnet::join::unpack_state_archive(archive_url, output_dir).await?;
+            }
         }
 
         RootCommand::Testnet {
@@ -379,44 +385,79 @@ async fn main() -> anyhow::Result<()> {
             t.write_configs()?;
         }
         RootCommand::Export {
-            mut home,
-            mut export_path,
+            home,
+            export_directory,
+            export_archive,
             prune,
         } => {
            use fs_extra;
 
-            tracing::info!("exporting state to {}", export_path.display());
+            // Export state as directory.
+            let src_rocksdb_dir = home.join("rocksdb");
+            tracing::info!(
+                "copying node state {} -> {}",
+                src_rocksdb_dir.display(),
+                export_directory.display()
+            );
+            std::fs::create_dir_all(&export_directory)?;
             let copy_opts = fs_extra::dir::CopyOptions::new();
-            home.push("rocksdb");
-            let from = [home.as_path()];
-            tracing::info!(?home, ?export_path, "copying from data dir to export dir",);
-            std::fs::create_dir_all(&export_path)?;
-            fs_extra::copy_items(&from, export_path.as_path(), &copy_opts)?;
-
-            tracing::info!("done copying");
-            if !prune {
-                return Ok(());
+            fs_extra::copy_items(
+                &[src_rocksdb_dir.as_path()],
+                export_directory.as_path(),
+                &copy_opts,
+            )?;
+            tracing::info!("finished copying node state");
+
+            let dst_rocksdb_dir = export_directory.join("rocksdb");
+            // If prune=true, then export-directory is required, because we must munge state prior
+            // to compressing. So we'll just mandate the presence of the --export-directory arg
+            // always.
+            if prune {
+                tracing::info!("pruning JMT tree");
+                let export = Storage::load(dst_rocksdb_dir, SUBSTORE_PREFIXES.to_vec()).await?;
+                let _ = StateDelta::new(export.latest_snapshot());
+                // TODO:
+                // - add utilities in `cnidarium` to prune a tree
+                // - apply the delta to the exported storage
+                // - apply checks: root hash, size, etc.
+                todo!()
             }
 
-            tracing::info!("pruning JMT tree");
-            export_path.push("rocksdb");
-            let export = Storage::load(export_path, SUBSTORE_PREFIXES.to_vec()).await?;
-            let _ = StateDelta::new(export.latest_snapshot());
-            // TODO:
-            // - add utilities in `cnidarium` to prune a tree
-            // - apply the delta to the exported storage
-            // - apply checks: root hash, size, etc.
-            todo!()
+            // Compress to tarball if requested.
+            if let Some(archive_filepath) = export_archive {
+                pd::migrate::archive_directory(
+                    dst_rocksdb_dir.clone(),
+                    archive_filepath.clone(),
+                    Some("rocksdb".to_owned()),
+                )?;
+                tracing::info!("export complete: {}", archive_filepath.display());
+            } else {
+                // Provide a friendly "OK" message that's still accurate without archiving.
+                tracing::info!("export complete: {}", export_directory.display());
+            }
         }
         RootCommand::Migrate {
-            target_dir,
+            target_directory,
             genesis_start,
+            migrate_archive,
         } => {
-            tracing::info!("migrating state from {}", target_dir.display());
+            tracing::info!("migrating state in {}", target_directory.display());
             SimpleMigration
-                .migrate(target_dir.clone(), genesis_start)
+                .migrate(target_directory.clone(), genesis_start)
                 .await
                 .context("failed to upgrade state")?;
+            // Compress to tarball if requested.
+            if let Some(archive_filepath) = migrate_archive {
+                pd::migrate::archive_directory(
+                    target_directory.clone(),
+                    archive_filepath.clone(),
+                    None,
+                )?;
+                tracing::info!("migration complete: {}", archive_filepath.display());
+            } else {
+                // Provide a friendly "OK" message that's still accurate without archiving.
+                tracing::info!("migration complete: {}", target_directory.display());
+            }
         }
     }
     Ok(())
diff --git a/crates/bin/pd/src/migrate.rs b/crates/bin/pd/src/migrate.rs
index 4a9cac548c..04feef53f4 100644
--- a/crates/bin/pd/src/migrate.rs
+++ b/crates/bin/pd/src/migrate.rs
@@ -4,6 +4,7 @@
 //! node operators must coordinate to perform a chain upgrade.
 //! This module declares how local `pd` state should be altered, if at all,
 //! in order to be compatible with the network post-chain-upgrade.
+use anyhow::Context;
 use std::path::PathBuf;
 
 use cnidarium::{StateDelta, StateWrite, Storage};
@@ -16,6 +17,10 @@ use penumbra_stake::{
 
 use crate::testnet::generate::TestnetConfig;
 
+use flate2::write::GzEncoder;
+use flate2::Compression;
+use std::fs::File;
+
 /// The kind of migration that should be performed.
 pub enum Migration {
     /// No-op migration.
@@ -36,9 +41,8 @@ impl Migration {
         match self {
             Migration::Noop => (),
             Migration::SimpleMigration => {
-                let mut db_path = path_to_export.clone();
-                db_path.push("rocksdb");
-                let storage = Storage::load(db_path, SUBSTORE_PREFIXES.to_vec()).await?;
+                let rocksdb_dir = path_to_export.join("rocksdb");
+                let storage = Storage::load(rocksdb_dir, SUBSTORE_PREFIXES.to_vec()).await?;
                 let export_state = storage.latest_snapshot();
                 let root_hash = export_state.root_hash().await.expect("can get root hash");
                 let app_hash_pre_migration: RootHash = root_hash.into();
@@ -97,12 +101,10 @@ impl Migration {
                 let genesis_json = serde_json::to_string(&genesis).expect("can serialize genesis");
                 tracing::info!("genesis: {}", genesis_json);
-                let mut genesis_path = path_to_export.clone();
-                genesis_path.push("genesis.json");
+                let genesis_path = path_to_export.join("genesis.json");
                 std::fs::write(genesis_path, genesis_json).expect("can write genesis");
 
-                let mut validator_state_path = path_to_export.clone();
-                validator_state_path.push("priv_validator_state.json");
+                let validator_state_path = path_to_export.join("priv_validator_state.json");
                 let fresh_validator_state =
                     crate::testnet::generate::TestnetValidator::initial_state();
                 std::fs::write(validator_state_path, fresh_validator_state)
@@ -113,3 +115,35 @@ impl Migration {
         Ok(())
     }
 }
+
+/// Compress a single directory into a gzipped tar archive. Accepts an Option for naming
+/// the subdir within the tar archive, which defaults to ".", meaning no nesting.
+pub fn archive_directory(
+    src_directory: PathBuf,
+    archive_filepath: PathBuf,
+    subdir_within_archive: Option<String>,
+) -> anyhow::Result<()> {
+    // Don't clobber an existing target archive.
+    if archive_filepath.exists() {
+        tracing::error!(
+            "export archive filepath already exists: {}",
+            archive_filepath.display()
+        );
+        anyhow::bail!("refusing to overwrite existing archive");
+    }
+
+    tracing::info!(
+        "creating archive {} -> {}",
+        src_directory.display(),
+        archive_filepath.display()
+    );
+    let tarball_file = File::create(&archive_filepath)
+        .context("failed to create file for archive: check parent directory and permissions")?;
+    let enc = GzEncoder::new(tarball_file, Compression::default());
+    let mut tarball = tar::Builder::new(enc);
+    let subdir_within_archive = subdir_within_archive.unwrap_or(String::from("."));
+    tarball
+        .append_dir_all(subdir_within_archive, src_directory.as_path())
+        .context("failed to package archive contents")?;
+    Ok(())
+}
diff --git a/crates/bin/pd/src/testnet/join.rs b/crates/bin/pd/src/testnet/join.rs
index d9d9fb772d..525390524c 100644
--- a/crates/bin/pd/src/testnet/join.rs
+++ b/crates/bin/pd/src/testnet/join.rs
@@ -8,6 +8,10 @@ use std::path::PathBuf;
 use tendermint_config::net::Address as TendermintAddress;
 use url::Url;
 
+use flate2::read::GzDecoder;
+use std::io::Write;
+use tokio_stream::StreamExt;
+
 use crate::testnet::config::{parse_tm_address, TestnetTendermintConfig};
 use crate::testnet::generate::TestnetValidator;
 
@@ -230,6 +234,81 @@ pub async fn fetch_peers(tm_url: &Url) -> anyhow::Result<Vec<TendermintAddress>>
     Ok(seeds)
 }
 
+/// Download a gzipped tarball from a URL, and extract its contents as the starting state
+/// config for the fullnode. Allows bootstrapping from archived state, which is useful
+/// for nodes joining after a chain upgrade has been performed.
+///
+/// Supports archive files generated via `pd export`, which contain only the rocksdb dir,
+/// and via `pd migrate`, which contain the rocksdb dir, new genesis content, and a private
+/// validator state file.
+///
+/// The `output_dir` should be the same directory passed as `--testnet-dir` to
+/// `pd testnet join`; relative paths for pd and cometbft will be created from this base path.
+pub async fn unpack_state_archive(archive_url: Url, output_dir: PathBuf) -> anyhow::Result<()> {
+    tracing::info!(%archive_url, "downloading compressed node state");
+    // Download.
+    // Inspect the final URL of the response (post-redirects) so we can infer the filename.
+    let response = reqwest::get(archive_url).await?;
+    let fname = response
+        .url()
+        .path_segments()
+        .and_then(|segments| segments.last())
+        .and_then(|name| if name.is_empty() { None } else { Some(name) })
+        .unwrap_or("pd-node-state-archive.tar.gz");
+
+    let archive_filepath = output_dir.join(fname);
+    let mut download_opts = std::fs::OpenOptions::new();
+    download_opts.create_new(true).write(true);
+    let mut archive_file = download_opts.open(&archive_filepath)?;
+
+    // Download via stream, in case the file is too large to hold in RAM.
+    let mut stream = response.bytes_stream();
+    while let Some(chunk_result) = stream.next().await {
+        let chunk = chunk_result?;
+        archive_file.write_all(&chunk)?;
+    }
+    archive_file.flush()?;
+    tracing::info!("download complete: {}", archive_filepath.display());
+
+    // Extract.
+    // Re-open the downloaded file for unpacking, to get a fresh filehandle.
+    let mut unpack_opts = std::fs::OpenOptions::new();
+    unpack_opts.read(true);
+    let f = unpack_opts.open(&archive_filepath)?;
+    let tar = GzDecoder::new(f);
+    let mut archive = tar::Archive::new(tar);
+    // This dir-path building is duplicated in the config gen code.
+    let pd_home = output_dir.join("node0").join("pd");
+    archive
+        .unpack(&pd_home)
+        .context("failed to extract tar.gz archive")?;
+
+    // If the archive we consumed was generated via `pd migrate`, then it will contain
+    // a new genesis file and priv_validator_state.json, both of which should be applied
+    // over the generated cometbft config files. If the archive was generated via `pd export`,
+    // then those extra files will be missing, and only rocksdb data will be present.
+    let new_genesis = pd_home.join("genesis.json");
+    let new_val_state = pd_home.join("priv_validator_state.json");
+    let cometbft_dir = output_dir.join("node0").join("cometbft");
+    let copy_opts = fs_extra::dir::CopyOptions::new().overwrite(true);
+
+    if new_genesis.exists() {
+        tracing::info!(new_genesis = %new_genesis.display(), "copying new genesis content from archive");
+        let f = vec![new_genesis];
+        fs_extra::move_items(&f, cometbft_dir.join("config"), &copy_opts)?;
+    }
+    if new_val_state.exists() {
+        tracing::info!(new_val_state = %new_val_state.display(), "copying new priv_validator_state.json content from archive");
+        let f = vec![new_val_state];
+        fs_extra::move_items(&f, cometbft_dir.join("data"), &copy_opts)?;
+    }
+
+    tracing::info!("archived node state unpacked to {}", pd_home.display());
+    // Post-extraction, clean up the downloaded tarball.
+    std::fs::remove_file(archive_filepath)?;
+    Ok(())
+}
+
 /// Check whether SocketAddress spec is likely to be externally-accessible.
 /// Filters out RFC1918 and loopback addresses. Requires an address and port.
 // TODO: This should return a Result, to be clearer about the expectation
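-- 

A minimal sketch of how the two new helpers compose, for reference. The paths and
URL below are hypothetical placeholders, not part of the patch; the real entry
points are the `pd export --export-archive`, `pd migrate --migrate-archive`, and
`pd testnet join --archive-url` flows wired up above.

    use std::path::PathBuf;
    use url::Url;

    // Sketch only: assumes an exported pd state at /tmp/pd-export and an HTTP
    // server hosting the resulting tarball; neither is created here.
    async fn archive_round_trip() -> anyhow::Result<()> {
        // Producer side (`pd export --export-archive`): bottle up the rocksdb
        // dir, nested under "rocksdb/" within the tarball.
        pd::migrate::archive_directory(
            PathBuf::from("/tmp/pd-export/rocksdb"),
            PathBuf::from("/tmp/pd-backup.tar.gz"),
            Some("rocksdb".to_owned()),
        )?;

        // Consumer side (`pd testnet join --archive-url`): download the tarball
        // and unpack it over the freshly generated node0 configs.
        let url = Url::parse("https://example.com/pd-backup.tar.gz")?;
        pd::testnet::join::unpack_state_archive(url, PathBuf::from("/tmp/testnet-dir")).await?;
        Ok(())
    }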