From e74d4299fa98853f78b84896af97d033cdf4b419 Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Tue, 19 Mar 2024 17:00:00 -0700 Subject: [PATCH] feat(pd): add --archive-url flag to pd testnet join Support ingesting a remote URL for downloading and extracting a .tar.gz file, that unpacks into a rocksdb dir for pd. Not yet supported is any notion of copying a genesis file out of the archive. Should we default to that behavior? Make it optional? --- Cargo.lock | 15 ++++++++++ crates/bin/pd/Cargo.toml | 2 +- crates/bin/pd/src/cli.rs | 7 +++++ crates/bin/pd/src/main.rs | 62 +++++++++++++++++++++++++++++++++++---- 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc9e4db235..4dc288f429 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6663,10 +6663,12 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls 0.24.1", + "tokio-util 0.7.10", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "winreg", ] @@ -8695,6 +8697,19 @@ version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.68" diff --git a/crates/bin/pd/Cargo.toml b/crates/bin/pd/Cargo.toml index d8cc14bc77..517906b75d 100644 --- a/crates/bin/pd/Cargo.toml +++ b/crates/bin/pd/Cargo.toml @@ -92,7 +92,7 @@ rand = { workspace = true } rand_chacha = { workspace = true } rand_core = { workspace = true, features = ["getrandom"] } regex = { workspace = true } -reqwest = { version = "0.11", features = ["json"] } +reqwest = { version = "0.11", features = ["json", "stream"] } rocksdb = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/bin/pd/src/cli.rs b/crates/bin/pd/src/cli.rs index 0a5af216a9..fd0b142742 100644 --- a/crates/bin/pd/src/cli.rs +++ b/crates/bin/pd/src/cli.rs @@ -201,6 +201,13 @@ pub enum TestnetCommand { default_value = "https://rpc.testnet.penumbra.zone" )] node: Url, + + /// Optional URL of archived node state in .tar.gz format. The archive will be + /// downloaded and extracted locally, allowing the node to join a network at a block height + /// higher than 0. + #[clap(long)] + archive_url: Option, + /// Human-readable name to identify node on network // Default: 'node-#' #[clap(long, env = "PENUMBRA_PD_TM_MONIKER")] diff --git a/crates/bin/pd/src/main.rs b/crates/bin/pd/src/main.rs index aacecfd03d..7207bfba12 100644 --- a/crates/bin/pd/src/main.rs +++ b/crates/bin/pd/src/main.rs @@ -1,8 +1,13 @@ #![allow(clippy::clone_on_copy)] #![deny(clippy::unwrap_used)] #![recursion_limit = "512"] +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; +use flate2::Compression; use std::error::Error; +use std::fs::File; use std::io::IsTerminal as _; +use std::io::Write; use console_subscriber::ConsoleLayer; use metrics_tracing_context::{MetricsLayer, TracingContextLayer}; @@ -25,6 +30,7 @@ use rand::Rng; use rand_core::OsRng; use tendermint_config::net::Address as TendermintAddress; use tokio::runtime; +use tokio_stream::StreamExt; use tower_http::cors::CorsLayer; use tracing_subscriber::{prelude::*, EnvFilter}; use url::Url; @@ -255,6 +261,7 @@ async fn main() -> anyhow::Result<()> { tn_cmd: TestnetCommand::Join { node, + archive_url, moniker, external_address, tendermint_rpc_bind, @@ -290,7 +297,7 @@ async fn main() -> anyhow::Result<()> { // Join the target testnet, looking up network info and writing // local configs for pd and tendermint. testnet_join( - output_dir, + output_dir.clone(), node, &node_name, external_address, @@ -298,6 +305,55 @@ async fn main() -> anyhow::Result<()> { tendermint_p2p_bind, ) .await?; + + // Download and extract archive URL, if set. + if let Some(archive_url) = archive_url { + tracing::info!(%archive_url, "downloading compressed node state"); + + // Download. Adapted from https://rust-lang-nursery.github.io/rust-cookbook/web/clients/download.html + // Here we inspect HEAD so we can infer filename. + let response = reqwest::get(archive_url).await?; + + let fname = response + .url() + .path_segments() + .and_then(|segments| segments.last()) + .and_then(|name| if name.is_empty() { None } else { Some(name) }) + .unwrap_or("pd-node-state-archive.tar.gz"); + + let archive_filepath = output_dir.join(fname); + let mut download_opts = std::fs::OpenOptions::new(); + download_opts + .create_new(true) + .read(true) + .write(true) + .truncate(true); + let mut archive_file = download_opts.open(&archive_filepath)?; + + // Download via stream, in case file is too large to shove into RAM. + let mut stream = response.bytes_stream(); + while let Some(chunk_result) = stream.next().await { + let chunk = chunk_result?; + archive_file.write_all(&chunk)?; + } + archive_file.flush()?; + tracing::info!("download complete: {}", archive_filepath.display()); + + // Extract + // Open downloaded file for writing + let mut unpack_opts = std::fs::OpenOptions::new(); + unpack_opts.read(true); + let f = unpack_opts.open(&archive_filepath)?; + let tar = GzDecoder::new(f); + let mut archive = tar::Archive::new(tar); + // This dir-path building is duplicated in the config gen code. + let pd_home = output_dir.join("node0").join("pd"); + archive + .unpack(&pd_home) + .context("failed to extract tar.gz archive")?; + tracing::info!("archived node state unpacked to {}", pd_home.display()); + // TODO: use "migrate" tarball, clobber genesis file from migration. + } } RootCommand::Testnet { @@ -426,10 +482,6 @@ async fn main() -> anyhow::Result<()> { ); anyhow::bail!("refusing to overwrite existing archive"); } - use flate2::write::GzEncoder; - use flate2::Compression; - use std::fs::File; - tracing::info!( "creating archive {} -> {}", dst_rocksdb_dir.display(),