From 3f8d0605c50de2276c296ada954daa7fc82221d5 Mon Sep 17 00:00:00 2001 From: guipenedo Date: Tue, 26 Nov 2024 18:47:25 +0100 Subject: [PATCH] GIVE ME MY PROGRESS BARS GOD DAMN IT --- .../pipeline/dedup/fast_mh3/src/main.rs | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/datatrove/pipeline/dedup/fast_mh3/src/main.rs b/src/datatrove/pipeline/dedup/fast_mh3/src/main.rs index 206f7d9f..7dd77a28 100644 --- a/src/datatrove/pipeline/dedup/fast_mh3/src/main.rs +++ b/src/datatrove/pipeline/dedup/fast_mh3/src/main.rs @@ -1,3 +1,5 @@ +use std::fs::File; +use std::io::Write; use std::io::Cursor; use std::collections::HashMap; use anyhow::{Context, Result}; @@ -47,6 +49,10 @@ struct Args { /// Total number of concurrent downloads #[arg(long, default_value = "0")] downloads: usize, + + /// Path to the log file for saving progress bar output + #[arg(long)] + logspath: String, } #[derive(Debug, Clone)] @@ -367,7 +373,14 @@ async fn process_post_union( .unwrap() .progress_chars("#>-")); pb.enable_steady_tick(std::time::Duration::from_secs(1)); - pb.set_draw_target(indicatif::ProgressDrawTarget::stderr()); + + // Create a custom draw target that writes to both stderr and the log file + pb.set_draw_target(indicatif::ProgressDrawTarget::custom(Box::new( + |s| { + std::io::stderr().write_all(s.as_bytes()).unwrap(); + log_file.write_all(s.as_bytes()).unwrap(); + }, + ))); let mut handles = Vec::new(); for file_number in files { @@ -421,6 +434,9 @@ async fn main() -> Result<()> { Semaphore::new(args.downloads) }); + // Open the log file for writing + let mut log_file = File::create(&args.logspath)?; + println!("Processing {} input files...", files.len()); let pb = ProgressBar::new(files.len() as u64); pb.set_style(ProgressStyle::default_bar() @@ -428,7 +444,14 @@ async fn main() -> Result<()> { .unwrap() .progress_chars("#>-")); pb.enable_steady_tick(std::time::Duration::from_secs(1)); - pb.set_draw_target(indicatif::ProgressDrawTarget::stderr()); + + // Create a custom draw target that writes to both stderr and the log file + pb.set_draw_target(indicatif::ProgressDrawTarget::custom(Box::new( + |s| { + std::io::stderr().write_all(s.as_bytes()).unwrap(); + log_file.write_all(s.as_bytes()).unwrap(); + }, + ))); let mut handles = Vec::new();