diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2e77be449..08d2b6048 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3817,7 +3817,7 @@ the response, the HTTP status code, and if its a cache hit.
 request the URL again. https://github.com/jqnatividad/qsv/pull/393
 
 ### Changed
-* `fetch`: fast defaults. Now tries to go as fast as possible, leveraging dynamic throttling (using RateLimit and Rety-After headers)
+* `fetch`: fast defaults. Now tries to go as fast as possible, leveraging dynamic throttling (using RateLimit and Retry-After headers)
 but aborting after 100 errors. Also added a separate error progress bar. https://github.com/jqnatividad/qsv/pull/388
 * Smarter `tojsonl`. Now scans CSV file and infers data types and uses the appropriate JSON data type https://github.com/jqnatividad/qsv/pull/389
 * `tojsonl` is also multithreaded https://github.com/jqnatividad/qsv/pull/392
diff --git a/Cargo.lock b/Cargo.lock
index 07c954d96..f410b6d1d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3855,7 +3855,10 @@ version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c9ca8daf4b0b4029777f1bc6e1aedd1aec7b74c276a43bc6f620a8e1a1c0a90e"
 dependencies = [
+ "percent-encoding",
  "serde",
+ "serde_json",
+ "v_htmlescape",
 ]
 
 [[package]]
@@ -5418,6 +5421,7 @@ dependencies = [
  "localzone",
  "log",
  "mimalloc",
+ "minijinja",
  "mlua",
  "newline-converter",
  "num_cpus",
@@ -7408,6 +7412,12 @@ dependencies = [
  "vsimd",
 ]
 
+[[package]]
+name = "v_htmlescape"
+version = "0.15.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c"
+
 [[package]]
 name = "valuable"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index e98ce0fc9..1cc0dbd57 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -140,6 +140,12 @@ local-encoding = { version = "0.2", optional = true }
 localzone = { version = "0.3", features = ["auto_validation"] }
 log = "0.4"
 mimalloc = { version = "0.1", default-features = false, optional = true }
+minijinja = { version = "2", features = [
+    "json",
+    "loop_controls",
+    "speedups",
+    "urlencode",
+] }
 mlua = { version = "0.10", features = [
     "luau",
     "luau-jit",
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index 810d7dd95..87af5ff8d 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -93,6 +93,8 @@ pub mod sqlp;
 pub mod stats;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]
 pub mod table;
+#[cfg(feature = "feature_capable")]
+pub mod template;
 #[cfg(all(feature = "to", feature = "feature_capable"))]
 pub mod to;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]
diff --git a/src/cmd/template.rs b/src/cmd/template.rs
new file mode 100644
index 000000000..bf2ab879a
--- /dev/null
+++ b/src/cmd/template.rs
@@ -0,0 +1,188 @@
+static USAGE: &str = r#"
+Renders a template using CSV data with the minijinja template engine.
+https://docs.rs/minijinja/latest/minijinja/
+
+Each CSV row is used to populate the template, with column headers used as variable names.
+The template syntax follows the Jinja2 template language.
+
+Example template:
+  Dear {{ name }},
+  Your account balance is {{ balance | float | round(2) }}.
+  Status: {% if active %}Active{% else %}Inactive{% endif %}
+
+Usage:
+    qsv template [options] [--template <str> | --template-file <file>] [<input>] [<outdir> | --output <file>]
+    qsv template --help
+
+template arguments:
+    <input>                     The CSV file to read. If not given, input is read from STDIN.
+    <outdir>                    The directory where the output files will be written.
+                                If it does not exist, it will be created.
+template options:
+    --template <str>            Template string to use (alternative to --template-file)
+    --template-file <file>      Template file to use
+    --outfilename <str>         Template string to use to create the filestem of the output
+                                files to write to <outdir>. If set to ROWNO, the filestem
+                                is set to the current rowno of the record, padded with leading
+                                zeroes, with the ".txt" extension (e.g. 001.txt, 002.txt, etc.)
+                                [default: ROWNO]
+    -n, --no-headers            When set, the first row will not be interpreted
+                                as headers. Templates must use numeric 1-based indices
+                                with the "_c" prefix.(e.g. col1: {{_c1}} col2: {{_c2}})
+
+Common options:
+    -o, --output <file>         Write output to <file> instead of stdout
+    --delimiter <sep>           Field separator for reading CSV [default: ,]
+    -h, --help                  Display this message
+"#;
+
+use std::{
+    fs,
+    io::{BufWriter, Write},
+    path::{Component, Path},
+};
+
+use minijinja::Environment;
+use serde::Deserialize;
+use serde_json::Value;
+
+use crate::{
+    config::{Config, Delimiter},
+    util, CliError, CliResult,
+};
+
+#[derive(Deserialize)]
+struct Args {
+    arg_input: Option<String>,
+    arg_outdir: Option<String>,
+    flag_template: Option<String>,
+    flag_template_file: Option<String>,
+    flag_output: Option<String>,
+    flag_outfilename: String,
+    flag_delimiter: Option<Delimiter>,
+    flag_no_headers: bool,
+}
+
+impl From<minijinja::Error> for CliError {
+    fn from(err: minijinja::Error) -> CliError {
+        CliError::Other(err.to_string())
+    }
+}
+
+/// Renders the template once for every CSV record.
+///
+/// Each record is written to its own file under `<outdir>` when one is given;
+/// otherwise all renderings are written to `--output`, or stdout by default.
+pub fn run(argv: &[&str]) -> CliResult<()> {
+    let args: Args = util::get_args(USAGE, argv)?;
+
+    // Get template content
+    let template_content = match (args.flag_template_file, args.flag_template) {
+        (Some(path), None) => fs::read_to_string(path)?,
+        (None, Some(template)) => template,
+        _ => return fail_clierror!("Must provide either --template or --template-file"),
+    };
+
+    // Set up minijinja environment
+    let mut env = Environment::new();
+    env.add_template("template", &template_content)?;
+    let template = env.get_template("template")?;
+
+    // Set up CSV reader
+    let rconfig = Config::new(args.arg_input.as_ref())
+        .delimiter(args.flag_delimiter)
+        .no_headers(args.flag_no_headers);
+
+    let mut rdr = rconfig.reader()?;
+    let headers = if args.flag_no_headers {
+        csv::StringRecord::new()
+    } else {
+        rdr.headers()?.clone()
+    };
+
+    // Set up output handling
+    let output_to_dir = args.arg_outdir.is_some();
+    let mut row_number = 0_u64;
+    let mut rowcount = 0;
+
+    // Create filename environment once if needed. The row count is only used to
+    // zero-pad ROWNO filenames, so the extra pass over the input is skipped otherwise.
+    let filename_env = if !output_to_dir {
+        None
+    } else if args.flag_outfilename == "ROWNO" {
+        rowcount = util::count_rows(&rconfig)?;
+        None
+    } else {
+        let mut env = Environment::new();
+        env.add_template("filename", &args.flag_outfilename)?;
+        Some(env)
+    };
+
+    let width = rowcount.to_string().len();
+
+    if let Some(outdir) = &args.arg_outdir {
+        fs::create_dir_all(outdir)?;
+    }
+
+    let mut wtr = if output_to_dir {
+        None
+    } else {
+        Some(match args.flag_output {
+            Some(file) => Box::new(BufWriter::new(fs::File::create(file)?)) as Box<dyn Write>,
+            None => Box::new(BufWriter::new(std::io::stdout())) as Box<dyn Write>,
+        })
+    };
+
+    let mut curr_record = csv::StringRecord::new();
+
+    // Process each record, reusing a single record buffer
+    while rdr.read_record(&mut curr_record)? {
+        row_number += 1;
+        let mut context = serde_json::Map::with_capacity(curr_record.len());
+
+        if args.flag_no_headers {
+            // Use numeric indices
+            for (i, field) in curr_record.iter().enumerate() {
+                context.insert(format!("_c{}", i + 1), Value::String(field.to_string()));
+            }
+        } else {
+            // Use header names
+            for (header, field) in headers.iter().zip(curr_record.iter()) {
+                context.insert(header.to_string(), Value::String(field.to_string()));
+            }
+        }
+
+        // Render template with record data
+        let rendered = template.render(&context)?;
+
+        if let Some(outdir) = &args.arg_outdir {
+            // filename_env is only set when a custom filename template is used
+            let outfilename = match &filename_env {
+                Some(env) => env.get_template("filename")?.render(&context)?,
+                None => format!("{row_number:0width$}.txt"),
+            };
+
+            // The filename is rendered from CSV data, so refuse anything that could
+            // escape the output directory (absolute paths or ".." components).
+            let escapes_outdir = Path::new(&outfilename)
+                .components()
+                .any(|c| !matches!(c, Component::Normal(_) | Component::CurDir));
+            if escapes_outdir {
+                return Err(CliError::Other(format!(
+                    "Output filename \"{outfilename}\" must stay within the output directory"
+                )));
+            }
+
+            // fs::write surfaces write errors that a dropped BufWriter would swallow
+            fs::write(Path::new(outdir).join(outfilename), &rendered)?;
+        } else if let Some(ref mut w) = wtr {
+            write!(w, "{rendered}")?;
+        }
+    }
+
+    if let Some(mut w) = wtr {
+        w.flush()?;
+    }
+
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index b6907fa4e..9688f722d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -192,6 +192,7 @@ fn main() -> QsvExitCode {
     enabled_commands.push_str(
         "    stats       Infer data types and compute summary statistics
     table       Align CSV data into columns
+    template    Render templates using CSV data
     tojsonl     Convert CSV to newline-delimited JSON\n",
     );
 
@@ -393,6 +394,7 @@ enum Command {
     SqlP,
     Stats,
     Table,
+    Template,
     Transpose,
     #[cfg(all(feature = "to", feature = "feature_capable"))]
     To,
@@ -489,6 +491,7 @@ impl Command {
             Command::SqlP => cmd::sqlp::run(argv),
             Command::Stats => cmd::stats::run(argv),
             Command::Table => cmd::table::run(argv),
+            Command::Template => cmd::template::run(argv),
             Command::Transpose => cmd::transpose::run(argv),
             #[cfg(all(feature = "to", feature = "feature_capable"))]
             Command::To => cmd::to::run(argv),
diff --git a/tests/test_template.rs b/tests/test_template.rs
new file mode 100644
index 000000000..8ecb70e5b
--- /dev/null
+++ b/tests/test_template.rs
@@ -0,0 +1,333 @@
+use crate::workdir::Workdir;
+
+fn data(headers: bool) -> String {
+    if headers {
+        String::from("name,age,city\nJohn,30,New York\nJane,25,Boston\n")
+    } else {
+        String::from("John,30,New York\nJane,25,Boston\n")
+    }
+}
+
+#[test]
+fn template_basic() {
+    let wrk = Workdir::new("template_basic");
+    wrk.create_from_string("data.csv", &data(true));
+    wrk.create_from_string("template.txt", "Hello {{name}} from {{city}}!\n\n");
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "Hello John from New York!\nHello Jane from Boston!";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_no_headers() {
+    let wrk = Workdir::new("template_no_headers");
+    wrk.create_from_string("data.csv", &data(true));
+    wrk.create_from_string("template.txt", "Name: {{_c1}}, Age: {{_c2}}\n\n");
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv")
+        .arg("--no-headers");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "Name: name, Age: age\nName: John, Age: 30\nName: Jane, Age: 25";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_string() {
+    let wrk = Workdir::new("template_string");
+    wrk.create_from_string("data.csv", &data(true));
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template")
+        .arg("{{name}} is {{age}} years old\n\n")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "John is 30 years old\nJane is 25 years old";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_custom_delimiter() {
+    let wrk = Workdir::new("template_custom_delimiter");
+    wrk.create_from_string(
+        "data.csv",
+        "name;age;city\nJohn;30;New York\nJane;25;Boston\n",
+    );
+    wrk.create_from_string("template.txt", "Name: {{ name }}, Age: {{age}}\n\n");
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv")
+        .args(["--delimiter", ";"]);
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "Name: John, Age: 30\nName: Jane, Age: 25";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_with_filters() {
+    let wrk = Workdir::new("template_filters");
+    wrk.create_from_string("data.csv", "name,amount\nJohn,1234.5678\nJane,9876.54321\n");
+    wrk.create_from_string(
+        "template.txt",
+        "{{ name }}: ${{ amount | float | round(2) }}\n\n",
+    );
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "John: $1234.57\nJane: $9876.54";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_with_conditionals() {
+    let wrk = Workdir::new("template_conditionals");
+    wrk.create_from_string("data.csv", "name,age\nJohn,17\nJane,21\n");
+    wrk.create_from_string(
+        "template.txt",
+        "{{ name }} is {% if age | int >= 18 %}an adult{% else %}a minor{% endif %}\n\n",
+    );
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "John is a minor\nJane is an adult";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_missing_field() {
+    let wrk = Workdir::new("template_missing_field");
+    wrk.create_from_string("data.csv", "name,age\nJohn,30\nJane,25\n");
+    wrk.create_from_string(
+        "template.txt",
+        "{{ name }} ({{ missing_field | default('N/A') }})\n\n",
+    );
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "John (N/A)\nJane (N/A)";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_empty_input() {
+    let wrk = Workdir::new("template_empty");
+    wrk.create_from_string("data.csv", "name,age\n");
+    wrk.create_from_string("template.txt", "Hello {{name}}!\n");
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_with_loops() {
+    let wrk = Workdir::new("template_loops");
+    wrk.create_from_string(
+        "data.csv",
+        "name,hobbies\nJohn,\"reading,gaming,cooking\"\nJane,\"hiking,painting\"\n",
+    );
+    wrk.create_from_string(
+        "template.txt",
+        "{{ name }}'s hobbies: {% for hobby in hobbies | split(',') %}{{ hobby | trim }}{% if not \
+         loop.last %}, {% endif %}{% endfor %}\n\n",
+    );
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "John's hobbies: reading, gaming, cooking, \nJane's hobbies: hiking, painting, ";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_error_invalid_syntax() {
+    let wrk = Workdir::new("template_invalid_syntax");
+    wrk.create_from_string("data.csv", "name,age\nJohn,30\n");
+    wrk.create_from_string("template.txt", "{{ name } }}\n"); // Invalid syntax
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    wrk.assert_err(&mut cmd);
+}
+
+#[test]
+fn template_error_missing_template() {
+    let wrk = Workdir::new("template_missing_template");
+    wrk.create_from_string("data.csv", "name,age\nJohn,30\n");
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("nonexistent.txt")
+        .arg("data.csv");
+
+    wrk.assert_err(&mut cmd);
+}
+
+#[test]
+fn template_with_whitespace_control() {
+    let wrk = Workdir::new("template_whitespace");
+    wrk.create_from_string("data.csv", "name,items\nJohn,\"a,b,c\"\n");
+    wrk.create_from_string(
+        "template.txt",
+        "Items:{%- for item in items | split(',') %}\n - {{ item }}{%- if not loop.last %}{%- \
+         endif %}{%- endfor %}\n\n",
+    );
+
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv");
+
+    let got: String = wrk.stdout(&mut cmd);
+    let expected = "Items:\n - a\n - b\n - c";
+
+    wrk.assert_success(&mut cmd);
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_output_file() {
+    let wrk = Workdir::new("template_output");
+    wrk.create_from_string("data.csv", &data(true));
+    wrk.create_from_string("template.txt", "{{name}},{{city}}\n\n");
+
+    let output_file = "output.txt";
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("--output")
+        .arg(output_file)
+        .arg("data.csv");
+
+    wrk.assert_success(&mut cmd);
+
+    let got = wrk.read_to_string(output_file);
+    let expected = "John,New York\nJane,Boston\n";
+    assert_eq!(got, expected);
+}
+
+#[test]
+fn template_output_directory() {
+    let wrk = Workdir::new("template_output_dir");
+    wrk.create_from_string("data.csv", &data(true));
+    wrk.create_from_string("template.txt", "Hello {{name}} from {{city}}!\n");
+
+    let outdir = "output_dir";
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("data.csv")
+        .arg(outdir);
+
+    wrk.assert_success(&mut cmd);
+
+    // Check that files were created with default ROWNO naming
+    let file1 = wrk.read_to_string(&format!("{outdir}/1.txt"));
+    let file2 = wrk.read_to_string(&format!("{outdir}/2.txt"));
+
+    assert_eq!(file1, "Hello John from New York!");
+    assert_eq!(file2, "Hello Jane from Boston!");
+}
+
+#[test]
+fn template_output_custom_filename() {
+    let wrk = Workdir::new("template_custom_filename");
+    wrk.create_from_string("data.csv", &data(true));
+    wrk.create_from_string("template.txt", "Greetings from {{city}}!\n");
+
+    let outdir = "custom_output";
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("--outfilename")
+        .arg("{{name}}_greeting.txt")
+        .arg("data.csv")
+        .arg(outdir);
+
+    wrk.assert_success(&mut cmd);
+
+    // Check that files were created with custom naming
+    let file1 = wrk.read_to_string(&format!("{outdir}/John_greeting.txt"));
+    let file2 = wrk.read_to_string(&format!("{outdir}/Jane_greeting.txt"));
+
+    assert_eq!(file1, "Greetings from New York!");
+    assert_eq!(file2, "Greetings from Boston!");
+}
+
+#[test]
+fn template_output_directory_no_headers() {
+    let wrk = Workdir::new("template_output_dir_no_headers");
+    wrk.create_from_string("data.csv", &data(false));
+    wrk.create_from_string("template.txt", "Record: {{_c1}} - {{_c3}}\n");
+
+    let outdir = "no_headers_output";
+    let mut cmd = wrk.command("template");
+    cmd.arg("--template-file")
+        .arg("template.txt")
+        .arg("--no-headers")
+        .arg("data.csv")
+        .arg(outdir);
+
+    wrk.assert_success(&mut cmd);
+
+    // Check files with row numbers
+    let file1 = wrk.read_to_string(&format!("{outdir}/1.txt"));
+    let file2 = wrk.read_to_string(&format!("{outdir}/2.txt"));
+
+    assert_eq!(file1, "Record: John - New York");
+    assert_eq!(file2, "Record: Jane - Boston");
+}
diff --git a/tests/tests.rs b/tests/tests.rs
index a446e9f88..68bf74bbf 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -119,6 +119,8 @@ mod test_sqlp;
 mod test_stats;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]
 mod test_table;
+#[cfg(feature = "feature_capable")]
+mod test_template;
 #[cfg(all(feature = "to", feature = "feature_capable"))]
 mod test_to;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]