Skip to content

Commit

Permalink
Merge pull request #2268 from jqnatividad/544-fetchpost-construct-pay…
Browse files Browse the repository at this point in the history
…load-using-minijinja

`fetchpost`: add `--payload-tpl <file>` option to construct payload using MiniJinja
  • Loading branch information
jqnatividad authored Nov 3, 2024
2 parents 6d5faeb + a2813a8 commit cff4d89
Show file tree
Hide file tree
Showing 2 changed files with 243 additions and 10 deletions.
69 changes: 59 additions & 10 deletions src/cmd/fetchpost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ static USAGE: &str = r#"
Fetchpost fetches data from web services for every row using HTTP Post.
As opposed to fetch, which uses HTTP Get.
CSV data is posted using two methods:
1. Column-list using the <column-list> argument
The columns are used to construct the form data.
2. MiniJinja template using the --payload-tpl <file> option
The template file is used to construct the JSON payload.
Fetchpost is integrated with `jaq` (a jq clone) to directly parse out values from an API JSON response.
(See https://github.com/01mf02/jaq for more info on how to use the jaq JSON Query Language)
Expand Down Expand Up @@ -121,7 +127,7 @@ Usage:
qsv fetchpost (<url-column> <column-list>) [--jaq <selector> | --jaqfile <file>] [--http-header <k:v>...] [options] [<input>]
qsv fetchpost --help
Fetchpost options:
Fetchpost arguments:
<url-column> Name of the column with the URL.
Otherwise, if the argument starts with `http`, the URL to use.
<column-list> Comma-delimited list of columns to insert into the HTTP Post body.
Expand All @@ -130,6 +136,10 @@ Fetchpost options:
with more indexing). Column ranges can also be specified. Finally, columns
can be selected using regular expressions.
See 'qsv select --help' for examples.
Fetchpost options:
-t, --payload-tpl <file> Instead of <column-list>, use a MiniJinja template to construct a
JSON payload in the HTTP Post body.
-c, --new-column <name> Put the fetched values in a new column. Specifying this option
results in a CSV. Otherwise, the output is in JSONL format.
--jaq <selector> Apply jaq selector to API returned JSON response.
Expand Down Expand Up @@ -241,6 +251,7 @@ use log::{
debug, error, info, log_enabled, warn,
Level::{Debug, Trace, Warn},
};
use minijinja::Environment;
use rand::Rng;
use regex::Regex;
use reqwest::{
Expand All @@ -249,7 +260,6 @@ use reqwest::{
};
use serde::Deserialize;
use serde_json::{json, Value};
use simdutf8::basic::from_utf8;
use simple_expand_tilde::expand_tilde;
use url::Url;

Expand All @@ -265,6 +275,7 @@ use crate::{

#[derive(Deserialize)]
struct Args {
flag_payload_tpl: Option<String>,
flag_new_column: Option<String>,
flag_jaq: Option<String>,
flag_jaqfile: Option<String>,
Expand Down Expand Up @@ -439,13 +450,21 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
};

// validate column-list is a list of valid column names
let cl_config = Config::new(args.arg_input.as_ref())
.delimiter(args.flag_delimiter)
.trim(csv::Trim::All)
.no_headers(args.flag_no_headers)
.select(args.arg_column_list.clone());
let cl_config = if args.flag_payload_tpl.is_none() {
Config::new(args.arg_input.as_ref())
.delimiter(args.flag_delimiter)
.trim(csv::Trim::All)
.no_headers(args.flag_no_headers)
.select(args.arg_column_list.clone())
} else {
Config::new(args.arg_input.as_ref())
.delimiter(args.flag_delimiter)
.trim(csv::Trim::All)
.no_headers(args.flag_no_headers)
// we're constructing a payload, ensure all the columns are selected
.select(SelectColumns::parse("1-")?)
};
let col_list = cl_config.selection(&headers)?;
debug!("column-list: {col_list:?}");

// check if the url_column arg was passed as a URL literal
// or as a column selector
Expand Down Expand Up @@ -635,6 +654,18 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
report_wtr.write_byte_record(&report_headers)?;
}

let mut template_content = String::new();
let mut build_payload = false;
let payload_env_option = if let Some(template_file) = args.flag_payload_tpl {
template_content = fs::read_to_string(template_file)?;
let mut env = Environment::new();
env.add_template("template", &template_content)?;
build_payload = true;
Some(env)
} else {
None
};

// amortize memory allocations
// why optimize for mem & speed, when we're just doing single-threaded, throttled URL fetches?
// we still optimize since fetch is backed by a memoized cache (in memory or Redis, when --redis
Expand Down Expand Up @@ -681,6 +712,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
.collect();

let debug_flag = log_enabled!(Debug);
let mut rendered_json: Value;
let payload_env = if build_payload {
payload_env_option.unwrap()
} else {
Environment::empty()
};

while rdr.read_byte_record(&mut record)? {
if show_progress {
Expand All @@ -697,10 +734,22 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
form_body_jsonmap.insert(
(header_key_vec[*col_idx]).to_string(),
serde_json::Value::String(
from_utf8(&record[*col_idx]).unwrap_or_default().to_owned(),
simdutf8::basic::from_utf8(record.get(*col_idx).unwrap_or_default())
.unwrap_or_default()
.to_owned(),
),
);
}

if build_payload {
rendered_json = serde_json::from_str(
&payload_env
.get_template("template")?
.render(&form_body_jsonmap)?,
)?;
form_body_jsonmap.clone_from(rendered_json.as_object().ok_or("Expected JSON object")?);
}

if debug_flag {
// deserializing the form_body_jsonmap to a string is expensive
// so we only do it when debug is enabled
Expand All @@ -709,7 +758,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

if literal_url_used {
url.clone_from(&literal_url);
} else if let Ok(s) = from_utf8(&record[column_index]) {
} else if let Ok(s) = simdutf8::basic::from_utf8(&record[column_index]) {
s.clone_into(&mut url);
} else {
url = String::new();
Expand Down
184 changes: 184 additions & 0 deletions tests/test_fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1513,3 +1513,187 @@ fn fetchpost_simple_report() {

assert_eq!(got, expected);
}

/// Runs `fetchpost` with `--payload-tpl` against https://httpbin.org/post and checks the
/// CSV written to stdout.
///
/// Each input row is rendered through the MiniJinja template in `payload.tpl` and the
/// response is stored in a new `response` column. Only the first 50 bytes of each response
/// are compared; the rest of the expected strings contains values such as
/// `X-Amzn-Trace-Id`, `origin` and the `User-Agent` version, which presumably differ from
/// run to run.
#[test]
fn fetchpost_payload_template() {
    let wrk = Workdir::new("fetchpost_tpl");
    wrk.create(
        "data.csv",
        vec![
            svec!["first_name", "last_name", "age", "city"],
            svec!["John", "Smith", "35", "New York"],
            svec!["Jane", "Doe", "28", "Los Angeles"],
            svec!["Bob", "Jones", "42", "Chicago"],
        ],
    );

    // Create template file
    wrk.create_from_string(
        "payload.tpl",
        r#"{
"firstName": "{{ first_name }}",
"lastName": "{{ last_name }}",
"age": {{ age }},
"city": "{{ city }}"
}"#,
    );

    let mut cmd = wrk.command("fetchpost");
    cmd.arg("https://httpbin.org/post")
        .arg("1-")
        .arg("--payload-tpl")
        .arg("payload.tpl")
        .arg("--new-column")
        .arg("response")
        .arg("data.csv");

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    let expected = vec![
        svec!["first_name", "last_name", "age", "city", "response"],
        svec![
            "John",
            "Smith",
            "35",
            "New York",
            r#"{"args":{},"data":"","files":{},"form":{"age":"35","city":"New York","firstName":"John","lastName":"Smith"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"50","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-6b2f608527e3b127729e8409"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
        svec![
            "Jane",
            "Doe",
            "28",
            "Los Angeles",
            r#"{"args":{},"data":"","files":{},"form":{"age":"28","city":"Los Angeles","firstName":"Jane","lastName":"Doe"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"51","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-47c9d7ed1247562762fdd379"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
        svec![
            "Bob",
            "Jones",
            "42",
            "Chicago",
            r#"{"args":{},"data":"","files":{},"form":{"age":"42","city":"Chicago","firstName":"Bob","lastName":"Jones"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"48","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-784c0cdf4d78bf1257f9a4d4"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
    ];

    // `zip` stops at the shorter iterator, so pin the row count first; otherwise an empty
    // or truncated output would let the loop below pass without asserting anything.
    assert_eq!(got.len(), expected.len(), "unexpected number of output rows");
    // The loop skips the header row, so check it explicitly.
    assert_eq!(got[0], expected[0], "header row mismatch");

    for (got_row, expected_row) in got.iter().skip(1).zip(expected.iter().skip(1)) {
        // Assert first 4 columns match (`get` turns a short row into a readable failure
        // instead of a slice-index panic)
        assert_eq!(got_row.get(..4), expected_row.get(..4));
        // Assert the first 50 characters of response column match
        assert_eq!(
            got_row.get(4).and_then(|resp| resp.get(..50)),
            expected_row.get(4).and_then(|resp| resp.get(..50)),
            "Response column first 50 chars mismatch"
        );
    }
}

/// Runs `fetchpost` with `--payload-tpl` and `--report short` against
/// https://httpbin.org/post, then checks both the CSV written to stdout and selected
/// columns of `data.csv.fetchpost-report.tsv`.
///
/// As in the non-report variant, only the first 50 bytes of each response on stdout are
/// compared; the rest of the expected strings contains values such as `X-Amzn-Trace-Id`
/// and `origin`, which presumably differ from run to run.
#[test]
fn fetchpost_payload_template_with_report() {
    let wrk = Workdir::new("fetchpost_tpl_report");
    wrk.create(
        "data.csv",
        vec![
            svec!["first_name", "last_name", "age", "city"],
            svec!["John", "Smith", "35", "New York"],
            svec!["Jane", "Doe", "28", "Los Angeles"],
            svec!["Bob", "Jones", "42", "Chicago"],
        ],
    );

    // Create template file
    wrk.create_from_string(
        "payload.tpl",
        r#"{
"firstName": "{{ first_name }}",
"lastName": "{{ last_name }}",
"age": {{ age }},
"city": "{{ city }}"
}"#,
    );

    let mut cmd = wrk.command("fetchpost");
    cmd.arg("https://httpbin.org/post")
        .arg("1-")
        .arg("--payload-tpl")
        .arg("payload.tpl")
        .arg("--new-column")
        .arg("response")
        .arg("--report")
        .arg("short")
        .arg("data.csv");

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    let expected = vec![
        svec!["first_name", "last_name", "age", "city", "response"],
        svec![
            "John",
            "Smith",
            "35",
            "New York",
            r#"{"args":{},"data":"","files":{},"form":{"age":"35","city":"New York","firstName":"John","lastName":"Smith"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"50","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-6b2f608527e3b127729e8409"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
        svec![
            "Jane",
            "Doe",
            "28",
            "Los Angeles",
            r#"{"args":{},"data":"","files":{},"form":{"age":"28","city":"Los Angeles","firstName":"Jane","lastName":"Doe"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"51","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-47c9d7ed1247562762fdd379"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
        svec![
            "Bob",
            "Jones",
            "42",
            "Chicago",
            r#"{"args":{},"data":"","files":{},"form":{"age":"42","city":"Chicago","firstName":"Bob","lastName":"Jones"},"headers":{"Accept":"*/*","Accept-Encoding":"zstd;q=1.0, br;q=0.8, gzip;q=0.6, deflate;q=0.4, *;q=0.2","Content-Length":"48","Content-Type":"application/x-www-form-urlencoded","Host":"httpbin.org","User-Agent":"qsv/0.137.0 (aarch64-apple-darwin; fetchpost; compiled; https://github.com/jqnatividad/qsv)","X-Amzn-Trace-Id":"Root=1-6727a9d5-784c0cdf4d78bf1257f9a4d4"},"json":null,"origin":"149.88.100.35","url":"https://httpbin.org/post"}"#
        ],
    ];

    // `zip` stops at the shorter iterator, so pin the row count first; otherwise an empty
    // or truncated output would let the loop below pass without asserting anything.
    assert_eq!(got.len(), expected.len(), "unexpected number of output rows");
    // The loop skips the header row, so check it explicitly.
    assert_eq!(got[0], expected[0], "header row mismatch");

    for (got_row, expected_row) in got.iter().skip(1).zip(expected.iter().skip(1)) {
        // Assert first 4 columns match (`get` turns a short row into a readable failure
        // instead of a slice-index panic)
        assert_eq!(got_row.get(..4), expected_row.get(..4));
        // Assert the first 50 characters of response column match
        assert_eq!(
            got_row.get(4).and_then(|resp| resp.get(..50)),
            expected_row.get(4).and_then(|resp| resp.get(..50)),
            "Response column first 50 chars mismatch"
        );
    }

    // Check selected columns of the short report.
    // NOTE(review): the expected rows below name `bool_col`, `col1` and `number_col`,
    // none of which are columns in the `data.csv` created above. Presumably
    // `load_test_file` reads a pre-existing fixture rather than the report produced by
    // this run - confirm, and if so point this at the generated report with matching
    // expectations.
    let mut cmd = wrk.command("select");
    cmd.arg("url,form,status,cache_hit,retries,response")
        .arg(wrk.load_test_file("data.csv.fetchpost-report.tsv"));

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    let expected = vec![
        svec!["url", "form", "status", "cache_hit", "retries", "response"],
        svec![
            "https://httpbin.org/post",
            "{\"bool_col\": String(\"true\"), \"col1\": String(\"a\"), \"number_col\": \
             String(\"42\")}",
            "200",
            "0",
            "0",
            r#"{"bool_col": String("true"), "col1": String("a"), "number_col": String("42")}"#
        ],
        svec![
            "https://httpbin.org/post",
            "{\"bool_col\": String(\"false\"), \"col1\": String(\"b\"), \"number_col\": \
             String(\"3.14\")}",
            "200",
            "0",
            "0",
            r#"{"bool_col": String("false"), "col1": String("b"), "number_col": String("3.14")}"#
        ],
        svec![
            "https://httpbin.org/post",
            "{\"bool_col\": String(\"true\"), \"col1\": String(\"c\"), \"number_col\": \
             String(\"666\")}",
            "200",
            "0",
            "0",
            r#"{"bool_col": String("true"), "col1": String("c"), "number_col": String("666")}"#
        ],
    ];
    assert_eq!(got, expected);
}

0 comments on commit cff4d89

Please sign in to comment.