-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1880 from jqnatividad/jsonp
`jsonp`: add `jsonp` command allowing non-nested JSON to CSV conversion with Polars
- Loading branch information
Showing
6 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
// Help/usage text for the `jsonp` command. | ||
// `run` hands this string to `util::get_args` together with argv to populate `Args`, | ||
// so the option names listed here must stay in sync with the `flag_*`/`arg_*` fields. | ||
static USAGE: &str = r#" | ||
Convert non-nested JSON to CSV (polars feature only). | ||
You may provide JSON data either from stdin or a file path. | ||
This command may not work with nested JSON data. | ||
As a basic example, say we have a file fruits.json with contents: | ||
[ | ||
{ | ||
"fruit": "apple", | ||
"price": 2.5 | ||
}, | ||
{ | ||
"fruit": "banana", | ||
"price": 3.0 | ||
} | ||
] | ||
To convert it to CSV format, run: | ||
qsv jsonp fruits.json | ||
And the following is printed to the terminal: | ||
fruit,price | ||
apple,2.5 | ||
banana,3.0 | ||
If fruits.json was provided using stdin then either use - or do not provide a file path. For example: | ||
cat fruits.json | qsv jsonp - | ||
For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_jsonp.rs. | ||
Usage: | ||
qsv jsonp [options] [<input>] | ||
qsv jsonp --help | ||
jsonp options: | ||
--datetime-format <fmt> The datetime format to use writing datetimes. | ||
See https://docs.rs/chrono/latest/chrono/format/strftime/index.html | ||
for the list of valid format specifiers. | ||
--date-format <fmt> The date format to use writing dates. | ||
--time-format <fmt> The time format to use writing times. | ||
--float-precision <arg> The number of digits of precision to use when writing floats. | ||
--wnull-value <arg> The string to use when WRITING null values. | ||
Common options: | ||
-h, --help Display this message | ||
-o, --output <file> Write output to <file> instead of stdout. | ||
"#; | ||
|
||
use std::io::{Cursor, Read, Seek, SeekFrom, Write}; | ||
|
||
use polars::prelude::*; | ||
use serde::Deserialize; | ||
|
||
use crate::{util, CliResult}; | ||
|
||
#[derive(Deserialize)] | ||
struct Args { | ||
arg_input: Option<String>, | ||
flag_datetime_format: Option<String>, | ||
flag_date_format: Option<String>, | ||
flag_time_format: Option<String>, | ||
flag_float_precision: Option<usize>, | ||
flag_wnull_value: Option<String>, | ||
flag_output: Option<String>, | ||
} | ||
|
||
pub fn run(argv: &[&str]) -> CliResult<()> { | ||
let args: Args = util::get_args(USAGE, argv)?; | ||
|
||
fn df_from_stdin() -> PolarsResult<DataFrame> { | ||
// Create a buffer in memory for stdin | ||
let mut buffer: Vec<u8> = Vec::new(); | ||
let stdin = std::io::stdin(); | ||
stdin.lock().read_to_end(&mut buffer)?; | ||
JsonReader::new(Box::new(std::io::Cursor::new(buffer))).finish() | ||
} | ||
|
||
fn df_from_path(path: String) -> PolarsResult<DataFrame> { | ||
JsonReader::new(std::fs::File::open(path)?).finish() | ||
} | ||
|
||
let df = match args.arg_input.clone() { | ||
Some(path) => { | ||
if path == "-" { | ||
df_from_stdin()? | ||
} else { | ||
df_from_path(path)? | ||
} | ||
}, | ||
None => df_from_stdin()?, | ||
}; | ||
|
||
fn df_to_csv<W: Write>(mut writer: W, mut df: DataFrame, args: &Args) -> PolarsResult<()> { | ||
CsvWriter::new(&mut writer) | ||
.with_datetime_format(args.flag_datetime_format.clone()) | ||
.with_date_format(args.flag_date_format.clone()) | ||
.with_time_format(args.flag_time_format.clone()) | ||
.with_float_precision(args.flag_float_precision) | ||
.with_null_value(args.flag_wnull_value.clone().unwrap_or("".to_string())) | ||
.include_bom(util::get_envvar_flag("QSV_OUTPUT_BOM")) | ||
.finish(&mut df)?; | ||
Ok(()) | ||
} | ||
|
||
if let Some(output_path) = args.flag_output.clone() { | ||
let mut output = std::fs::File::create(output_path)?; | ||
df_to_csv(&mut output, df, &args)?; | ||
} else { | ||
let mut res = Cursor::new(Vec::new()); | ||
df_to_csv(&mut res, df, &args)?; | ||
res.seek(SeekFrom::Start(0))?; | ||
let mut out = String::new(); | ||
res.read_to_string(&mut out)?; | ||
println!("{out}"); | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
use crate::workdir::Workdir; | ||
|
||
#[test]
// Converts a small array of flat JSON objects read from a file and compares the
// resulting CSV, parsed back into rows, with the expected header and records.
fn jsonp_simple() {
    let workdir = Workdir::new("jsonp_simple");
    workdir.create_from_string(
        "data.json",
        r#"[{"id":1,"father":"Mark","mother":"Charlotte","oldest_child":"Tom","boy":true},
{"id":2,"father":"John","mother":"Ann","oldest_child":"Jessika","boy":false},
{"id":3,"father":"Bob","mother":"Monika","oldest_child":"Jerry","boy":true}]"#,
    );

    let expected = vec![
        svec!["id", "father", "mother", "oldest_child", "boy"],
        svec!["1", "Mark", "Charlotte", "Tom", "true"],
        svec!["2", "John", "Ann", "Jessika", "false"],
        svec!["3", "Bob", "Monika", "Jerry", "true"],
    ];

    let mut jsonp_cmd = workdir.command("jsonp");
    jsonp_cmd.arg("data.json");
    let got: Vec<Vec<String>> = workdir.read_stdout(&mut jsonp_cmd);

    assert_eq!(got, expected);
}
|
||
#[test]
// Feeds a JSON rendering of stats output (including null values) through `jsonp`
// and compares the raw stdout text with the expected CSV; nulls become empty fields.
fn jsonp_fruits_stats() {
    let workdir = Workdir::new("jsonp_fruits_stats");
    workdir.create_from_string(
        "data.json",
        r#"[{"field":"fruit","type":"String","is_ascii":true,"sum":null,"min":"apple","max":"strawberry","range":null,"min_length":5,"max_length":10,"mean":null,"stddev":null,"variance":null,"nullcount":0,"max_precision":null,"sparsity":0},{"field":"price","type":"Float","is_ascii":null,"sum":7,"min":"1.5","max":"3.0","range":1.5,"min_length":4,"max_length":4,"mean":2.3333,"stddev":0.6236,"variance":0.3889,"nullcount":0,"max_precision":1,"sparsity":0}]"#,
    );

    let expected = r#"field,type,is_ascii,sum,min,max,range,min_length,max_length,mean,stddev,variance,nullcount,max_precision,sparsity
fruit,String,true,,apple,strawberry,,5,10,,,,0,,0
price,Float,,7,1.5,3.0,1.5,4,4,2.3333,0.6236,0.3889,0,1,0"#.to_string();

    let mut jsonp_cmd = workdir.command("jsonp");
    jsonp_cmd.arg("data.json");
    let got: String = workdir.stdout(&mut jsonp_cmd);

    assert_eq!(got, expected);
}
|
||
#[test]
// Same input as `jsonp_fruits_stats`, but run with `--float-precision 2`; the
// expected CSV shows the float columns written with two decimal places.
fn jsonp_fruits_stats_fp_2() {
    let workdir = Workdir::new("jsonp_fruits_stats_fp_2");
    workdir.create_from_string(
        "data.json",
        r#"[{"field":"fruit","type":"String","is_ascii":true,"sum":null,"min":"apple","max":"strawberry","range":null,"min_length":5,"max_length":10,"mean":null,"stddev":null,"variance":null,"nullcount":0,"max_precision":null,"sparsity":0},{"field":"price","type":"Float","is_ascii":null,"sum":7,"min":"1.5","max":"3.0","range":1.5,"min_length":4,"max_length":4,"mean":2.3333,"stddev":0.6236,"variance":0.3889,"nullcount":0,"max_precision":1,"sparsity":0}]"#,
    );

    let expected = r#"field,type,is_ascii,sum,min,max,range,min_length,max_length,mean,stddev,variance,nullcount,max_precision,sparsity
fruit,String,true,,apple,strawberry,,5,10,,,,0,,0
price,Float,,7,1.5,3.0,1.50,4,4,2.33,0.62,0.39,0,1,0"#.to_string();

    let mut jsonp_cmd = workdir.command("jsonp");
    jsonp_cmd.arg("data.json");
    jsonp_cmd.arg("--float-precision");
    jsonp_cmd.arg("2");
    let got: String = workdir.stdout(&mut jsonp_cmd);

    assert_eq!(got, expected);
}
|
||
#[test]
// Round-trip check: the output of `qsv stats fruits.csv` must equal the output of
// `qsv stats fruits.csv | qsv slice --json | qsv jsonp`.
// Each stage's stdout is saved to a file that the next stage reads.
fn jsonp_fruits_stats_slice_jsonp() {
    let workdir = Workdir::new("jsonp_fruits_stats_slice_jsonp");
    let fruits_csv = workdir.load_test_file("fruits.csv");

    // Stage 1: qsv stats fruits.csv
    let mut stats = workdir.command("stats");
    stats.arg(fruits_csv);
    let stats_csv: String = workdir.stdout(&mut stats);
    workdir.create_from_string("stats.csv", &stats_csv);

    // Stage 2: qsv slice stats.csv --json
    let mut slice = workdir.command("slice");
    slice.arg("stats.csv");
    slice.arg("--json");
    let stats_json: String = workdir.stdout(&mut slice);
    workdir.create_from_string("slice.json", &stats_json);

    // Stage 3: qsv jsonp slice.json
    let mut jsonp = workdir.command("jsonp");
    jsonp.arg("slice.json");
    let roundtrip_csv: String = workdir.stdout(&mut jsonp);

    assert_eq!(stats_csv, roundtrip_csv);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters