From 29d610f0e3e4b3d0928bee1b614f48f7330052ae Mon Sep 17 00:00:00 2001
From: rzmk <30333942+rzmk@users.noreply.github.com>
Date: Wed, 26 Jun 2024 19:59:52 -0400
Subject: [PATCH] `json`: change `jsonp` to `json` using new implementation

---
 Cargo.lock                            |  24 +++
 Cargo.toml                            |   1 +
 README.md                             |   2 +-
 contrib/bashly/completions.bash       |  50 +++++-----
 contrib/bashly/src/bashly.yml         |  12 +--
 contrib/fish/qsv.fish                 |   2 +-
 docs/FEATURES.md                      |   2 +-
 scripts/benchmarks.sh                 |   2 +-
 src/cmd/json.rs                       | 133 ++++++++++++++++++++++
 src/cmd/jsonp.rs                      | 127 ------------------------
 src/cmd/mod.rs                        |   3 +-
 src/main.rs                           |   9 +-
 tests/{test_jsonp.rs => test_json.rs} |  78 ++++++---------
 tests/tests.rs                        |   4 +-
 14 files changed, 224 insertions(+), 225 deletions(-)
 create mode 100644 src/cmd/json.rs
 delete mode 100644 src/cmd/jsonp.rs
 rename tests/{test_jsonp.rs => test_json.rs} (63%)

diff --git a/Cargo.lock b/Cargo.lock
index c98e4b470..f9e248793 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2126,6 +2126,16 @@ dependencies = [
  "miniz_oxide",
 ]

+[[package]]
+name = "flatten-json-object"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9539d6d8c87acbf7c3189fb4d1c8ce926de16369212e97ba1629b62febb3d512"
+dependencies = [
+ "serde_json",
+ "thiserror",
+]
+
 [[package]]
 name = "flexi_logger"
 version = "0.28.5"
@@ -2917,6 +2927,19 @@ dependencies = [
  "wasm-bindgen",
 ]

+[[package]]
+name = "json-objects-to-csv"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af3b789ec1548099bb74f65733e795b2d120f44fa0b34be8bae9fdb34d579b09"
+dependencies = [
+ "csv",
+ "flatten-json-object",
+ "serde_json",
+ "tempfile",
+ "thiserror",
+]
+
 [[package]]
 name = "jsonpath_lib_polars_vendor"
 version = "0.0.1"
@@ -4767,6 +4790,7 @@ dependencies = [
  "itoa",
  "jemallocator",
  "jql-runner",
+ "json-objects-to-csv",
  "jsonschema",
  "local-encoding",
  "localzone",
diff --git a/Cargo.toml b/Cargo.toml
index 92563c737..4f85aa693 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -226,6 +226,7 @@ url = "2.5"
 vader_sentiment = { version = "0.1", optional = true }
 whatlang = { version = "0.16", optional = true }
 xxhash-rust = { version = "0.8", features = ["xxh3"] }
+json-objects-to-csv = "0.1.3"

 [target.'cfg(not(target_arch = "aarch64"))'.dependencies]
 simdutf8 = "0.1"
diff --git a/README.md b/README.md
index 82f005f41..727069e17 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@
 | [join](/src/cmd/join.rs#L2)<br>👆 | Inner, outer, right, cross, anti & semi joins. Automatically creates a simple, in-memory hash index to make it fast. |
 | [joinp](/src/cmd/joinp.rs#L2)<br>✨🚀🐻‍❄️ | Inner, outer, cross, anti, semi & asof joins using the [Pola.rs](https://www.pola.rs) engine. Unlike the `join` command, `joinp` can process files larger than RAM, is multithreaded, has join key validation, pre-join filtering, supports [asof joins](https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.join_asof.html) (which is [particularly useful for time series data](https://github.com/jqnatividad/qsv/blob/30cc920d0812a854fcbfedc5db81788a0600c92b/tests/test_joinp.rs#L509-L983)) & its output doesn't have duplicate columns. However, `joinp` doesn't have an --ignore-case option & it doesn't support right outer joins. |
 | [jsonl](/src/cmd/jsonl.rs#L2)<br>🚀🔣 | Convert newline-delimited JSON ([JSONL](https://jsonlines.org/)/[NDJSON](http://ndjson.org/)) to CSV. See `tojsonl` command to convert CSV to JSONL.
-| [jsonp](/src/cmd/jsonp.rs#L2)<br>✨🐻‍❄️ | Convert non-nested JSON to CSV. Only available with the polars feature enabled.
+| [json](/src/cmd/json.rs#L2)<br>| Convert non-nested JSON to CSV. |
 | <br>[luau](/src/cmd/luau.rs#L2) 👑<br>✨📇🌐🔣 ![CKAN](docs/images/ckan.png) | Create multiple new computed columns, filter rows, compute aggregations and build complex data pipelines by executing a [Luau](https://luau-lang.org) [0.630](https://github.com/Roblox/luau/releases/tag/0.630) expression/script for every row of a CSV file ([sequential mode](https://github.com/jqnatividad/qsv/blob/bb72c4ef369d192d85d8b7cc6e972c1b7df77635/tests/test_luau.rs#L254-L298)), or using [random access](https://www.webopedia.com/definitions/random-access/) with an index ([random access mode](https://github.com/jqnatividad/qsv/blob/bb72c4ef369d192d85d8b7cc6e972c1b7df77635/tests/test_luau.rs#L367-L415)).<br>Can process a single Luau expression or [full-fledged data-wrangling scripts using lookup tables](https://github.com/dathere/qsv-lookup-tables#example) with discrete BEGIN, MAIN and END sections.<br>It is not just another qsv command, it is qsv's [Domain-specific Language](https://en.wikipedia.org/wiki/Domain-specific_language) (DSL) with [numerous qsv-specific helper functions](https://github.com/jqnatividad/qsv/blob/113eee17b97882dc368b2e65fec52b86df09f78b/src/cmd/luau.rs#L1356-L2290) to build production data pipelines. |
 | [partition](/src/cmd/partition.rs#L2)<br>👆 | Partition a CSV based on a column value. |
 | [prompt](/src/cmd/prompt.rs#L2) | Open a file dialog to pick a file. |
diff --git a/contrib/bashly/completions.bash b/contrib/bashly/completions.bash
index 75948e114..2d8c17385 100644
--- a/contrib/bashly/completions.bash
+++ b/contrib/bashly/completions.bash
@@ -108,10 +108,6 @@ _qsv_completions() {
      while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--help --k_weight -h")" -- "$cur" )
      ;;

-  *'geocode'*'suggest'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--admin1 --help --min-score -h")" -- "$cur" )
-    ;;
-
   *'snappy'*'compress'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--help -h")" -- "$cur" )
     ;;
@@ -120,6 +116,10 @@ _qsv_completions() {
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--help -h")" -- "$cur" )
     ;;

+  *'geocode'*'suggest'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--admin1 --help --min-score -h")" -- "$cur" )
+    ;;
+
   *'fetch'*'--jqlfile')
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -- "$cur" )
     ;;
@@ -340,8 +340,8 @@ _qsv_completions() {
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--delimiter --end --help --index --json --len --no-headers --output --start -h")" -- "$cur" )
     ;;

-  *'jsonl'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--batch --delimiter --help --ignore-errors --jobs --output -h")" -- "$cur" )
+  *'table'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--align --condense --delimiter --help --memcheck --output --pad --width -h")" -- "$cur" )
     ;;

   *'count'*)
@@ -364,22 +364,18 @@ _qsv_completions() {
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--delimiter --harvest-mode --help --json --just-mime --no-infer --prefer-dmy --pretty-json --progressbar --quick --quote --sample --save-urlsample --stats-types --timeout --user-agent -h")" -- "$cur" )
     ;;

-  *'table'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--align --condense --delimiter --help --memcheck --output --pad --width -h")" -- "$cur" )
+  *'stats'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--cache-threshold --cardinality --dates-whitelist --delimiter --everything --force --help --infer-boolean --infer-dates --jobs --mad --median --memcheck --mode --no-headers --nulls --output --prefer-dmy --quartiles --round --select --stats-binout --typesonly -h")" -- "$cur" )
     ;;

-  *'jsonp'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--date-format --datetime-format --float-precision --help --output --time-format --wnull-value -h")" -- "$cur" )
+  *'jsonl'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--batch --delimiter --help --ignore-errors --jobs --output -h")" -- "$cur" )
     ;;

   *'split'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--chunks --delimiter --filename --help --jobs --kb-size --no-headers --pad --quiet --size -h")" -- "$cur" )
     ;;

-  *'stats'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--cache-threshold --cardinality --dates-whitelist --delimiter --everything --force --help --infer-boolean --infer-dates --jobs --mad --median --memcheck --mode --no-headers --nulls --output --prefer-dmy --quartiles --round --select --stats-binout --typesonly -h")" -- "$cur" )
-    ;;
-
   *'sqlp'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--compress-level --compression --date-format --datetime-format --decimal-comma --delimiter --float-precision --format --help --ignore-errors --infer-len --low-memory --memcheck --no-headers --no-optimizations --output --quiet --rnull-values --statistics --time-format --truncate-ragged-lines --try-parsedates --wnull-value -h")" -- "$cur" )
     ;;
@@ -388,24 +384,28 @@ _qsv_completions() {
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--delimiter --faster --help --ignore-case --jobs --memcheck --no-headers --numeric --output --random --reverse --rng --seed --select --unique -h")" -- "$cur" )
     ;;

-  *'fill'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--backfill --default --delimiter --first --groupby --help --no-headers --output -h")" -- "$cur" )
-    ;;
-
   *'diff'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--delimiter right --delimiter-left --delimiter-output --help --jobs --key --no-headers-left --no-headers-output --no-headers-right --output --sort-columns -h")" -- "$cur" )
     ;;

+  *'enum'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--constant --copy --delimiter --hash --help --increment --new-column --no-headers --output --start --uuid4 --uuid7 -h")" -- "$cur" )
+    ;;
+
   *'luau'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--begin --cache-dir --ckan-api --ckan-token --colindex --delimiter --end --help --luau-path --max-errors --no-globals --no-headers --output --progressbar --remap --timeout -h filter map")" -- "$cur" )
     ;;

+  *'json'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--help --output -h")" -- "$cur" )
+    ;;
+
   *'join'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--cross --delimiter --full --help --ignore-case --left --left-anti --left-semi --no-headers --nulls --output --right -h")" -- "$cur" )
     ;;

-  *'enum'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--constant --copy --delimiter --hash --help --increment --new-column --no-headers --output --start --uuid4 --uuid7 -h")" -- "$cur" )
+  *'fill'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--backfill --default --delimiter --first --groupby --help --no-headers --output -h")" -- "$cur" )
     ;;

   *'cat'*)
@@ -416,16 +416,16 @@ _qsv_completions() {
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -A file -W "$(_qsv_completions_filter "--ascii --crlf --delimiter --escape --help --no-final-newline --out-delimiter --output --quote --quote-always --quote-never -h")" -- "$cur" )
     ;;

-  *'to'*)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--delimiter --drop --dump --evolve --help --jobs --pipe --print-package --quiet --schema --separator --stats --stats-csv -h datapackage parquet postgres sqlite xlsx")" -- "$cur" )
-    ;;
-
   *'py'*)
     while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--batch --delimiter --help --helper --no-headers --output --progressbar -h filter map")" -- "$cur" )
     ;;

+  *'to'*)
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--delimiter --drop --dump --evolve --help --jobs --pipe --print-package --quiet --schema --separator --stats --stats-csv -h datapackage parquet postgres sqlite xlsx")" -- "$cur" )
+    ;;
+
   *)
-    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--envlist --help --list --update --updatenow --version -h -v apply applydp behead cat count datefmt dedup describegpt diff enum excel exclude explode extdudup extsort fetch fetchpost fill fixlengths flatten fmt foreach frequency generate geocode headers index input join joinp jsonl jsonp luau partition prompt pseudo py rename reverse safenames sample schema search searchset select slice snappy sniff sort sortcheck split sqlp stats table to tojsonl transpose validate")" -- "$cur" )
+    while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_qsv_completions_filter "--envlist --help --list --update --updatenow --version -h -v apply applydp behead cat count datefmt dedup describegpt diff enum excel exclude explode extdudup extsort fetch fetchpost fill fixlengths flatten fmt foreach frequency generate geocode headers index input join joinp json jsonl luau partition prompt pseudo py rename reverse safenames sample schema search searchset select slice snappy sniff sort sortcheck split sqlp stats table to tojsonl transpose validate")" -- "$cur" )
     ;;

 esac
diff --git a/contrib/bashly/src/bashly.yml b/contrib/bashly/src/bashly.yml
index 1f1c717e1..860191051 100644
--- a/contrib/bashly/src/bashly.yml
+++ b/contrib/bashly/src/bashly.yml
@@ -814,20 +814,10 @@ commands:
       - *output
       - *delimiter

-  - name: jsonp
+  - name: json
     completions:
       - <file>
     flags:
-      - long: --datetime-format
-        arg: <arg>
-      - long: --date-format
-        arg: <arg>
-      - long: --time-format
-        arg: <arg>
-      - long: --float-precision
-        arg: <arg>
-      - long: --wnull-value
-        arg: <arg>
       - *output

   - name: luau
diff --git a/contrib/fish/qsv.fish b/contrib/fish/qsv.fish
index 245e60c6c..4cd948190 100644
--- a/contrib/fish/qsv.fish
+++ b/contrib/fish/qsv.fish
@@ -1,5 +1,5 @@
 # Set all available subcommands for qsv
-set -l qsv_commands apply applydp behead cat count datefmt dedup describegpt diff enum excel exclude explode extdedup extsort fetch fetchpost fill fixlengths flatten fmt frequency geocode headers index input join joinp jsonl jsonp luau partition prompt pseudo py rename replace reverse safenames sample schema search searchset select slice snappy sniff sort sortcheck split sqlp stats table to tojsonl transpose validate
+set -l qsv_commands apply applydp behead cat count datefmt dedup describegpt diff enum excel exclude explode extdedup extsort fetch fetchpost fill fixlengths flatten fmt frequency geocode headers index input join joinp jsonl json luau partition prompt pseudo py rename replace reverse safenames sample schema search searchset select slice snappy sniff sort sortcheck split sqlp stats table to tojsonl transpose validate

 # Enable completions for qsv
 complete -c qsv
diff --git a/docs/FEATURES.md b/docs/FEATURES.md
index ba16adf95..7898a416e 100644
--- a/docs/FEATURES.md
+++ b/docs/FEATURES.md
@@ -9,7 +9,7 @@
 * `foreach` - enable `foreach` command (not valid for Windows).
 * `geocode` - enable `geocode` command.
 * `luau` - enable `luau` command. Embeds a [Luau](https://luau-lang.org) interpreter into qsv. [Luau has type-checking, sandboxing, additional language operators, increased performance & other improvements](https://luau-lang.org/2022/11/04/luau-origins-and-evolution.html) over Lua.
-* `polars` - enables all [Polars](https://pola.rs)-powered commands (currently, `joinp`, `jsonp` and `sqlp`. Also enables polars mode in `count`). Note that Polars is a very powerful library, but it has a lot of dependencies that drastically increases both compile time and binary size.
+* `polars` - enables all [Polars](https://pola.rs)-powered commands (currently, `joinp` and `sqlp`. Also enables polars mode in `count`). Note that Polars is a very powerful library, but it has a lot of dependencies that drastically increase both compile time and binary size.
 * `python` - enable `py` command. Note that qsv will look for the shared library for the Python version (Python 3.7 & above supported) it was compiled against & will abort on startup if the library is not found, even if you're NOT using the `py` command. Check [Python](#python) section for more info.
 * `to` - enables the `to` command except the parquet option.
 * `to_parquet` - enables the `parquet` option of the `to` command. This is a separate feature as it brings in the `duckdb` dependency, which markedly increases binary size and compile time.
diff --git a/scripts/benchmarks.sh b/scripts/benchmarks.sh
index f3fc96337..4cfd3af41 100755
--- a/scripts/benchmarks.sh
+++ b/scripts/benchmarks.sh
@@ -514,7 +514,7 @@ run geocode_suggest "$qsv_bin" geocode suggest City --new-column geocoded_city "
 run geocode_reverse "$qsv_bin" geocode reverse Location --new-column geocoded_location "$data"
 run index "$qsv_bin" index "$data"
 run input "$qsv_bin" input "$data"
-run jsonp "$qsv_bin" jsonp benchmark_data.json
+run json "$qsv_bin" json benchmark_data.json
 run join "$qsv_bin" join \'Community Board\' "$data" community_board communityboards.csv
 run join_casei "$qsv_bin" join \'Community Board\' "$data" community_board --ignore-case communityboards.csv
 run joinp "$qsv_bin" joinp \'Community Board\' "$data" community_board communityboards.csv
diff --git a/src/cmd/json.rs b/src/cmd/json.rs
new file mode 100644
index 000000000..654e42d9d
--- /dev/null
+++ b/src/cmd/json.rs
@@ -0,0 +1,133 @@
+static USAGE: &str = r#"
+Convert non-nested JSON to CSV.
+
+You may provide JSON data either from stdin or a file path.
+This command may not work with nested JSON data.
+
+As a basic example, say we have a file fruits.json with contents:
+
+[
+    {
+        "fruit": "apple",
+        "price": 2.5
+    },
+    {
+        "fruit": "banana",
+        "price": 3.0
+    }
+]
+
+To convert it to CSV format, run:
+
+qsv json fruits.json
+
+And the following is printed to the terminal:
+
+fruit,price
+apple,2.5
+banana,3.0
+
+If fruits.json was provided using stdin, then either use - or do not provide a file path. For example:
+
+cat fruits.json | qsv json -
+
+For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_json.rs.
+
+Usage:
+    qsv json [options] [<input>]
+    qsv json --help
+
+Common options:
+    -h, --help             Display this message
+    -o, --output <file>    Write output to <file> instead of stdout.
+"#; + +use std::{ + env, + io::{Read, Write}, +}; + +use json_objects_to_csv::{flatten_json_object::Flattener, Json2Csv}; +use serde::Deserialize; + +use crate::{util, CliResult}; + +#[derive(Deserialize)] +struct Args { + arg_input: Option, + flag_output: Option, +} + +pub fn run(argv: &[&str]) -> CliResult<()> { + let args: Args = util::get_args(USAGE, argv)?; + + fn get_value_from_stdin() -> serde_json::Value { + // Create a buffer in memory for stdin + let mut buffer: Vec = Vec::new(); + let stdin = std::io::stdin(); + let mut stdin_handle = stdin.lock(); + stdin_handle.read_to_end(&mut buffer).unwrap(); + drop(stdin_handle); + + // Return the JSON contents of the buffer as serde_json::Value + serde_json::from_slice(&buffer).unwrap() + } + + fn get_value_from_path(path: String) -> serde_json::Value { + // Open the file in read-only mode with buffer. + let file = std::fs::File::open(path).unwrap(); + let reader = std::io::BufReader::new(file); + + // Return the JSON contents of the file as serde_json::Value + serde_json::from_reader(reader).unwrap() + } + + let flattener = Flattener::new(); + let mut output = Vec::::new(); + let value = match args.arg_input { + Some(path) => get_value_from_path(path.into()), + _ => get_value_from_stdin(), + }; + let csv_writer = csv::WriterBuilder::new().from_writer(&mut output); + let first_dict = value + .as_array() + .unwrap() + .get(0) + .unwrap() + .as_object() + .unwrap(); + let mut headers: Vec<&str> = Vec::new(); + for key in first_dict.keys() { + headers.push(key.as_str()); + } + let values = value.as_array().unwrap(); + Json2Csv::new(flattener) + .convert_from_array(values, csv_writer) + .unwrap(); + + // Use qsv select to reorder headers to first dict's keys order + let mut select_child = std::process::Command::new(env::current_exe().unwrap().clone()) + .arg("select") + .arg(headers.join(",")) + .stdin(std::process::Stdio::piped()) + .spawn()?; + let mut stdin = select_child.stdin.take().expect("Failed to open stdin"); + std::thread::spawn(move || { + stdin.write_all(&output).expect("Failed to write to stdin"); + }); + + let select_output = select_child + .wait_with_output() + .expect("Failed to read stdout"); + + if let Some(output_path) = args.flag_output { + let mut file = std::fs::File::create(&output_path)?; + let buf = select_output.stdout; + file.write_all(&buf)?; + file.flush()?; + } else { + print!("{}", String::from_utf8_lossy(&select_output.stdout)); + } + + Ok(()) +} diff --git a/src/cmd/jsonp.rs b/src/cmd/jsonp.rs deleted file mode 100644 index 92784fddc..000000000 --- a/src/cmd/jsonp.rs +++ /dev/null @@ -1,127 +0,0 @@ -static USAGE: &str = r#" -Convert non-nested JSON to CSV (polars feature only). - -You may provide JSON data either from stdin or a file path. -This command may not work with nested JSON data. - -As a basic example, say we have a file fruits.json with contents: - -[ - { - "fruit": "apple", - "price": 2.5 - }, - { - "fruit": "banana", - "price": 3.0 - } -] - -To convert it to CSV format, run: - -qsv jsonp fruits.json - -And the following is printed to the terminal: - -fruit,price -apple,2.5 -banana,3.0 - -If fruits.json was provided using stdin then either use - or do not provide a file path. For example: - -cat fruits.json | qsv jsonp - - -For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_jsonp.rs. - -Usage: - qsv jsonp [options] [] - qsv jsonp --help - -jsonp options: - --datetime-format The datetime format to use writing datetimes. 
-                              See https://docs.rs/chrono/latest/chrono/format/strftime/index.html
-                              for the list of valid format specifiers.
-    --date-format <fmt>       The date format to use writing dates.
-    --time-format <fmt>       The time format to use writing times.
-    --float-precision <arg>   The number of digits of precision to use when writing floats.
-    --wnull-value <arg>       The string to use when WRITING null values.
-
-Common options:
-    -h, --help                Display this message
-    -o, --output <file>       Write output to <file> instead of stdout.
-"#;
-
-use std::io::{Cursor, Read, Seek, SeekFrom, Write};
-
-use polars::prelude::*;
-use serde::Deserialize;
-
-use crate::{util, CliResult};
-
-#[derive(Deserialize)]
-struct Args {
-    arg_input:            Option<String>,
-    flag_datetime_format: Option<String>,
-    flag_date_format:     Option<String>,
-    flag_time_format:     Option<String>,
-    flag_float_precision: Option<usize>,
-    flag_wnull_value:     Option<String>,
-    flag_output:          Option<String>,
-}
-
-pub fn run(argv: &[&str]) -> CliResult<()> {
-    fn df_from_stdin() -> PolarsResult<DataFrame> {
-        // Create a buffer in memory for stdin
-        let mut buffer: Vec<u8> = Vec::new();
-        let stdin = std::io::stdin();
-        let mut stdin_handle = stdin.lock();
-        stdin_handle.read_to_end(&mut buffer)?;
-        drop(stdin_handle);
-        JsonReader::new(Box::new(std::io::Cursor::new(buffer))).finish()
-    }
-
-    fn df_from_path(path: String) -> PolarsResult<DataFrame> {
-        JsonReader::new(std::fs::File::open(path)?).finish()
-    }
-
-    fn df_to_csv<W: Write>(mut writer: W, mut df: DataFrame, args: &Args) -> PolarsResult<()> {
-        CsvWriter::new(&mut writer)
-            .with_datetime_format(args.flag_datetime_format.clone())
-            .with_date_format(args.flag_date_format.clone())
-            .with_time_format(args.flag_time_format.clone())
-            .with_float_precision(args.flag_float_precision)
-            .with_null_value(args.flag_wnull_value.clone().unwrap_or_default())
-            .include_bom(util::get_envvar_flag("QSV_OUTPUT_BOM"))
-            .finish(&mut df)?;
-        Ok(())
-    }
-
-    // main
-
-    let args: Args = util::get_args(USAGE, argv)?;
-
-    let df = match args.arg_input.clone() {
-        Some(path) => {
-            if path == "-" {
-                df_from_stdin()?
-            } else {
-                df_from_path(path)?
-            }
-        },
-        None => df_from_stdin()?,
-    };
-
-    if let Some(output_path) = args.flag_output.clone() {
-        let mut output = std::fs::File::create(output_path)?;
-        df_to_csv(&mut output, df, &args)?;
-    } else {
-        let mut res = Cursor::new(Vec::new());
-        df_to_csv(&mut res, df, &args)?;
-        res.seek(SeekFrom::Start(0))?;
-        let mut out = String::new();
-        res.read_to_string(&mut out)?;
-        print!("{out}");
-    }
-
-    Ok(())
-}
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index e643ecb71..cf83df780 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -44,10 +44,9 @@ pub mod input;
 pub mod join;
 #[cfg(feature = "polars")]
 pub mod joinp;
+pub mod json;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]
 pub mod jsonl;
-#[cfg(feature = "polars")]
-pub mod jsonp;
 #[cfg(feature = "luau")]
 pub mod luau;
 #[cfg(any(feature = "feature_capable", feature = "lite"))]
diff --git a/src/main.rs b/src/main.rs
index 86f9f6c3e..d725d7736 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -144,8 +144,7 @@ fn main() -> QsvExitCode {
     enabled_commands.push_str("    jsonl       Convert newline-delimited JSON files to CSV\n");

     #[cfg(all(feature = "polars", feature = "feature_capable"))]
-    enabled_commands
-        .push_str("    jsonp       Convert non-nested JSON to CSV (polars feature only)\n");
+    enabled_commands.push_str("    json        Convert non-nested JSON to CSV\n");

     #[cfg(all(feature = "luau", feature = "feature_capable"))]
     enabled_commands.push_str("    luau        Execute Luau script on CSV data\n");
@@ -359,8 +358,7 @@ enum Command {
     #[cfg(all(feature = "polars", feature = "feature_capable"))]
     JoinP,
     Jsonl,
-    #[cfg(all(feature = "polars", feature = "feature_capable"))]
-    JsonP,
+    Json,
     #[cfg(all(feature = "luau", feature = "feature_capable"))]
     Luau,
     Partition,
@@ -449,9 +447,8 @@ impl Command {
             Command::Join => cmd::join::run(argv),
             #[cfg(all(feature = "polars", feature = "feature_capable"))]
             Command::JoinP => cmd::joinp::run(argv),
+            Command::Json => cmd::json::run(argv),
             Command::Jsonl => cmd::jsonl::run(argv),
-            #[cfg(all(feature = "polars", feature = "feature_capable"))]
-            Command::JsonP => cmd::jsonp::run(argv),
             #[cfg(all(feature = "luau", feature = "feature_capable"))]
             Command::Luau => cmd::luau::run(argv),
             Command::Partition => cmd::partition::run(argv),
diff --git a/tests/test_jsonp.rs b/tests/test_json.rs
similarity index 63%
rename from tests/test_jsonp.rs
rename to tests/test_json.rs
index 5608e21ad..026dc8090 100644
--- a/tests/test_jsonp.rs
+++ b/tests/test_json.rs
@@ -1,15 +1,15 @@
 use crate::workdir::Workdir;

 #[test]
-fn jsonp_simple() {
-    let wrk = Workdir::new("jsonp_simple");
+fn json_simple() {
+    let wrk = Workdir::new("json_simple");
     wrk.create_from_string(
         "data.json",
         r#"[{"id":1,"father":"Mark","mother":"Charlotte","oldest_child":"Tom","boy":true},
 {"id":2,"father":"John","mother":"Ann","oldest_child":"Jessika","boy":false},
 {"id":3,"father":"Bob","mother":"Monika","oldest_child":"Jerry","boy":true}]"#,
     );
-    let mut cmd = wrk.command("jsonp");
+    let mut cmd = wrk.command("json");
     cmd.arg("data.json");

     let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
@@ -23,13 +23,13 @@ fn jsonp_simple() {
 }

 #[test]
-fn jsonp_fruits_stats() {
-    let wrk = Workdir::new("jsonp_fruits_stats");
+fn json_fruits_stats() {
+    let wrk = Workdir::new("json_fruits_stats");
     wrk.create_from_string(
         "data.json",
r#"[{"field":"fruit","type":"String","is_ascii":true,"sum":null,"min":"apple","max":"strawberry","range":null,"min_length":5,"max_length":10,"mean":null,"stddev":null,"variance":null,"nullcount":0,"max_precision":null,"sparsity":0},{"field":"price","type":"Float","is_ascii":null,"sum":7,"min":"1.5","max":"3.0","range":1.5,"min_length":4,"max_length":4,"mean":2.3333,"stddev":0.6236,"variance":0.3889,"nullcount":0,"max_precision":1,"sparsity":0}]"#, ); - let mut cmd = wrk.command("jsonp"); + let mut cmd = wrk.command("json"); cmd.arg("data.json"); let got: String = wrk.stdout(&mut cmd); @@ -39,29 +39,11 @@ price,Float,,7,1.5,3.0,1.5,4,4,2.3333,0.6236,0.3889,0,1,0"#.to_string(); assert_eq!(got, expected); } -#[test] -fn jsonp_fruits_stats_fp_2() { - let wrk = Workdir::new("jsonp_fruits_stats_fp_2"); - wrk.create_from_string( - "data.json", - r#"[{"field":"fruit","type":"String","is_ascii":true,"sum":null,"min":"apple","max":"strawberry","range":null,"min_length":5,"max_length":10,"mean":null,"stddev":null,"variance":null,"nullcount":0,"max_precision":null,"sparsity":0},{"field":"price","type":"Float","is_ascii":null,"sum":7,"min":"1.5","max":"3.0","range":1.5,"min_length":4,"max_length":4,"mean":2.3333,"stddev":0.6236,"variance":0.3889,"nullcount":0,"max_precision":1,"sparsity":0}]"#, - ); - let mut cmd = wrk.command("jsonp"); - cmd.arg("data.json"); - cmd.args(&["--float-precision", "2"]); - - let got: String = wrk.stdout(&mut cmd); - let expected = r#"field,type,is_ascii,sum,min,max,range,min_length,max_length,mean,stddev,variance,nullcount,max_precision,sparsity -fruit,String,true,,apple,strawberry,,5,10,,,,0,,0 -price,Float,,7,1.5,3.0,1.50,4,4,2.33,0.62,0.39,0,1,0"#.to_string(); - assert_eq!(got, expected); -} - #[test] // Verify that qsv stats fruits.csv has the same content as -// qsv stats fruits.csv | qsv slice --json | qsv jsonp -fn jsonp_fruits_stats_slice_jsonp() { - let wrk = Workdir::new("jsonp_fruits_stats_slice_jsonp"); +// qsv stats fruits.csv | qsv slice --json | qsv json +fn json_fruits_stats_slice_json() { + let wrk = Workdir::new("json_fruits_stats_slice_json"); let test_file = wrk.load_test_file("fruits.csv"); // qsv stats fruits.csv @@ -77,19 +59,19 @@ fn jsonp_fruits_stats_slice_jsonp() { let slice_output: String = wrk.stdout(&mut slice_cmd); wrk.create_from_string("slice.json", slice_output.as_str()); - // qsv jsonp - let mut jsonp_cmd = wrk.command("jsonp"); - jsonp_cmd.arg("slice.json"); - let jsonp_output: String = wrk.stdout(&mut jsonp_cmd); + // qsv json + let mut json_cmd = wrk.command("json"); + json_cmd.arg("slice.json"); + let json_output: String = wrk.stdout(&mut json_cmd); - assert_eq!(stats_output, jsonp_output); + assert_eq!(stats_output, json_output); } #[test] // Verify that qsv stats House.csv has the same content as -// qsv stats House.csv | qsv slice --json | qsv jsonp -fn jsonp_house_stats_slice_jsonp() { - let wrk = Workdir::new("jsonp_house_stats_slice_jsonp"); +// qsv stats House.csv | qsv slice --json | qsv json +fn json_house_stats_slice_json() { + let wrk = Workdir::new("json_house_stats_slice_json"); let test_file = wrk.load_test_file("House.csv"); // qsv stats fruits.csv @@ -105,20 +87,20 @@ fn jsonp_house_stats_slice_jsonp() { let slice_output: String = wrk.stdout(&mut slice_cmd); wrk.create_from_string("slice.json", slice_output.as_str()); - // qsv jsonp - let mut jsonp_cmd = wrk.command("jsonp"); - jsonp_cmd.arg("slice.json"); - let jsonp_output: String = wrk.stdout(&mut jsonp_cmd); + // qsv json + let mut json_cmd = wrk.command("json"); + 
json_cmd.arg("slice.json"); + let json_output: String = wrk.stdout(&mut json_cmd); - assert_eq!(stats_output, jsonp_output); + assert_eq!(stats_output, json_output); } #[test] // Verify that House.csv has the same content as -// qsv slice House.csv --json | qsv jsonp +// qsv slice House.csv --json | qsv json // according to qsv diff -fn jsonp_house_diff() { - let wrk = Workdir::new("jsonp_house_diff"); +fn json_house_diff() { + let wrk = Workdir::new("json_house_diff"); let _ = wrk.load_test_file("House.csv"); // qsv enum House.csv -o House_enum.csv @@ -134,11 +116,11 @@ fn jsonp_house_diff() { let slice_output: String = wrk.stdout(&mut slice_cmd); wrk.create_from_string("slice.json", slice_output.as_str()); - // qsv jsonp - let mut jsonp_cmd = wrk.command("jsonp"); - jsonp_cmd.arg("slice.json"); - let jsonp_output: String = wrk.stdout(&mut jsonp_cmd); - wrk.create_from_string("House2.csv", jsonp_output.as_str()); + // qsv json + let mut json_cmd = wrk.command("json"); + json_cmd.arg("slice.json"); + let json_output: String = wrk.stdout(&mut json_cmd); + wrk.create_from_string("House2.csv", json_output.as_str()); // qsv enum House2.csv -o House2_enum.csv let mut enum2_cmd = wrk.command("enum"); diff --git a/tests/tests.rs b/tests/tests.rs index 9093aeca8..e81e928aa 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -77,10 +77,10 @@ mod test_input; mod test_join; #[cfg(feature = "polars")] mod test_joinp; +#[cfg(not(feature = "datapusher_plus"))] +mod test_json; #[cfg(any(feature = "feature_capable", feature = "lite"))] mod test_jsonl; -#[cfg(all(feature = "polars", not(feature = "datapusher_plus")))] -mod test_jsonp; #[cfg(feature = "luau")] mod test_luau; #[cfg(any(feature = "feature_capable", feature = "lite"))]