diff --git a/Cargo.lock b/Cargo.lock index 80509109f..01a27f83a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -912,8 +912,7 @@ checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663" [[package]] name = "calamine" version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95f023f9ae2c2f017564b7eca85660c3c09477bb037f2fbfbaaba732b5642a32" +source = "git+https://github.com/tafia/calamine?rev=a54bd9c#a54bd9cddc61a4a30475c4454efbbb2eecf9f975" dependencies = [ "byteorder", "chrono", @@ -921,7 +920,7 @@ dependencies = [ "encoding_rs", "log", "once_cell", - "quick-xml 0.28.2", + "quick-xml 0.30.0", "serde", "zip", ] @@ -4101,9 +4100,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.28.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" dependencies = [ "encoding_rs", "memchr", diff --git a/Cargo.toml b/Cargo.toml index bdddced48..f52597c2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -226,6 +226,7 @@ serial_test = { version = "2.0", features = ["file_locks"] } [patch.crates-io] geosuggest-core = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" } geosuggest-utils = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" } +calamine = { git = "https://github.com/tafia/calamine", rev = "a54bd9c" } [features] default = ["mimalloc"] diff --git a/resources/test/any_sheets.ods b/resources/test/any_sheets.ods new file mode 100644 index 000000000..fcce70d70 Binary files /dev/null and b/resources/test/any_sheets.ods differ diff --git a/resources/test/any_sheets.xls b/resources/test/any_sheets.xls new file mode 100644 index 000000000..c9838e8cd Binary files /dev/null and b/resources/test/any_sheets.xls differ diff --git a/resources/test/any_sheets.xlsb b/resources/test/any_sheets.xlsb new file mode 
100644 index 000000000..2df8c37f3 Binary files /dev/null and b/resources/test/any_sheets.xlsb differ diff --git a/resources/test/any_sheets.xlsx b/resources/test/any_sheets.xlsx new file mode 100644 index 000000000..693894d89 Binary files /dev/null and b/resources/test/any_sheets.xlsx differ diff --git a/src/cmd/excel.rs b/src/cmd/excel.rs index 828582c06..20bb0e0cd 100644 --- a/src/cmd/excel.rs +++ b/src/cmd/excel.rs @@ -2,7 +2,50 @@ static USAGE: &str = r#" Exports a specified Excel/ODS sheet to a CSV file. The first row of a sheet is assumed to be the header row. -For examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_excel.rs. +Examples: + +Export the first sheet of an Excel file to a CSV file: + qsv excel input.xlsx > output.csv + qsv excel input.xlsx --output output.csv + +Export the first sheet of an ODS file to a CSV file: + qsv excel input.ods > output.csv + qsv excel input.ods -o output.csv + +Export the first sheet of an Excel file to a CSV file with different delimiters: + # semicolon + qsv excel input.xlsx -d ";" > output.csv + # tab + qsv excel input.xlsx -d "\t" > output.tsv + +Export a sheet by name (case-insensitive): + qsv excel --sheet "Sheet 3" input.xlsx + +Export a sheet by index: + # this exports the 3rd sheet (0-based index) + qsv excel -s 2 input.xlsx + +Export the last sheet (negative index): + qsv excel -s -1 input.xlsx + +Export the second to last sheet: + qsv excel -s -2 input.xls + +Export a range of cells in the first sheet: + qsv excel --range C3:T25 input.xlsx + +Export a range of cells in the second sheet: + qsv excel --range C3:T25 -s 1 input.xlsx + +Export metadata for all sheets in CSV format: + qsv excel --metadata c input.xlsx + +Export metadata for all sheets in JSON format: + qsv excel --metadata j input.xlsx + # pretty-printed JSON + qsv excel --metadata J input.xlsx + +For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_excel.rs. 
Usage: qsv excel [options] [] @@ -53,7 +96,7 @@ Common options: use std::{cmp, fmt::Write, path::PathBuf}; -use calamine::{open_workbook_auto, DataType, Range, Reader}; +use calamine::{open_workbook_auto, DataType, Range, Reader, SheetType}; use indicatif::HumanCount; #[cfg(any(feature = "feature_capable", feature = "lite"))] use indicatif::{ProgressBar, ProgressDrawTarget}; @@ -92,6 +135,8 @@ enum MetadataMode { struct SheetMetadata { index: usize, name: String, + typ: String, + visible: String, headers: Vec, num_columns: usize, num_rows: usize, @@ -213,6 +258,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail!("No sheets found."); } let num_sheets = sheet_names.len(); + #[allow(clippy::redundant_clone)] let sheet_vec = sheet_names.to_owned(); let mut wtr = Config::new(&args.flag_output) @@ -251,7 +297,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { match result { Ok(result) => result, Err(e) => { - return fail_clierror!("Cannot retrieve range from {sheet_name}: {e}."); + let sheet_type = workbook.sheets_metadata()[i].typ; + if sheet_type == SheetType::ChartSheet { + // return an empty range for ChartSheet + Range::empty() + } else { + return fail_clierror!("Cannot retrieve range from {sheet_name}: {e}."); + } }, } } else { @@ -308,6 +360,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let sheetmetadata_struct = SheetMetadata { index: i, name: sheet_name.to_string(), + typ: format!("{:?}", workbook.sheets_metadata()[i].typ), + visible: format!("{:?}", workbook.sheets_metadata()[i].visible), headers: header_vec, num_columns, num_rows, @@ -326,6 +380,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> { metadata_fields.extend_from_slice(&[ "index", "sheet_name", + "type", + "visible", "headers", "num_columns", "num_rows", @@ -344,6 +400,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> { sheetmetadata.index.to_string(), sheetmetadata.name, + sheetmetadata.typ, + sheetmetadata.visible, format!("{:?}", sheetmetadata.headers), 
sheetmetadata.num_columns.to_string(), sheetmetadata.num_rows.to_string(), format!("{:?}", sheetmetadata.safe_headers), @@ -433,6 +491,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail_clierror!("Cannot get sheet index for {sheet}"); }; + let sheet_type = workbook.sheets_metadata()[sheet_index].typ; + if sheet_type != SheetType::WorkSheet { + return fail_incorrectusage_clierror!( + "Can only export Worksheets. {sheet} is a {sheet_type:?}." + ); + } + let mut range = if let Some(result) = workbook.worksheet_range_at(sheet_index) { match result { Ok(result) => result, @@ -648,7 +713,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { wtr.write_record(&record)?; } // end of main processing loop } else { - return fail_clierror!("\"{sheet}\" sheet is empty"); + return fail_clierror!("\"{sheet}\" sheet is empty."); } wtr.flush()?; diff --git a/tests/test_excel.rs b/tests/test_excel.rs index 6f886bf3a..07d8aa84f 100644 --- a/tests/test_excel.rs +++ b/tests/test_excel.rs @@ -419,6 +419,8 @@ fn excel_metadata() { svec![ "index", "sheet_name", + "type", + "visible", "headers", "num_columns", "num_rows", @@ -432,6 +434,8 @@ fn excel_metadata() { "0", "First", + "WorkSheet", + "Visible", "[\"URL\", \"City\"]", "2", "4", "[\"URL\", \"City\"]", @@ -444,6 +448,8 @@ fn excel_metadata() { "1", "Flexibility Test", + "WorkSheet", + "Visible", "[\"URL\", \"City\", \"\"]", "3", "6", "[\"URL\", \"City\"]", @@ -456,6 +462,8 @@ fn excel_metadata() { "2", "Middle", + "WorkSheet", + "Visible", "[\"Middle sheet col1\", \"Middle-2\"]", "2", "6", "[\"Middle sheet col1\", \"Middle-2\"]", @@ -464,11 +472,26 @@ fn excel_metadata() { "0", "0" ], - svec!["3", "Sheet1", "[]", "0", "0", "[]", "0", "[]", "0", "0"], + svec![ + "3", + "Sheet1", + "WorkSheet", + "Visible", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], svec![ "4", "trim test", + "WorkSheet", + "Visible", "[\"col1\", \" col2\", \"col3\"]", "3", "6", "[\"col1\", \"col3\"]", @@ -481,6 +504,8 @@ fn excel_metadata() 
{ "5", "date test", + "WorkSheet", + "Visible", "[\"date_col\", \"num_col\", \"col_Petsa\", \"just another col\"]", "4", "6", "[\"date_col\", \"num_col\", \"col_Petsa\", \"just another col\"]", @@ -493,6 +518,8 @@ fn excel_metadata() { "6", "NoData", + "WorkSheet", + "Visible", "[\"col1\", \"col2\", \"col3\", \"col4\"]", "4", "1", "[\"col1\", \"col2\", \"col3\", \"col4\"]", @@ -505,6 +532,8 @@ fn excel_metadata() { "7", "Last", + "WorkSheet", + "Visible", "[\"Last sheet col1\", \"Last-2\"]", "2", "6", "[\"Last sheet col1\", \"Last-2\"]", @@ -534,6 +563,8 @@ fn excel_metadata_pretty_json() { { "index": 0, "name": "First", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "URL", "City" @@ -552,6 +583,8 @@ fn excel_metadata_pretty_json() { { "index": 1, "name": "Flexibility Test", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "URL", "City", @@ -573,6 +606,8 @@ fn excel_metadata_pretty_json() { { "index": 2, "name": "Middle", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "Middle sheet col1", "Middle-2" @@ -591,6 +626,8 @@ fn excel_metadata_pretty_json() { { "index": 3, "name": "Sheet1", + "typ": "WorkSheet", + "visible": "Visible", "headers": [], "num_columns": 0, "num_rows": 0, @@ -603,6 +640,8 @@ fn excel_metadata_pretty_json() { { "index": 4, "name": "trim test", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "col1", " col2", @@ -624,6 +663,8 @@ fn excel_metadata_pretty_json() { { "index": 5, "name": "date test", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "date_col", "num_col", @@ -646,6 +687,8 @@ fn excel_metadata_pretty_json() { { "index": 6, "name": "NoData", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "col1", "col2", @@ -668,6 +711,8 @@ fn excel_metadata_pretty_json() { { "index": 7, "name": "Last", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "Last sheet col1", "Last-2" @@ -703,6 +748,8 @@ fn ods_metadata() { svec![ "index", "sheet_name", + "type", + "visible", 
"headers", "num_columns", "num_rows", @@ -716,6 +763,8 @@ fn ods_metadata() { "0", "Sheet1", + "WorkSheet", + "Visible", "[\"URL\", \"City\"]", "2", "4", "[\"URL\", \"City\"]", @@ -747,6 +796,8 @@ fn ods_metadata_pretty_json() { { "index": 0, "name": "Sheet1", + "typ": "WorkSheet", + "visible": "Visible", "headers": [ "URL", "City" @@ -768,6 +819,352 @@ fn ods_metadata_pretty_json() { wrk.assert_success(&mut cmd); } +#[test] +fn excel_metadata_sheet_types() { + let wrk = Workdir::new("excel_metadata_sheet_types"); + + let xls_file = wrk.load_test_file("any_sheets.xls"); + + let mut cmd = wrk.command("excel"); + cmd.arg("--metadata").arg("csv").arg(xls_file); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec![ + "index", + "sheet_name", + "type", + "visible", + "headers", + "num_columns", + "num_rows", + "safe_headers", + "safe_headers_count", + "unsafe_headers", + "unsafe_headers_count", + "duplicate_headers_count" + ], + svec![ + "0", + "Visible", + "WorkSheet", + "Visible", + "[\"1\", \"2\"]", + "2", + "5", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + svec![ + "1", + "Hidden", + "WorkSheet", + "Hidden", + "[\"1\", \"2\"]", + "2", + "3", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + svec![ + "2", + "VeryHidden", + "WorkSheet", + "VeryHidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + svec![ + "3", + "Chart", + "ChartSheet", + "Visible", + "[\"1\", \"2\"]", + "2", + "3", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + ]; + assert_eq!(got, expected); + wrk.assert_success(&mut cmd); +} + +#[test] +fn excel_metadata_sheet_types_xlsx() { + let wrk = Workdir::new("excel_metadata_sheet_types_xlsx"); + + let xlsx_file = wrk.load_test_file("any_sheets.xlsx"); + + let mut cmd = wrk.command("excel"); + cmd.arg("--metadata").arg("csv").arg(xlsx_file); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec![ + "index", + "sheet_name", + "type", + "visible", + "headers", + 
"num_columns", + "num_rows", + "safe_headers", + "safe_headers_count", + "unsafe_headers", + "unsafe_headers_count", + "duplicate_headers_count" + ], + svec![ + "0", + "Visible", + "WorkSheet", + "Visible", + "[\"1\", \"2\"]", + "2", + "5", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + svec![ + "1", + "Hidden", + "WorkSheet", + "Hidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + svec![ + "2", + "VeryHidden", + "WorkSheet", + "VeryHidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + // we don't get metadata for chart sheets in xlsx + svec![ + "3", + "Chart", + "ChartSheet", + "Visible", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + ]; + assert_eq!(got, expected); + wrk.assert_success(&mut cmd); +} + +#[test] +fn excel_metadata_sheet_types_xlsb() { + let wrk = Workdir::new("excel_metadata_sheet_types_xlsb"); + + let xlsb_file = wrk.load_test_file("any_sheets.xlsb"); + + let mut cmd = wrk.command("excel"); + cmd.arg("--metadata").arg("csv").arg(xlsb_file); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec![ + "index", + "sheet_name", + "type", + "visible", + "headers", + "num_columns", + "num_rows", + "safe_headers", + "safe_headers_count", + "unsafe_headers", + "unsafe_headers_count", + "duplicate_headers_count" + ], + svec![ + "0", + "Visible", + "WorkSheet", + "Visible", + "[\"1\", \"2\"]", + "2", + "5", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + svec![ + "1", + "Hidden", + "WorkSheet", + "Hidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + svec![ + "2", + "VeryHidden", + "WorkSheet", + "VeryHidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + // we don't get metadata for chart sheets in xlsb + svec![ + "3", + "Chart", + "ChartSheet", + "Visible", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + ]; + assert_eq!(got, expected); + wrk.assert_success(&mut cmd); +} + +#[test] +fn 
excel_metadata_sheet_types_ods() { + let wrk = Workdir::new("excel_metadata_sheet_types_ods"); + + let ods_file = wrk.load_test_file("any_sheets.ods"); + + let mut cmd = wrk.command("excel"); + cmd.arg("--metadata").arg("csv").arg(ods_file); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec![ + "index", + "sheet_name", + "type", + "visible", + "headers", + "num_columns", + "num_rows", + "safe_headers", + "safe_headers_count", + "unsafe_headers", + "unsafe_headers_count", + "duplicate_headers_count" + ], + svec![ + "0", + "Visible", + "WorkSheet", + "Visible", + "[\"1\", \"2\"]", + "2", + "5", + "[]", + "0", + "[\"1\", \"2\"]", + "2", + "0" + ], + svec![ + "1", + "Hidden", + "WorkSheet", + "Hidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + svec![ + "2", + "VeryHidden", + "WorkSheet", + "Hidden", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + svec![ + "3", + "Chart", + "WorkSheet", + "Visible", + "[]", + "0", + "0", + "[]", + "0", + "[]", + "0", + "0" + ], + ]; + assert_eq!(got, expected); + wrk.assert_success(&mut cmd); +} + #[test] fn excel_message() { let wrk = Workdir::new("excel_message"); @@ -804,7 +1201,7 @@ fn excel_empty_sheet2_message() { cmd.arg("--sheet").arg("Sheet1").arg(xls_file); let got = wrk.output_stderr(&mut cmd); - assert_eq!(got, "\"Sheet1\" sheet is empty.\n"); + assert_eq!(got, "\"Sheet1\" sheet is empty.\n"); wrk.assert_err(&mut cmd); }