diff --git a/Cargo.lock b/Cargo.lock
index 80509109f..01a27f83a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -912,8 +912,7 @@ checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
[[package]]
name = "calamine"
version = "0.21.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95f023f9ae2c2f017564b7eca85660c3c09477bb037f2fbfbaaba732b5642a32"
+source = "git+https://github.com/tafia/calamine?rev=a54bd9c#a54bd9cddc61a4a30475c4454efbbb2eecf9f975"
dependencies = [
"byteorder",
"chrono",
@@ -921,7 +920,7 @@ dependencies = [
"encoding_rs",
"log",
"once_cell",
- "quick-xml 0.28.2",
+ "quick-xml 0.30.0",
"serde",
"zip",
]
@@ -4101,9 +4100,9 @@ dependencies = [
[[package]]
name = "quick-xml"
-version = "0.28.2"
+version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1"
+checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
"encoding_rs",
"memchr",
diff --git a/Cargo.toml b/Cargo.toml
index bdddced48..f52597c2d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -226,6 +226,7 @@ serial_test = { version = "2.0", features = ["file_locks"] }
[patch.crates-io]
geosuggest-core = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
geosuggest-utils = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
+calamine = { git = "https://github.com/tafia/calamine", rev = "a54bd9c" }
[features]
default = ["mimalloc"]
diff --git a/resources/test/any_sheets.ods b/resources/test/any_sheets.ods
new file mode 100644
index 000000000..fcce70d70
Binary files /dev/null and b/resources/test/any_sheets.ods differ
diff --git a/resources/test/any_sheets.xls b/resources/test/any_sheets.xls
new file mode 100644
index 000000000..c9838e8cd
Binary files /dev/null and b/resources/test/any_sheets.xls differ
diff --git a/resources/test/any_sheets.xlsb b/resources/test/any_sheets.xlsb
new file mode 100644
index 000000000..2df8c37f3
Binary files /dev/null and b/resources/test/any_sheets.xlsb differ
diff --git a/resources/test/any_sheets.xlsx b/resources/test/any_sheets.xlsx
new file mode 100644
index 000000000..693894d89
Binary files /dev/null and b/resources/test/any_sheets.xlsx differ
diff --git a/src/cmd/excel.rs b/src/cmd/excel.rs
index 828582c06..20bb0e0cd 100644
--- a/src/cmd/excel.rs
+++ b/src/cmd/excel.rs
@@ -2,7 +2,50 @@ static USAGE: &str = r#"
Exports a specified Excel/ODS sheet to a CSV file.
The first row of a sheet is assumed to be the header row.
-For examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_excel.rs.
+Examples:
+
+Export the first sheet of an Excel file to a CSV file:
+ qsv excel input.xlsx > output.csv
+ qsv excel input.xlsx --output output.csv
+
+Export the first sheet of an ODS file to a CSV file:
+ qsv excel input.ods > output.csv
+ qsv excel input.ods -o output.csv
+
+Export the first sheet of an Excel file to a CSV file with different delimiters:
+ # semicolon
+ qsv excel input.xlsx -d ";" > output.csv
+ # tab
+ qsv excel input.xlsx -d "\t" > output.tsv
+
+Export a sheet by name (case-insensitive):
+ qsv excel --sheet "Sheet 3" input.xlsx
+
+Export a sheet by index:
+ # this exports the 3rd sheet (0-based index)
+ qsv excel -s 2 input.xlsx
+
+Export the last sheet (negative index):
+ qsv excel -s -1 input.xlsx
+
+Export the second to last sheet:
+ qsv excel -s -2 input.xls
+
+Export a range of cells in the first sheet:
+ qsv excel --range C3:T25 input.xlsx
+
+Export a range of cells in the second sheet:
+ qsv excel --range C3:T25 -s 1 input.xlsx
+
+Export metadata for all sheets in CSV format:
+ qsv excel --metadata c input.xlsx
+
+Export metadata for all sheets in JSON format:
+ qsv excel --metadata j input.xlsx
+ # pretty-printed JSON
+ qsv excel --metadata J input.xlsx
+
+For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_excel.rs.
Usage:
qsv excel [options] [<input>]
@@ -53,7 +96,7 @@ Common options:
use std::{cmp, fmt::Write, path::PathBuf};
-use calamine::{open_workbook_auto, DataType, Range, Reader};
+use calamine::{open_workbook_auto, DataType, Range, Reader, SheetType};
use indicatif::HumanCount;
#[cfg(any(feature = "feature_capable", feature = "lite"))]
use indicatif::{ProgressBar, ProgressDrawTarget};
@@ -92,6 +135,8 @@ enum MetadataMode {
struct SheetMetadata {
index: usize,
name: String,
+ typ: String,
+ visible: String,
headers: Vec,
num_columns: usize,
num_rows: usize,
@@ -213,6 +258,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
return fail!("No sheets found.");
}
let num_sheets = sheet_names.len();
+ #[allow(clippy::redundant_clone)]
let sheet_vec = sheet_names.to_owned();
let mut wtr = Config::new(&args.flag_output)
@@ -251,7 +297,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
match result {
Ok(result) => result,
Err(e) => {
- return fail_clierror!("Cannot retrieve range from {sheet_name}: {e}.");
+ let sheet_type = workbook.sheets_metadata()[i].typ;
+ if sheet_type == SheetType::ChartSheet {
+ // return an empty range for ChartSheet
+ Range::empty()
+ } else {
+ return fail_clierror!("Cannot retrieve range from {sheet_name}: {e}.");
+ }
},
}
} else {
@@ -308,6 +360,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let sheetmetadata_struct = SheetMetadata {
index: i,
name: sheet_name.to_string(),
+ typ: format!("{:?}", workbook.sheets_metadata()[i].typ),
+ visible: format!("{:?}", workbook.sheets_metadata()[i].visible),
headers: header_vec,
num_columns,
num_rows,
@@ -326,6 +380,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
metadata_fields.extend_from_slice(&[
"index",
"sheet_name",
+ "type",
+ "visible",
"headers",
"num_columns",
"num_rows",
@@ -344,6 +400,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
sheetmetadata.index.to_string(),
sheetmetadata.name,
format!("{:?}", sheetmetadata.headers),
+ sheetmetadata.typ,
+ sheetmetadata.visible,
sheetmetadata.num_columns.to_string(),
sheetmetadata.num_rows.to_string(),
format!("{:?}", sheetmetadata.safe_headers),
@@ -433,6 +491,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
return fail_clierror!("Cannot get sheet index for {sheet}");
};
+ let sheet_type = workbook.sheets_metadata()[sheet_index].typ;
+ if sheet_type != SheetType::WorkSheet {
+ return fail_incorrectusage_clierror!(
+ "Can only export Worksheets. {sheet} is a {sheet_type:?}."
+ );
+ }
+
let mut range = if let Some(result) = workbook.worksheet_range_at(sheet_index) {
match result {
Ok(result) => result,
@@ -648,7 +713,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.write_record(&record)?;
} // end of main processing loop
} else {
- return fail_clierror!("\"{sheet}\" sheet is empty");
+ return fail_clierror!("\"{sheet}\" sheet is empty.");
}
wtr.flush()?;
diff --git a/tests/test_excel.rs b/tests/test_excel.rs
index 6f886bf3a..07d8aa84f 100644
--- a/tests/test_excel.rs
+++ b/tests/test_excel.rs
@@ -419,6 +419,8 @@ fn excel_metadata() {
svec![
"index",
"sheet_name",
+ "type",
+ "visible",
"headers",
"num_columns",
"num_rows",
@@ -432,6 +434,8 @@ fn excel_metadata() {
"0",
"First",
"[\"URL\", \"City\"]",
+ "WorkSheet",
+ "Visible",
"2",
"4",
"[\"URL\", \"City\"]",
@@ -444,6 +448,8 @@ fn excel_metadata() {
"1",
"Flexibility Test",
"[\"URL\", \"City\", \"\"]",
+ "WorkSheet",
+ "Visible",
"3",
"6",
"[\"URL\", \"City\"]",
@@ -456,6 +462,8 @@ fn excel_metadata() {
"2",
"Middle",
"[\"Middle sheet col1\", \"Middle-2\"]",
+ "WorkSheet",
+ "Visible",
"2",
"6",
"[\"Middle sheet col1\", \"Middle-2\"]",
@@ -464,11 +472,26 @@ fn excel_metadata() {
"0",
"0"
],
- svec!["3", "Sheet1", "[]", "0", "0", "[]", "0", "[]", "0", "0"],
+ svec![
+ "3",
+ "Sheet1",
+ "[]",
+ "WorkSheet",
+ "Visible",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
svec![
"4",
"trim test",
"[\"col1\", \" col2\", \"col3\"]",
+ "WorkSheet",
+ "Visible",
"3",
"6",
"[\"col1\", \"col3\"]",
@@ -481,6 +504,8 @@ fn excel_metadata() {
"5",
"date test",
"[\"date_col\", \"num_col\", \"col_Petsa\", \"just another col\"]",
+ "WorkSheet",
+ "Visible",
"4",
"6",
"[\"date_col\", \"num_col\", \"col_Petsa\", \"just another col\"]",
@@ -493,6 +518,8 @@ fn excel_metadata() {
"6",
"NoData",
"[\"col1\", \"col2\", \"col3\", \"col4\"]",
+ "WorkSheet",
+ "Visible",
"4",
"1",
"[\"col1\", \"col2\", \"col3\", \"col4\"]",
@@ -505,6 +532,8 @@ fn excel_metadata() {
"7",
"Last",
"[\"Last sheet col1\", \"Last-2\"]",
+ "WorkSheet",
+ "Visible",
"2",
"6",
"[\"Last sheet col1\", \"Last-2\"]",
@@ -534,6 +563,8 @@ fn excel_metadata_pretty_json() {
{
"index": 0,
"name": "First",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"URL",
"City"
@@ -552,6 +583,8 @@ fn excel_metadata_pretty_json() {
{
"index": 1,
"name": "Flexibility Test",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"URL",
"City",
@@ -573,6 +606,8 @@ fn excel_metadata_pretty_json() {
{
"index": 2,
"name": "Middle",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"Middle sheet col1",
"Middle-2"
@@ -591,6 +626,8 @@ fn excel_metadata_pretty_json() {
{
"index": 3,
"name": "Sheet1",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [],
"num_columns": 0,
"num_rows": 0,
@@ -603,6 +640,8 @@ fn excel_metadata_pretty_json() {
{
"index": 4,
"name": "trim test",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"col1",
" col2",
@@ -624,6 +663,8 @@ fn excel_metadata_pretty_json() {
{
"index": 5,
"name": "date test",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"date_col",
"num_col",
@@ -646,6 +687,8 @@ fn excel_metadata_pretty_json() {
{
"index": 6,
"name": "NoData",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"col1",
"col2",
@@ -668,6 +711,8 @@ fn excel_metadata_pretty_json() {
{
"index": 7,
"name": "Last",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"Last sheet col1",
"Last-2"
@@ -703,6 +748,8 @@ fn ods_metadata() {
svec![
"index",
"sheet_name",
+ "type",
+ "visible",
"headers",
"num_columns",
"num_rows",
@@ -716,6 +763,8 @@ fn ods_metadata() {
"0",
"Sheet1",
"[\"URL\", \"City\"]",
+ "WorkSheet",
+ "Visible",
"2",
"4",
"[\"URL\", \"City\"]",
@@ -747,6 +796,8 @@ fn ods_metadata_pretty_json() {
{
"index": 0,
"name": "Sheet1",
+ "typ": "WorkSheet",
+ "visible": "Visible",
"headers": [
"URL",
"City"
@@ -768,6 +819,352 @@ fn ods_metadata_pretty_json() {
wrk.assert_success(&mut cmd);
}
+#[test]
+fn excel_metadata_sheet_types() { // runs `excel --metadata csv` on the any_sheets.xls fixture and compares every output row
+ let wrk = Workdir::new("excel_metadata_sheet_types");
+
+ let xls_file = wrk.load_test_file("any_sheets.xls");
+
+ let mut cmd = wrk.command("excel");
+ cmd.arg("--metadata").arg("csv").arg(xls_file);
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![ // CSV header row first, then one row per sheet
+ svec![ // NOTE(review): "type"/"visible" are listed before "headers", but each data row below carries the headers value before the type and visibility values
+ "index",
+ "sheet_name",
+ "type",
+ "visible",
+ "headers",
+ "num_columns",
+ "num_rows",
+ "safe_headers",
+ "safe_headers_count",
+ "unsafe_headers",
+ "unsafe_headers_count",
+ "duplicate_headers_count"
+ ],
+ svec![ // sheet 0, named "Visible"
+ "0",
+ "Visible",
+ "[\"1\", \"2\"]",
+ "WorkSheet",
+ "Visible",
+ "2",
+ "5",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ svec![ // sheet 1, named "Hidden"
+ "1",
+ "Hidden",
+ "[\"1\", \"2\"]",
+ "WorkSheet",
+ "Hidden",
+ "2",
+ "3",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ svec![ // sheet 2, named "VeryHidden"
+ "2",
+ "VeryHidden",
+ "[]",
+ "WorkSheet",
+ "VeryHidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ svec![ // sheet 3, named "Chart": for .xls the expected row is a ChartSheet with two headers and three rows
+ "3",
+ "Chart",
+ "[\"1\", \"2\"]",
+ "ChartSheet",
+ "Visible",
+ "2",
+ "3",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ ];
+ assert_eq!(got, expected);
+ wrk.assert_success(&mut cmd);
+}
+
+#[test]
+fn excel_metadata_sheet_types_xlsx() { // runs `excel --metadata csv` on the any_sheets.xlsx fixture and compares every output row
+ let wrk = Workdir::new("excel_metadata_sheet_types_xlsx");
+
+ let xlsx_file = wrk.load_test_file("any_sheets.xlsx");
+
+ let mut cmd = wrk.command("excel");
+ cmd.arg("--metadata").arg("csv").arg(xlsx_file);
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![ // CSV header row first, then one row per sheet
+ svec![ // NOTE(review): "type"/"visible" are listed before "headers", but each data row below carries the headers value before the type and visibility values
+ "index",
+ "sheet_name",
+ "type",
+ "visible",
+ "headers",
+ "num_columns",
+ "num_rows",
+ "safe_headers",
+ "safe_headers_count",
+ "unsafe_headers",
+ "unsafe_headers_count",
+ "duplicate_headers_count"
+ ],
+ svec![ // sheet 0, named "Visible"
+ "0",
+ "Visible",
+ "[\"1\", \"2\"]",
+ "WorkSheet",
+ "Visible",
+ "2",
+ "5",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ svec![ // sheet 1, named "Hidden": expected with no headers and zero rows
+ "1",
+ "Hidden",
+ "[]",
+ "WorkSheet",
+ "Hidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ svec![ // sheet 2, named "VeryHidden"
+ "2",
+ "VeryHidden",
+ "[]",
+ "WorkSheet",
+ "VeryHidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ // we don't get metadata for chart sheets in xlsx
+ svec![ // sheet 3, named "Chart": typed ChartSheet, all counts zero
+ "3",
+ "Chart",
+ "[]",
+ "ChartSheet",
+ "Visible",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ ];
+ assert_eq!(got, expected);
+ wrk.assert_success(&mut cmd);
+}
+
+#[test]
+fn excel_metadata_sheet_types_xlsb() { // runs `excel --metadata csv` on the any_sheets.xlsb fixture and compares every output row
+ let wrk = Workdir::new("excel_metadata_sheet_types_xlsb");
+
+ let xlsb_file = wrk.load_test_file("any_sheets.xlsb");
+
+ let mut cmd = wrk.command("excel");
+ cmd.arg("--metadata").arg("csv").arg(xlsb_file);
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![ // CSV header row first, then one row per sheet
+ svec![ // NOTE(review): "type"/"visible" are listed before "headers", but each data row below carries the headers value before the type and visibility values
+ "index",
+ "sheet_name",
+ "type",
+ "visible",
+ "headers",
+ "num_columns",
+ "num_rows",
+ "safe_headers",
+ "safe_headers_count",
+ "unsafe_headers",
+ "unsafe_headers_count",
+ "duplicate_headers_count"
+ ],
+ svec![ // sheet 0, named "Visible"
+ "0",
+ "Visible",
+ "[\"1\", \"2\"]",
+ "WorkSheet",
+ "Visible",
+ "2",
+ "5",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ svec![ // sheet 1, named "Hidden": expected with no headers and zero rows
+ "1",
+ "Hidden",
+ "[]",
+ "WorkSheet",
+ "Hidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ svec![ // sheet 2, named "VeryHidden"
+ "2",
+ "VeryHidden",
+ "[]",
+ "WorkSheet",
+ "VeryHidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ // we don't get metadata for chart sheets in xlsb
+ svec![ // sheet 3, named "Chart": typed ChartSheet, all counts zero
+ "3",
+ "Chart",
+ "[]",
+ "ChartSheet",
+ "Visible",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ ];
+ assert_eq!(got, expected);
+ wrk.assert_success(&mut cmd);
+}
+
+#[test]
+fn excel_metadata_sheet_types_ods() { // runs `excel --metadata csv` on the any_sheets.ods fixture and compares every output row
+ let wrk = Workdir::new("excel_metadata_sheet_types_ods");
+
+ let ods_file = wrk.load_test_file("any_sheets.ods");
+
+ let mut cmd = wrk.command("excel");
+ cmd.arg("--metadata").arg("csv").arg(ods_file);
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![ // CSV header row first, then one row per sheet
+ svec![ // NOTE(review): "type"/"visible" are listed before "headers", but each data row below carries the headers value before the type and visibility values
+ "index",
+ "sheet_name",
+ "type",
+ "visible",
+ "headers",
+ "num_columns",
+ "num_rows",
+ "safe_headers",
+ "safe_headers_count",
+ "unsafe_headers",
+ "unsafe_headers_count",
+ "duplicate_headers_count"
+ ],
+ svec![ // sheet 0, named "Visible"
+ "0",
+ "Visible",
+ "[\"1\", \"2\"]",
+ "WorkSheet",
+ "Visible",
+ "2",
+ "5",
+ "[]",
+ "0",
+ "[\"1\", \"2\"]",
+ "2",
+ "0"
+ ],
+ svec![ // sheet 1, named "Hidden": expected with no headers and zero rows
+ "1",
+ "Hidden",
+ "[]",
+ "WorkSheet",
+ "Hidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ svec![ // sheet 2, named "VeryHidden": the expected visibility for ODS is "Hidden", not "VeryHidden"
+ "2",
+ "VeryHidden",
+ "[]",
+ "WorkSheet",
+ "Hidden",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ svec![ // sheet 3, named "Chart": the expected type for ODS is "WorkSheet", not "ChartSheet"
+ "3",
+ "Chart",
+ "[]",
+ "WorkSheet",
+ "Visible",
+ "0",
+ "0",
+ "[]",
+ "0",
+ "[]",
+ "0",
+ "0"
+ ],
+ ];
+ assert_eq!(got, expected);
+ wrk.assert_success(&mut cmd);
+}
+
#[test]
fn excel_message() {
let wrk = Workdir::new("excel_message");
@@ -804,7 +1201,7 @@ fn excel_empty_sheet2_message() {
cmd.arg("--sheet").arg("Sheet1").arg(xls_file);
let got = wrk.output_stderr(&mut cmd);
- assert_eq!(got, "\"Sheet1\" sheet is empty\n");
+ assert_eq!(got, "\"Sheet1\" sheet is empty.\n");
wrk.assert_err(&mut cmd);
}