diff --git a/resources/test/adur-public-toilets-extsorted-csvmode.csv b/resources/test/adur-public-toilets-extsorted-csvmode.csv new file mode 100644 index 000000000..3a8805846 --- /dev/null +++ b/resources/test/adur-public-toilets-extsorted-csvmode.csv @@ -0,0 +1,16 @@ +ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,RADARKeyNeeded,BabyChange,FamilyToilet,ChangingPlace,AutomaticPublicConvenience,FullTimeStaffing,PartOfCommunityScheme,CommunitySchemeName,ChargeAmount,InfoURL,OpeningHours,ManagedBy,ReportEmail,ReportTel,Notes,UPRN,Postcode,StreetAddress,GeoAreaURI,GeoAreaLabel +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK,OSGB36,522083,105168,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,09.00 - 17.00,ADC,surveyor_15@adur-worthing.gov.uk,01903 221471,,60034215,,PUBLIC CONVENIENCES CIVIC CENTRE HAM ROAD SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK,OSGB36,522007,106062,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,None,No,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,,ADC,surveyor_14@adur-worthing.gov.uk,01903 221471,Grounds staff only not public,60032527,,PUBLIC CONVENIENCE NORTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES CEMETERY MILL LANE SHOREHAM-BY-SEA,OSGB36,521440,105725,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,None,No,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,,ADC,surveyor_9@adur-worthing.gov.uk,01903 221471,Grounds staff only not public,60014340,,PUBLIC CONVENIENCES CEMETERY MILL LANE SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCE SOUTHWICK STREET SOUTHWICK,OSGB36,524401,105405,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 08:00 - 21:00 W = 08:00 - 17:00,ADC,surveyor_11@adur-worthing.gov.uk,01903 221471,,60026354,,PUBLIC CONVENIENCE SOUTHWICK STREET SOUTHWICK,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES ADUR RECREATION GROUND BRIGHTON ROAD SHOREHAM-BY-SEA,OSGB36,521048,104977,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 08:00 - 21:00 W = 08:00 - 17:00,ADC,surveyor_6@adur-worthing.gov.uk,01903 221471,,60009666,,PUBLIC CONVENIENCES ADUR RECREATION GROUND BRIGHTON ROAD SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING,OSGB36,518225,104730,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,None,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 15:00 W = 09:00 - 15:00,ADC,surveyor_2@adur-worthing.gov.uk,01903 221471,,60002210,,PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING,, +,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00 ,ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK,OSGB36,524375,104753,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_13@adur-worthing.gov.uk,01903 221471,,60029181,,BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES BEACH GREEN SHOREHAM-BY-SEA,OSGB36,521299,104515,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_5@adur-worthing.gov.uk,01903 221471,,60009402,,PUBLIC CONVENIENCES BEACH GREEN SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES FORTHAVEN SHOREHAM-BY-SEA,OSGB36,523294,104588,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_7@adur-worthing.gov.uk,01903 221471,,60011970,,PUBLIC CONVENIENCES FORTHAVEN SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES MIDDLE STREET SHOREHAM-BY-SEA,OSGB36,521515,105083,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_8@adur-worthing.gov.uk,01903 221471,,60014163,,PUBLIC CONVENIENCES MIDDLE STREET SHOREHAM-BY-SEA,, +2014-07-07 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING,OSGB3,518915,103795,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Mens,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_3@adur-worthing.gov.uk,01903 221471,,60007428,,,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SOUTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA,OSGB36,522118,105939,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,None,No,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_10@adur-worthing.gov.uk,01903 221471,,60017866,,PUBLIC CONVENIENCES SOUTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES YEW TREE CLOSE LANCING,OSGB36,518222,104168,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_4@adur-worthing.gov.uk,01903 221471,,60008859,,PUBLIC CONVENIENCES YEW TREE CLOSE LANCING,, +07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,WEST BEACH PUBLIC CONVENIENCES WEST BEACH ROAD LANCING,OSGB36,520354,104246,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,,surveyor_12@adur-worthing.gov.uk,01903 221471,,60028994,,WEST BEACH PUBLIC CONVENIENCES WEST BEACH ROAD LANCING,, diff --git a/src/cmd/extsort.rs b/src/cmd/extsort.rs index 127c79d3d..a26643b11 100644 --- a/src/cmd/extsort.rs +++ b/src/cmd/extsort.rs @@ -1,17 +1,30 @@ static USAGE: &str = r#" Sort an arbitrarily large CSV/text file using a multithreaded external sort algorithm. -This command is not specific to CSV data, it sorts any text file on a -line-by-line basis. If sorting a non-CSV file, be sure to set --no-headers, -otherwise, the first line will not be included in the external sort. +This command has TWO modes of operation. + + * CSV MODE + when --select is set, it sorts based on the given column/s. Requires an index. + See `qsv select --help` for select syntax details. + * LINE MODE + when --select is NOT set, it sorts any input text file (not just CSVs) on a + line-by-line basis. If sorting a non-CSV file, be sure to set --no-headers, + otherwise, the first line will not be included in the external sort. Usage: qsv extsort [options] [] [] qsv extsort --help External sort option: - --memory-limit The maximum amount of memory to buffer the on-disk hash table. - This is a percentage of total memory. + -s, --select Select a subset of columns to sort (CSV MODE). + Note that the outputs will remain at the full width of the CSV. + If --select is NOT set, extsort will work in LINE MODE, sorting + the input as a text file on a line-by-line basis. + -R, --reverse Reverse order + --memory-limit The maximum amount of memory to buffer the external merge sort. + If less than 50, this is a percentage of total memory. + If more than 50, this is the memory in MB to allocate, capped + at 90 percent of total memory. [default: 10] --tmp-dir The directory to use for externally sorting file segments. [default: ./] @@ -20,6 +33,10 @@ External sort option: number of CPUs detected. Common options: + CSV MODE ONLY: + -d, --delimiter The field delimiter for reading CSV data. + Must be a single character. (default: ,) + -h, --help Display this message -n, --no-headers When set, the first row will not be interpreted as headers and will be sorted with the rest @@ -35,21 +52,28 @@ use std::{ use ext_sort::{buffer::mem::MemoryLimitedBufferBuilder, ExternalSorter, ExternalSorterBuilder}; use serde::Deserialize; -use sysinfo::System; -use crate::{config, util, CliResult}; +use crate::{ + cmd::extdedup::calculate_memory_limit, + config, + config::{Config, Delimiter}, + select::SelectColumns, + util, CliResult, +}; #[derive(Deserialize)] struct Args { arg_input: Option, arg_output: Option, + flag_select: Option, + flag_reverse: bool, + flag_delimiter: Option, flag_jobs: Option, - flag_memory_limit: Option, + flag_memory_limit: Option, flag_tmp_dir: Option, flag_no_headers: bool, } -const MEMORY_LIMITED_BUFFER: u64 = 100 * 1_000_000; // 100 MB const RW_BUFFER_CAPACITY: usize = 1_000_000; // 1 MB pub fn run(argv: &[&str]) -> CliResult<()> { @@ -66,20 +90,172 @@ pub fn run(argv: &[&str]) -> CliResult<()> { None => "./".to_string(), }; - // memory buffer to use for external merge sort, - // if we can detect the total memory, use 10% of it by default - // and up to --memory-limit (capped at 50%), - // otherwise, if we cannot detect the free memory use a default of 100 MB - let mem_limited_buffer = if sysinfo::IS_SUPPORTED_SYSTEM { - let mut sys = System::new(); - sys.refresh_memory(); - (sys.total_memory() * 1000) / u8::min(args.flag_memory_limit.unwrap_or(10), 50) as u64 + // Set the memory buffer size for the external merge sort based on --memory-limit + // and system capabilities. + let mem_limited_buffer_bytes = calculate_memory_limit(args.flag_memory_limit); + log::info!("{mem_limited_buffer_bytes} bytes used for in memory mergesort buffer..."); + + let sorter: ExternalSorter = + match ExternalSorterBuilder::new() + .with_tmp_dir(path::Path::new(&tmp_dir)) + .with_buffer(MemoryLimitedBufferBuilder::new(mem_limited_buffer_bytes)) + .with_rw_buf_size(RW_BUFFER_CAPACITY) + .with_threads_number(util::njobs(args.flag_jobs)) + .build() + { + Ok(sorter) => sorter, + Err(e) => { + return fail_clierror!("cannot create external sorter: {e}"); + }, + }; + + if args.flag_select.is_some() { + sort_csv(&args, &tmp_dir, &sorter) + } else { + sort_lines(&args, &sorter) + } +} + +fn sort_csv( + args: &Args, + tmp_dir: &str, + sorter: &ExternalSorter, +) -> Result<(), crate::clitypes::CliError> { + let rconfig = Config::new(args.arg_input.as_ref()) + .delimiter(args.flag_delimiter) + .no_headers(args.flag_no_headers) + .select(args.flag_select.clone().unwrap()); + + let mut idxfile = if let Ok(idx) = rconfig.indexed() { + if idx.is_none() { + return fail_incorrectusage_clierror!("extsort CSV mode requires an index"); + } + idx.unwrap() + } else { + return fail_incorrectusage_clierror!("extsort CSV mode requires an index"); + }; + + let mut input_rdr = rconfig.reader()?; + + let linewtr_tfile = tempfile::NamedTempFile::new_in(tmp_dir)?; + let mut line_wtr = io::BufWriter::with_capacity(RW_BUFFER_CAPACITY, linewtr_tfile.as_file()); + + let headers = input_rdr.byte_headers()?.clone(); + let sel = rconfig.selection(&headers)?; + + let mut sort_key = String::with_capacity(20); + let mut utf8_string = String::with_capacity(20); + let mut curr_row = csv::ByteRecord::new(); + + let rowcount = idxfile.count(); + let width = rowcount.to_string().len(); + + // first pass. get the selected columns, and the record position + // then write them to a temp text file with the selected columns and the position + // separated by "". Pad the position with leading zeroes, so it will always be the same width + for row in input_rdr.byte_records() { + curr_row.clone_from(&row?); + sort_key.clear(); + for field in sel.select(&curr_row) { + if let Ok(s_utf8) = simdutf8::basic::from_utf8(field) { + sort_key.push_str(s_utf8); + } else { + utf8_string.clear(); + utf8_string.push_str(&String::from_utf8_lossy(field)); + sort_key.push_str(&utf8_string); + } + } + let idx_position = curr_row.position().unwrap(); + + sort_key.push_str(&format!("{:01$}", idx_position.line(), width)); + + writeln!(line_wtr, "{sort_key}")?; + } + line_wtr.flush()?; + + let line_rdr = io::BufReader::with_capacity( + RW_BUFFER_CAPACITY, + std::fs::File::open(linewtr_tfile.path())?, + ); + + let reverse_flag = args.flag_reverse; + let compare = |a: &String, b: &String| { + if reverse_flag { + a.cmp(b).reverse() + } else { + a.cmp(b) + } + }; + + // Now sort the temp text file + let sorted = match sorter.sort_by(line_rdr.lines(), compare) { + Ok(sorted) => sorted, + Err(e) => { + return fail!(format!("cannot do external sort: {e:?}")); + }, + }; + + let sorted_tfile = tempfile::NamedTempFile::new_in(tmp_dir)?; + let mut sorted_line_wtr = + io::BufWriter::with_capacity(RW_BUFFER_CAPACITY, sorted_tfile.as_file()); + + for item in sorted.map(Result::unwrap) { + sorted_line_wtr.write_all(format!("{item}\n").as_bytes())?; + } + sorted_line_wtr.flush()?; + // Delete the temporary file containing unsorted lines + drop(line_wtr); + linewtr_tfile.close()?; + + // now write the sorted CSV file by reading the sorted_line temp file + // and extracting the position from each line + // and then using that to seek the input file to retrieve the record + // and then write the record to the final sorted CSV + let sorted_lines = std::fs::File::open(sorted_tfile.path())?; + let sorted_line_rdr = io::BufReader::with_capacity(RW_BUFFER_CAPACITY, sorted_lines); + + let mut sorted_csv_wtr = Config::new(args.arg_output.as_ref()).writer()?; + + let position_delta: u64 = if args.flag_no_headers { + 1 } else { - MEMORY_LIMITED_BUFFER + // Write the header row if --no-headers is false + sorted_csv_wtr.write_byte_record(&headers)?; + 2 }; - log::info!("{mem_limited_buffer} bytes used for in memory mergesort buffer..."); - let mut input_reader: Box = match &args.arg_input { + // amortize allocations + let mut record_wrk = csv::ByteRecord::new(); + let mut line = String::new(); + #[allow(unused_assignments)] + let mut line_parts: Vec<&str> = Vec::with_capacity(2); + + for l in sorted_line_rdr.lines() { + line.clone_from(&l?); + line_parts = line.rsplitn(2, "").collect(); + if line_parts.len() != 2 { + return fail_clierror!("Invalid sorted line format"); + } + let position: u64 = line_parts[0] + .parse() + .map_err(|e| format!("Failed to retrieve position: {e}"))?; + + idxfile.seek(position - position_delta)?; + idxfile.read_byte_record(&mut record_wrk)?; + sorted_csv_wtr.write_byte_record(&record_wrk)?; + } + sorted_csv_wtr.flush()?; + drop(sorted_line_wtr); + sorted_tfile.close()?; + + Ok(()) +} + +fn sort_lines( + args: &Args, + sorter: &ExternalSorter, +) -> Result<(), crate::clitypes::CliError> { + let mut input_rdr: Box = match &args.arg_input { Some(input_path) => { if input_path.to_lowercase().ends_with(".sz") { return fail_incorrectusage_clierror!( @@ -95,7 +271,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { None => Box::new(io::BufReader::new(stdin().lock())), }; - let mut output_writer: Box = match &args.arg_output { + let mut output_wtr: Box = match &args.arg_output { Some(output_path) => { if output_path.to_lowercase().ends_with(".sz") { return fail_clierror!( @@ -114,45 +290,34 @@ pub fn run(argv: &[&str]) -> CliResult<()> { )), }; - let sorter: ExternalSorter = - match ExternalSorterBuilder::new() - .with_tmp_dir(path::Path::new(&tmp_dir)) - .with_buffer(MemoryLimitedBufferBuilder::new(mem_limited_buffer)) - .with_rw_buf_size(RW_BUFFER_CAPACITY) - .with_threads_number(util::njobs(args.flag_jobs)) - .build() - { - Ok(sorter) => sorter, - Err(e) => { - return fail_clierror!("cannot create external sorter: {e}"); - }, - }; - let mut header = String::new(); if !args.flag_no_headers { - input_reader.read_line(&mut header)?; + input_rdr.read_line(&mut header)?; } - let Ok(sorted) = sorter.sort(input_reader.lines()) else { - return fail!("cannot do external sort"); + let reverse_flag = args.flag_reverse; + let compare = |a: &String, b: &String| { + if reverse_flag { + a.cmp(b).reverse() + } else { + a.cmp(b) + } + }; + + let sorted = match sorter.sort_by(input_rdr.lines(), compare) { + Ok(sorted) => sorted, + Err(e) => { + return fail!(format!("cannot do external sort: {e:?}")); + }, }; if !header.is_empty() { - output_writer.write_all(format!("{}\n", header.trim_end()).as_bytes())?; + output_wtr.write_all(format!("{}\n", header.trim_end()).as_bytes())?; } for item in sorted.map(Result::unwrap) { - output_writer.write_all(format!("{item}\n").as_bytes())?; + output_wtr.write_all(format!("{item}\n").as_bytes())?; } - output_writer.flush()?; + output_wtr.flush()?; Ok(()) } - -#[test] -fn test_mem_check() { - // check to see if sysinfo return meminfo without segfaulting - let mut sys = System::new(); - sys.refresh_memory(); - let mem10percent = (sys.total_memory() * 1000) / 10; // 10 percent of total memory - assert!(mem10percent > 0); -} diff --git a/tests/test_extsort.rs b/tests/test_extsort.rs index ba33a1ece..d91bb3788 100644 --- a/tests/test_extsort.rs +++ b/tests/test_extsort.rs @@ -3,8 +3,8 @@ use newline_converter::dos2unix; use crate::workdir::Workdir; #[test] -fn extsort() { - let wrk = Workdir::new("extsort").flexible(true); +fn extsort_linemode() { + let wrk = Workdir::new("extsort_linemode").flexible(true); wrk.clear_contents().unwrap(); // copy csv file to workdir @@ -24,3 +24,32 @@ fn extsort() { assert_eq!(dos2unix(&sorted_output), dos2unix(&expected_csv)); } + +#[test] +fn extsort_csvmode() { + let wrk = Workdir::new("extsort_csvmode").flexible(true); + wrk.clear_contents().unwrap(); + + // copy csv file to workdir + let unsorted_csv = wrk.load_test_resource("adur-public-toilets.csv"); + wrk.create_from_string("adur-public-toilets.csv", &unsorted_csv); + + // set the environment variable to autoindex + std::env::set_var("QSV_AUTOINDEX_SIZE", "1"); + + let mut cmd = wrk.command("extsort"); + cmd.arg("adur-public-toilets.csv") + .args(["--select", "OpeningHours,StreetAddress,LocationText"]) + .arg("adur-public-toilets-extsort-csvmode.csv"); + wrk.output(&mut cmd); + // unset the environment variable + std::env::remove_var("QSV_AUTOINDEX_SIZE"); + + // load sorted output + let sorted_output: String = wrk.from_str(&wrk.path("adur-public-toilets-extsort-csvmode.csv")); + + let expected_csv = wrk.load_test_resource("adur-public-toilets-extsorted-csvmode.csv"); + wrk.create_from_string("adur-public-toilets-extsorted-csvmode.csv", &expected_csv); + + assert_eq!(dos2unix(&sorted_output), dos2unix(&expected_csv)); +}