diff --git a/src/cmd/joinp.rs b/src/cmd/joinp.rs index 9adffd7c9..91c431c70 100644 --- a/src/cmd/joinp.rs +++ b/src/cmd/joinp.rs @@ -590,7 +590,18 @@ impl Args { if input2_path.extension().and_then(std::ffi::OsStr::to_str) == Some("sz") { let decompressed_path = util::decompress_snappy_file(&input2_path.to_path_buf(), tmpdir)?; - self.arg_input2 = decompressed_path; + let decomp_path = if decompressed_path.ends_with("__qsv_temp_decompressed") { + // use a regular expression to extract the original file name + // the original file name is between "qsv__" and "__qsv_temp_decompressed" + let re = + regex::Regex::new(r"qsv__(?P<filename>.*)__qsv_temp_decompressed").unwrap(); + let caps = re.captures(&decompressed_path).unwrap(); + let filename = caps.name("filename").unwrap().as_str(); + filename.to_string() + } else { + decompressed_path.clone() + }; + self.arg_input2 = decomp_path; } LazyCsvReader::new(&self.arg_input2) @@ -635,22 +646,30 @@ impl Args { } } -/// if the file has a TSV or TAB extension, we automatically use tab as the delimiter +/// if the file has a TSV/TAB or SSV extension, we automatically use +/// tab or semicolon as the delimiter /// otherwise, we use the delimiter specified by the user pub fn tsvssv_delim<P: AsRef<Path>>(file: P, orig_delim: u8) -> u8 { let inputfile_extension = file .as_ref() .extension() .and_then(std::ffi::OsStr::to_str) - .unwrap_or_default(); - - if inputfile_extension.eq_ignore_ascii_case("tsv") - || inputfile_extension.eq_ignore_ascii_case("tab") - { - b'\t' - } else if inputfile_extension.eq_ignore_ascii_case("ssv") { - b';' - } else { - orig_delim + .unwrap_or_default() + .to_ascii_lowercase(); + + match inputfile_extension.as_str() { + "tsv" | "tab" => b'\t', + "ssv" => b';', + _ => orig_delim, } + + // if inputfile_extension.eq_ignore_ascii_case("tsv") + // || inputfile_extension.eq_ignore_ascii_case("tab") + // { + // b'\t' + // } else if inputfile_extension.eq_ignore_ascii_case("ssv") { + // b';' + // } else { + // orig_delim + // } } diff 
--git a/tests/test_joinp.rs b/tests/test_joinp.rs index e96ae8184..738c4f8d8 100644 --- a/tests/test_joinp.rs +++ b/tests/test_joinp.rs @@ -54,7 +54,7 @@ macro_rules! joinp_test_comments { let wrk = setup(stringify!($name2)); let mut cmd = wrk.command("joinp"); cmd.env("QSV_COMMENT_CHAR", "#"); - cmd.args(&["city", "cities_comments.csv", "city", "places.csv"]); + cmd.args(&["city", "cities_comments.csv", "city", "places.ssv"]); $fun(wrk, cmd); } } @@ -74,7 +74,7 @@ macro_rules! joinp_test_compressed { fn headers() { let wrk = setup(stringify!($name3)); let mut cmd = wrk.command("joinp"); - cmd.args(&["city", "cities.csv.sz", "city", "places.csv.sz"]); + cmd.args(&["city", "cities.csv.sz", "city", "places.ssv.sz"]); $fun(wrk, cmd); } } @@ -131,6 +131,13 @@ fn setup(name: &str) -> Workdir { .args(["--output", &out_file2]); wrk.assert_success(&mut cmd2); + let out_file3 = wrk.path("places.ssv.sz").to_string_lossy().to_string(); + let mut cmd3 = wrk.command("snappy"); + cmd3.arg("compress") + .arg("places.ssv") + .args(["--output", &out_file3]); + wrk.assert_success(&mut cmd3); + wrk }