From c46035d3ae24e52a7daeb143e4f86c4fca34fc5d Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:56:35 -0500 Subject: [PATCH 1/3] `stats`: amortize allocs in init_date_inference() fn & use non-allocating to_lowercase_into --- src/cmd/stats.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cmd/stats.rs b/src/cmd/stats.rs index a33d0b638..875960e63 100644 --- a/src/cmd/stats.rs +++ b/src/cmd/stats.rs @@ -890,6 +890,8 @@ fn init_date_inference( log::info!("inferring dates for ALL fields with DMY preference: {dmy_preferred}"); vec![true; headers.len()] } else { + let mut header_str = String::new(); + let mut date_found = false; let whitelist = whitelist_lower .split(',') .map(str::trim) @@ -897,8 +899,8 @@ fn init_date_inference( headers .iter() .map(|header| { - let header_str = from_bytes::(header).unwrap().to_lowercase(); - let date_found = whitelist + util::to_lowercase_into(&from_bytes::(header).unwrap(), &mut header_str); + date_found = whitelist .iter() .any(|whitelist_item| header_str.contains(whitelist_item)); if date_found { From 6cd4ae50deb589260baf4f74f55cac5e63c61ae2 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:02:22 -0500 Subject: [PATCH 2/3] `changelog`: bump uuid from 1.5.0 to 1.6.0 [skip ci] --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdede5a1c..0a311b94a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ As we prepare for version 1.0, we're focusing on performance, stability and reli * build(deps): bump itertools from 0.11.0 to 0.12.0 by @dependabot in https://github.com/jqnatividad/qsv/pull/1418 * build(deps): bump rust_decimal from 1.33.0 to 1.33.1 by @dependabot in https://github.com/jqnatividad/qsv/pull/1420 * build(deps): bump polars from 0.35.2 to 0.35.4 by @dependabot in https://github.com/jqnatividad/qsv/pull/1425 +* build(deps): bump uuid from 1.5.0 to 1.6.0 by @dependabot in https://github.com/jqnatividad/qsv/pull/1428 * bump MSRV to 1.74.0 * apply select clippy suggestions * update several indirect dependencies From 6e4d56aa2ff3bc1410743b18f42548613849046e Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:53:33 -0500 Subject: [PATCH 3/3] `geocode`: if index files are snappy compressed, decompress them first - also add timeout of 60 secs, instead of default 30 secs --- src/cmd/geocode.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/cmd/geocode.rs b/src/cmd/geocode.rs index 6f962bb0c..e0295900b 100644 --- a/src/cmd/geocode.rs +++ b/src/cmd/geocode.rs @@ -384,6 +384,7 @@ use regex::Regex; use serde::Deserialize; use serde_json::json; use simple_home_dir::expand_tilde; +use tempfile::tempdir; use url::Url; use uuid::Uuid; @@ -1336,7 +1337,7 @@ async fn load_engine(geocode_index_file: PathBuf, progressbar: &ProgressBar) -> geocode_index_file.clone(), !progressbar.is_hidden(), None, - None, + Some(60), None, ) .await?; @@ -1357,11 +1358,29 @@ async fn load_engine(geocode_index_file: PathBuf, progressbar: &ProgressBar) -> geocode_index_file.clone(), !progressbar.is_hidden(), None, - None, + Some(60), None, ) .await?; } + + // check if the geocode_index_file is snappy compressed + // if it is, decompress it + let geocode_index_file = if geocode_index_file.extension().unwrap() == "sz" { + let decompresssed_geocode_index_file = geocode_index_file.with_extension("bincode"); + progressbar.println(format!( + "Decompressing {} to {}", + geocode_index_file.display(), + decompresssed_geocode_index_file.display() + )); + let tmpdir = tempdir()?; + let decompressed_tmpfile = util::decompress_snappy_file(&geocode_index_file, &tmpdir)?; + fs::copy(decompressed_tmpfile, &decompresssed_geocode_index_file)?; + decompresssed_geocode_index_file + } else { + geocode_index_file + }; + let storage = storage::bincode::Storage::new(); let engine = storage