Skip to content

Commit

Permalink
Merge pull request #1429 from jqnatividad/1405-geocode-compressed_ind…
Browse files Browse the repository at this point in the history
…ex_files

`geocode`: automatically decompress snappy-compressed index files
  • Loading branch information
jqnatividad authored Nov 20, 2023
2 parents 57b8797 + 6e4d56a commit fa4d023
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ As we prepare for version 1.0, we're focusing on performance, stability and reli
* build(deps): bump itertools from 0.11.0 to 0.12.0 by @dependabot in https://github.com/jqnatividad/qsv/pull/1418
* build(deps): bump rust_decimal from 1.33.0 to 1.33.1 by @dependabot in https://github.com/jqnatividad/qsv/pull/1420
* build(deps): bump polars from 0.35.2 to 0.35.4 by @dependabot in https://github.com/jqnatividad/qsv/pull/1425
* build(deps): bump uuid from 1.5.0 to 1.6.0 by @dependabot in https://github.com/jqnatividad/qsv/pull/1428
* bump MSRV to 1.74.0
* apply select clippy suggestions
* update several indirect dependencies
Expand Down
23 changes: 21 additions & 2 deletions src/cmd/geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ use regex::Regex;
use serde::Deserialize;
use serde_json::json;
use simple_home_dir::expand_tilde;
use tempfile::tempdir;
use url::Url;
use uuid::Uuid;

Expand Down Expand Up @@ -1336,7 +1337,7 @@ async fn load_engine(geocode_index_file: PathBuf, progressbar: &ProgressBar) ->
geocode_index_file.clone(),
!progressbar.is_hidden(),
None,
None,
Some(60),
None,
)
.await?;
Expand All @@ -1357,11 +1358,29 @@ async fn load_engine(geocode_index_file: PathBuf, progressbar: &ProgressBar) ->
geocode_index_file.clone(),
!progressbar.is_hidden(),
None,
None,
Some(60),
None,
)
.await?;
}

// check if the geocode_index_file is snappy compressed
// if it is, decompress it
let geocode_index_file = if geocode_index_file.extension().unwrap() == "sz" {
let decompresssed_geocode_index_file = geocode_index_file.with_extension("bincode");
progressbar.println(format!(
"Decompressing {} to {}",
geocode_index_file.display(),
decompresssed_geocode_index_file.display()
));
let tmpdir = tempdir()?;
let decompressed_tmpfile = util::decompress_snappy_file(&geocode_index_file, &tmpdir)?;
fs::copy(decompressed_tmpfile, &decompresssed_geocode_index_file)?;
decompresssed_geocode_index_file
} else {
geocode_index_file
};

let storage = storage::bincode::Storage::new();

let engine = storage
Expand Down
6 changes: 4 additions & 2 deletions src/cmd/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,15 +890,17 @@ fn init_date_inference(
log::info!("inferring dates for ALL fields with DMY preference: {dmy_preferred}");
vec![true; headers.len()]
} else {
let mut header_str = String::new();
let mut date_found = false;
let whitelist = whitelist_lower
.split(',')
.map(str::trim)
.collect::<Vec<_>>();
headers
.iter()
.map(|header| {
let header_str = from_bytes::<String>(header).unwrap().to_lowercase();
let date_found = whitelist
util::to_lowercase_into(&from_bytes::<String>(header).unwrap(), &mut header_str);
date_found = whitelist
.iter()
.any(|whitelist_item| header_str.contains(whitelist_item));
if date_found {
Expand Down

0 comments on commit fa4d023

Please sign in to comment.