Skip to content

Commit

Permalink
Merge pull request #1275 from jqnatividad/geocode_suggest_country_filter
Browse files Browse the repository at this point in the history
`geocode`: add  suggest `--country` filter
  • Loading branch information
jqnatividad authored Sep 2, 2023
2 parents e391582 + 248f51f commit 6222752
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 13 deletions.
10 changes: 6 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 2 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,8 @@ flexi_logger = { version = "0.26", features = [
], default-features = false }
futures = "0.3"
futures-util = "0.3"
geosuggest-core = { version = "0.3", optional = true }
geosuggest-utils = { version = "0.3", optional = true }
# geosuggest-core = { path = "../geosuggest/geosuggest-core", optional = true}
# geosuggest-utils = { path = "../geosuggest/geosuggest-utils", optional = true}
geosuggest-core = { version = "0.4", optional = true }
geosuggest-utils = { version = "0.4", optional = true }
governor = { version = "0.6", optional = true }
grex = { version = "1.4", default-features = false }
gzp = { version = "0.11", default-features = false, features = [
Expand Down Expand Up @@ -224,8 +222,6 @@ rusqlite = { version = "0.29", features = ["bundled"] }
serial_test = { version = "2.0", features = ["file_locks"] }

[patch.crates-io]
geosuggest-core = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
geosuggest-utils = { git = "https://github.com/estin/geosuggest", rev = "5c6b08b" }
calamine = { git = "https://github.com/jqnatividad/calamine", branch = "formula_empty_string_value" }

[features]
Expand Down
35 changes: 32 additions & 3 deletions src/cmd/geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ Geocode file.csv city column and set the geocoded value to a new column named la
$ qsv geocode suggest city --new-column lat_long file.csv
Limit suggestions to the US, Canada and Mexico.
$ qsv geocode suggest city --country us,ca,mx file.csv
Geocode file.csv city column with --formatstr=%state and set the
geocoded value to a new column named state.
$ qsv geocode suggest city --formatstr %state --new-column state file.csv
Use dynamic formatting to create a custom format.
$ qsv geocode suggest city --formatstr "{name}, {admin1}, {country} in {timezone}" file.csv
$ qsv geocode suggest city -f "{name}, {admin1}, {country} in {timezone}" file.csv
REVERSE
Reverse geocode a WGS 84 coordinate to the nearest Geonames city record.
Expand Down Expand Up @@ -109,6 +113,10 @@ geocode options:
Larger values will favor more populated cities.
If not set (default), the population is not used and the
nearest city is returned.
--country <country_list> The comma-delimited list of countries to filter for when calling suggest.
Country is specified as an ISO 3166-1 alpha-2 (two-letter) country code.
https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
If not set, suggest will search all countries in the currently loaded index.
-f, --formatstr=<string> The place format to use. The predefined formats are:
- '%city-state' - e.g. Brooklyn, New York
- '%city-country' - Brooklyn, US
Expand Down Expand Up @@ -155,6 +163,7 @@ geocode options:
INDEX-UPDATE only options:
--languages <lang> The languages to use when building the Geonames cities index.
The languages are specified as a comma-separated list of ISO 639-1 codes.
https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
[default: en]
--force Force update the Geonames cities index. If not set, qsv will check if there
are updates available at Geonames.org before updating the index.
Expand Down Expand Up @@ -209,6 +218,7 @@ struct Args {
flag_rename: Option<String>,
flag_min_score: Option<f32>,
flag_k_weight: Option<f32>,
flag_country: Option<String>,
flag_formatstr: String,
flag_invalid_result: Option<String>,
flag_batch: u32,
Expand Down Expand Up @@ -496,6 +506,22 @@ async fn geocode_main(args: Args) -> CliResult<()> {
}
wtr.write_record(&headers)?;

let country_filter_list = if let Some(country_list) = args.flag_country {
if args.cmd_reverse {
return fail_incorrectusage_clierror!(
"Country filter is not supported for reverse geocoding."
);
}
Some(
country_list
.split(',')
.map(|s| s.trim().to_string())
.collect::<Vec<String>>(),
)
} else {
None
};

// amortize memory allocation by reusing record
#[allow(unused_assignments)]
let mut batch_record = csv::StringRecord::new();
Expand Down Expand Up @@ -552,6 +578,7 @@ async fn geocode_main(args: Args) -> CliResult<()> {
&args.flag_formatstr,
min_score,
k_weight,
&country_filter_list,
);
if let Some(geocoded_result) = search_result {
// we have a valid geocode result, so use that
Expand Down Expand Up @@ -659,9 +686,10 @@ fn search_cached(
formatstr: &str,
min_score: Option<f32>,
k: Option<f32>,
country_filter_list: &Option<Vec<String>>,
) -> Option<String> {
if mode == GeocodeSubCmd::Suggest {
let search_result = engine.suggest(cell, 1, min_score);
let search_result = engine.suggest(cell, 1, min_score, country_filter_list.as_deref());
let Some(cityrecord) = search_result.into_iter().next() else {
return None;
};
Expand All @@ -687,7 +715,8 @@ fn search_cached(
let lat = fast_float::parse(&loccaps[1]).unwrap_or_default();
let long = fast_float::parse(&loccaps[2]).unwrap_or_default();
if (-90.0..=90.0).contains(&lat) && (-180.0..=180.0).contains(&long) {
let search_result = engine.reverse((lat, long), 1, k);
let search_result =
engine.reverse((lat, long), 1, k, country_filter_list.as_deref());
let Some(cityrecord) = (match search_result {
Some(search_result) => search_result.into_iter().next().map(|ri| ri.city),
None => return None,
Expand Down
122 changes: 122 additions & 0 deletions tests/test_geocode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,128 @@ fn geocode_suggest() {
assert_eq!(got, expected);
}

/// Runs `geocode suggest` with the `%city-admin1-country` format and no
/// country filter over a list of international place names.
///
/// The expected output shows that rows which are not geocoded (free text and
/// the invalid lat/long string) come back unchanged.
#[test]
fn geocode_suggest_intl() {
    let wrk = Workdir::new("geocode_suggest_intl");

    // Input fixture: a header row followed by one location per row.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    wrk.create("data.csv", input_rows);

    // Build the command line one argument group at a time.
    let mut geocode_cmd = wrk.command("geocode");
    geocode_cmd.arg("suggest");
    geocode_cmd.arg("Location");
    geocode_cmd.args(["-f", "%city-admin1-country"]);
    geocode_cmd.arg("data.csv");

    let expected = vec![
        svec!["Location"],
        svec!["Paris, Île-de-France Region France"],
        svec!["Manila, National Capital Region Philippines"],
        svec!["London, England United Kingdom"],
        svec!["Berlin, Germany"],
        svec!["Moscow, Moscow Russia"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brasília, Federal District Brazil"],
        svec!["95.213424, 190,1234565"],
        svec!["Havana, La Habana Province Cuba"],
    ];
    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut geocode_cmd);
    assert_eq!(actual, expected);
}

/// Runs `geocode suggest` with `--country us` over a list of international
/// place names.
///
/// With the filter in place every geocoded row is expected to resolve to a
/// United States city, while rows that are not geocoded come back unchanged.
#[test]
fn geocode_suggest_intl_country_filter() {
    let wrk = Workdir::new("geocode_suggest_intl_country_filter");

    // Input fixture: a header row followed by one location per row.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    wrk.create("data.csv", input_rows);

    // Build the command line one argument group at a time.
    let mut geocode_cmd = wrk.command("geocode");
    geocode_cmd.arg("suggest");
    geocode_cmd.arg("Location");
    geocode_cmd.args(["--country", "us"]);
    geocode_cmd.args(["-f", "%city-admin1-country"]);
    geocode_cmd.arg("data.csv");

    let expected = vec![
        svec!["Location"],
        svec!["Paris, Texas United States"],
        svec!["Manteca, California United States"],
        svec!["Sterling, Virginia United States"],
        svec!["Burlington, North Carolina United States"],
        svec!["Moscow, Idaho United States"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Bradley, Illinois United States"],
        svec!["95.213424, 190,1234565"],
        svec!["Savannah, Georgia United States"],
    ];
    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut geocode_cmd);
    assert_eq!(actual, expected);
}

/// Runs `geocode suggest` with a multi-country filter (`--country us,fr,ru`)
/// over a list of international place names.
///
/// Expected results: "Paris" resolves to France, "Moscow" to Russia, and the
/// remaining geocoded rows to United States cities. Rows that are not
/// geocoded come back unchanged.
#[test]
fn geocode_suggest_intl_multi_country_filter() {
    let wrk = Workdir::new("geocode_suggest_intl_multi_country_filter");

    // Input fixture: a header row followed by one location per row.
    let input_rows = vec![
        svec!["Location"],
        svec!["Paris"],
        svec!["Manila"],
        svec!["London"],
        svec!["Berlin"],
        svec!["Moscow"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Brazil"],
        svec!["95.213424, 190,1234565"], // invalid lat, long
        svec!["Havana"],
    ];
    wrk.create("data.csv", input_rows);

    // Build the command line one argument group at a time.
    let mut geocode_cmd = wrk.command("geocode");
    geocode_cmd.arg("suggest");
    geocode_cmd.arg("Location");
    geocode_cmd.args(["--country", "us,fr,ru"]);
    geocode_cmd.args(["-f", "%city-admin1-country"]);
    geocode_cmd.arg("data.csv");

    let expected = vec![
        svec!["Location"],
        svec!["Paris, Île-de-France Region France"],
        svec!["Manteca, California United States"],
        svec!["Sterling, Virginia United States"],
        svec!["Burlington, North Carolina United States"],
        svec!["Moscow, Moscow Russia"],
        svec!["This is not a Location and it will not be geocoded"],
        svec!["Bradley, Illinois United States"],
        svec!["95.213424, 190,1234565"],
        svec!["Savannah, Georgia United States"],
    ];
    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut geocode_cmd);
    assert_eq!(actual, expected);
}

#[test]
fn geocode_suggest_invalid() {
let wrk = Workdir::new("geocode_suggest_invalid");
Expand Down

0 comments on commit 6222752

Please sign in to comment.