Skip to content

Commit

Permalink
Set encoding to UTF-8 for tags and user names
Browse files Browse the repository at this point in the history
FIX #346
  • Loading branch information
jmaspons committed Jul 24, 2024
1 parent 88ba939 commit e53a616
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 8 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: osmdata
Title: Import 'OpenStreetMap' Data as Simple Features or Spatial Objects
Version: 0.2.5.018
Version: 0.2.5.019
Authors@R: c(
person("Mark", "Padgham", , "[email protected]", role = c("aut", "cre")),
person("Bob", "Rudis", role = "aut"),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- Improved `get_bb(..., format_out = "sf_polygon")` to return full metadata
along with geometries (#338 thanks to @RegularnaMatrica)
- Mention key-only feature requests in README (#342 thanks to @joostschouppe)
- Set encoding to UTF-8 for tags and user names (#347)


0.2.5
Expand Down
7 changes: 5 additions & 2 deletions R/get-osmdata-df.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ osmdata_data_frame <- function (q,
colClasses = "character", # osm_id doesn't fit in integer
check.names = FALSE,
comment.char = "",
stringsAsFactors = stringsAsFactors
stringsAsFactors = stringsAsFactors,
encoding = "UTF-8"
)
} else if (isTRUE (obj$meta$query_type == "adiff")) {
datetime_from <- obj$meta$datetime_from
Expand Down Expand Up @@ -162,7 +163,7 @@ xml_to_df <- function (doc, stringsAsFactors = FALSE) {
osm_id = rownames (res [[i]]),
center [[i]],
meta [[i]],
tags [[i]],
setenc_utf8(tags [[i]]),
stringsAsFactors = stringsAsFactors,
check.names = FALSE
)
Expand Down Expand Up @@ -225,6 +226,7 @@ xml_adiff_to_df <- function (doc,
tagV <- vapply (tag, function (x) x, FUN.VALUE = character (2))
m [i, tagV [1, ]] <- tagV [2, ]
}
Encoding(m) <- "UTF-8"

osm_type <- xml2::xml_name (osm_obj)
osm_id <- xml2::xml_attr (osm_obj, "id")
Expand Down Expand Up @@ -325,6 +327,7 @@ get_meta_from_xml <- function (osm_obj) {
osm_uid = xml2::xml_attr (osm_obj, attr = "uid"),
osm_user = xml2::xml_attr (osm_obj, attr = "user")
)
Encoding(out$osm_user) <- "UTF-8"

} else {
out <- matrix (nrow = length (osm_obj), ncol = 0)
Expand Down
6 changes: 6 additions & 0 deletions R/get-osmdata-sc.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ osmdata_sc <- function (q, doc, quiet = TRUE) {
overpass_version = temp$obj$meta$overpass_version
)

has_tags <- c ("nodes", "relation_properties", "object")
obj [has_tags] <- lapply(obj [has_tags], function (x) {
x [, c ("key", "value")] <- setenc_utf8 (x [, c ("key", "value")])
x
})

if (!missing (q)) {
if (!is.character (q)) {
obj$meta$bbox <- q$bbox
Expand Down
5 changes: 3 additions & 2 deletions R/get-osmdata-sf.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ osmdata_sf <- function (q, doc, quiet = TRUE, stringsAsFactors = FALSE) { # noli
if (!"osm_id" %in% names (res$polygons_kv)[1]) {
res <- fill_kv (res, "polygons_kv", "polygons", stringsAsFactors)
}
kv_df <- grep ("_kv$", names (res))
res[kv_df] <- fix_columns_list (res[kv_df])
kv_df <- grep ("_kv$", names (res)) # objects with tags
res [kv_df] <- fix_columns_list (res[kv_df])
res [kv_df] <- lapply (res [kv_df], setenc_utf8)

if (missing (q)) {
obj$bbox <- paste (res$bbox, collapse = " ")
Expand Down
6 changes: 5 additions & 1 deletion R/get-osmdata-sp.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ osmdata_sp <- function (q, doc, quiet = TRUE) {
obj$osm_multipolygons <- res$multipolygons

osm_items <- grep ("^osm_", names (obj))
obj[osm_items] <- fix_columns_list (obj[osm_items])
obj [osm_items] <- fix_columns_list (obj [osm_items])
obj [osm_items] <- lapply (obj [osm_items], function (x) {
x@data <- setenc_utf8 (x@data)
x
})
class (obj) <- c (class (obj), "osmdata_sp")

return (obj)
Expand Down
17 changes: 17 additions & 0 deletions R/get-osmdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,20 @@ get_center_from_cpp_output <- function (res, what = "points") {

return (as.data.frame (this))
}


#' Set encoding to UTF-8
#'
#' Declares character data to be UTF-8 via `Encoding<-`. Only the encoding
#' mark is changed; the underlying bytes are left untouched, and pure ASCII
#' strings stay unmarked ("unknown").
#'
#' @param x a data.frame or a list. A bare character vector or matrix, or a
#' factor, is also accepted and handled directly.
#'
#' @return `x` with all the columns or items of type character marked as
#' UTF-8. Factor columns or items have their levels marked as UTF-8. All other
#' columns or items, and the structure of `x`, are returned unchanged.
#' @noRd
setenc_utf8 <- function (x) {

    # Mark one character vector (or the levels of one factor) as UTF-8.
    mark_utf8 <- function (y) {
        if (is.character (y)) {
            Encoding (y) <- "UTF-8"
        } else if (is.factor (y)) {
            # The strings of a factor live in its levels, not in its values.
            lv <- levels (y)
            Encoding (lv) <- "UTF-8"
            levels (y) <- lv
        }
        y
    }

    # A bare character vector/matrix or factor must be marked in place:
    # iterating over its elements and assigning a list back would silently
    # turn it into a list.
    if (is.character (x) || is.factor (x)) {
        return (mark_utf8 (x))
    }

    # Columns built with `stringsAsFactors = TRUE` are factors, so they have
    # to be picked up alongside plain character columns.
    text_cols <- vapply (
        x,
        function (y) is.character (y) || is.factor (y),
        FUN.VALUE = logical (1)
    )
    x [text_cols] <- lapply (x [text_cols], mark_utf8)

    return (x)
}
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
"codeRepository": "https://github.com/ropensci/osmdata/",
"issueTracker": "https://github.com/ropensci/osmdata/issues",
"license": "https://spdx.org/licenses/GPL-3.0",
"version": "0.2.5.018",
"version": "0.2.5.019",
"programmingLanguage": {
"@type": "ComputerLanguage",
"name": "R",
"url": "https://r-project.org"
},
"runtimePlatform": "R version 4.3.1 (2023-06-16)",
"runtimePlatform": "R version 4.4.1 (2024-06-14)",
"provider": {
"@id": "https://cran.r-project.org",
"@type": "Organization",
Expand Down

0 comments on commit e53a616

Please sign in to comment.