Skip to content

Commit

Permalink
Create export/import functions for GW/SW data
Browse files Browse the repository at this point in the history
sets scraped from Wasserportal.

to do: clean vignettes in order to be in line with new functions (and also use new import functions!)
  • Loading branch information
mrustl committed Sep 8, 2022
1 parent 1d26eb5 commit 1cce450
Show file tree
Hide file tree
Showing 13 changed files with 355 additions and 17 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Imports:
archive,
data.table,
dplyr,
fs,
httr,
kwb.datetime,
kwb.utils,
Expand All @@ -41,6 +42,7 @@ Imports:
stringr,
tibble,
tidyr,
withr,
xml2
Suggests:
covr,
Expand Down
14 changes: 13 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(base_url_download)
export(columns_to_labels)
export(get_daily_surfacewater_data)
export(get_groundwater_data)
Expand All @@ -15,13 +16,18 @@ export(get_wasserportal_stations)
export(get_wasserportal_stations_table)
export(get_wasserportal_variables)
export(list_data_to_csv_or_zip)
export(list_masters_data_to_csv)
export(list_timeseries_data_to_zip)
export(read)
export(readPackageFile)
export(read_wasserportal)
export(read_wasserportal_raw)
export(read_wasserportal_raw_gw)
export(wasserportal_base_url)
import(kwb.utils)
export(wp_masters_data_to_list)
export(wp_timeseries_data_to_list)
importFrom(archive,archive_extract)
importFrom(archive,archive_write)
importFrom(data.table,rbindlist)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
Expand All @@ -32,18 +38,21 @@ importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,select_if)
importFrom(fs,dir_create)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(kwb.datetime,textToEuropeBerlinPosix)
importFrom(kwb.utils,catAndRun)
importFrom(kwb.utils,readPackageFile)
importFrom(kwb.utils,selectColumns)
importFrom(kwb.utils,selectElements)
importFrom(kwb.utils,stopFormatted)
importFrom(kwb.utils,substSpecialChars)
importFrom(magrittr,"%>%")
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(readr,read_csv)
importFrom(readr,write_csv)
importFrom(rlang,.data)
importFrom(rvest,html_attr)
Expand All @@ -56,9 +65,12 @@ importFrom(stringr,str_extract)
importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split_fixed)
importFrom(tibble,tibble)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(utils,download.file)
importFrom(utils,read.table)
importFrom(withr,with_dir)
importFrom(xml2,read_html)
10 changes: 9 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# [wasserportal 0.2.0](https://github.com/KWB-R/wasserportal/releases/tag/v0.2.0) <small>2022-09-07</small>
# [wasserportal 0.2.0](https://github.com/KWB-R/wasserportal/releases/tag/v0.2.0) <small>2022-09-08</small>

* Add functions for exporting time series data to `zip` files (`list_timeseries_data_to_zip()`)
and master data to `csv` files (`list_masters_data_to_csv()`), which will be
uploaded to [https://kwb-r.github.io/wasserportal](https://kwb-r.github.io/wasserportal)/`<filename>`

* In addition, `import` functions for downloading and importing the datasets above
into R as lists were added (`wp_timeseries_data_to_list()`, `wp_masters_data_to_list()`)

* Code cleaning by `@hsonne` started

* Fix `master data` requests by using the `master_url` instead of `station_id`,
as the latter was not unique. Now functions `get_wasserportal_master_data()` and
Expand Down
56 changes: 54 additions & 2 deletions R/list_data_to_csv_or_zip.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
#' @param to_zip convert to zip file (default: FALSE)
#' @return loops through list of data frames and uses list names as filenames
#' @export
#' @importFrom archive archive_write
#' @importFrom readr write_csv
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace str_replace_all
list_data_to_csv_or_zip <- function(
data_list,
file_prefix = "",
Expand All @@ -18,11 +19,15 @@ list_data_to_csv_or_zip <- function(
filename_base <- paste0(
file_prefix,
name %>%
stringr::str_replace("_", "-") %>%
stringr::str_replace_all("_", "-") %>%
stringr::str_replace("\\.", "_"),
collapse = "_"
)

if(startsWith(filename_base, "surface")) {
filename_base <- paste0("daily_", filename_base)
}

filename_csv <- paste0(filename_base, ".csv")
filename_zip <- paste0(filename_base, ".zip")

Expand All @@ -47,3 +52,50 @@ list_data_to_csv_or_zip <- function(

unlist(tmp)
}


#' Helper function: list timeseries data to zip
#'
#' Thin wrapper around \code{\link{list_data_to_csv_or_zip}}, called with an
#' empty file prefix and \code{to_zip = TRUE}.
#'
#' @param timeseries_data_list time series data in list form as retrieved by
#' \code{\link{get_groundwater_data}} or
#' \code{\link{get_daily_surfacewater_data}}
#' @return return value of \code{\link{list_data_to_csv_or_zip}}, which loops
#' through the list of data frames and uses the list names as filenames
#' @export
#' @importFrom readr write_csv
#' @importFrom stringr str_replace
#' @examples
#' \dontrun{
#' stations <- wasserportal::get_stations()
#' # Groundwater Time Series
#' gw_tsdata_list <- wasserportal::get_groundwater_data(stations)
#' gw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(gw_tsdata_list)
#' # Surface Water Time Series
#' sw_tsdata_list <- wasserportal::get_daily_surfacewater_data(stations)
#' sw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(sw_tsdata_list)
#' }
list_timeseries_data_to_zip <- function(timeseries_data_list) {
  # Time series files carry no prefix and are always zipped
  list_data_to_csv_or_zip(
    timeseries_data_list,
    file_prefix = "",
    to_zip = TRUE
  )
}

#' Helper function: list masters data to csv
#'
#' Thin wrapper around \code{\link{list_data_to_csv_or_zip}}, called with the
#' file prefix \code{"stations_"} and \code{to_zip = FALSE}.
#'
#' @param masters_data_list masters data in list form as retrieved by
#' \code{\link{get_stations}} sublist element "overview_list"
#' @return return value of \code{\link{list_data_to_csv_or_zip}}, which loops
#' through the list of data frames and uses the list names as filenames
#' @export
#' @importFrom readr write_csv
#' @importFrom stringr str_replace
#' @examples
#' \dontrun{
#' stations <- wasserportal::get_stations()
#' masters_data_csv_files <- wasserportal::list_masters_data_to_csv(
#'   stations$overview_list
#' )
#' masters_data_csv_files
#' }
list_masters_data_to_csv <- function(masters_data_list) {
  # Master data files are prefixed with "stations_" and written as plain csv
  list_data_to_csv_or_zip(
    masters_data_list,
    file_prefix = "stations_",
    to_zip = FALSE
  )
}
4 changes: 3 additions & 1 deletion R/read_wasserportal_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ get_station_variables <- function(station_df)
#' i.e. `get_stations()$crosstable`
#' @return ????
#' @export
#' @import kwb.utils
#' @importFrom kwb.utils catAndRun selectColumns selectElements
#' @importFrom kwb.datetime textToEuropeBerlinPosix
#' @importFrom httr content POST
read_wasserportal_raw <- function(
variable,
station,
Expand Down Expand Up @@ -136,6 +137,7 @@ read_wasserportal_raw <- function(
}

# clean_timestamp_columns ------------------------------------------------------

clean_timestamp_columns <- function(data, include_raw_time)
{
raw_timestamps <- kwb.utils::selectColumns(data, "Datum")
Expand Down
66 changes: 66 additions & 0 deletions R/wp_masters_data_to_list.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' Wasserportal Master Data: download and Import in R List
#'
#' Builds one file name per element of \code{overview_list_names}, downloads
#' the corresponding file from the url returned by \code{base_url_download()}
#' and reads it with \code{\link[readr]{read_csv}}.
#'
#' @param overview_list_names names of "overview_list" as retrieved by
#' \code{\link{get_stations}}
#' @param target_dir target directory for downloading data (default:
#' tempdir())
#' @param file_prefix prefix that is prepended to each file name (default:
#' "stations_")
#' @param is_zipped are the data to be downloaded zipped (default:
#' FALSE)
#'
#' @return named list with one element per entry of
#' \code{overview_list_names}. The names are the file names without extension,
#' each element is the result of \code{\link[readr]{read_csv}}
#' @export
#' @importFrom archive archive_extract
#' @importFrom fs dir_create
#' @importFrom utils download.file
#' @importFrom readr read_csv
#' @importFrom stringr str_replace str_replace_all
#' @importFrom withr with_dir
#' @examples
#' \dontrun{
#' stations <- wasserportal::get_stations()
#' overview_list_names <- names(stations$overview_list)
#' wp_masters_data_list <- wp_masters_data_to_list(overview_list_names)
#' }
wp_masters_data_to_list <- function(overview_list_names,
                                    target_dir = tempdir(),
                                    file_prefix = "stations_",
                                    is_zipped = FALSE) {

  fs::dir_create(target_dir, recurse = TRUE)

  # Same name mangling as used when the files were written: all underscores
  # become hyphens, then the first dot becomes an underscore
  filenames_base <- overview_list_names %>%
    stringr::str_replace_all("_", "-") %>%
    stringr::str_replace("\\.", "_")

  filenames_base <- sprintf("%s%s", file_prefix, filenames_base)

  filenames_csv <- paste0(filenames_base, ".csv")
  filenames_zip <- paste0(filenames_base, ".zip")

  # Download (and, if zipped, extract) the i-th file and read it as csv
  download_and_read <- function(i) {

    # is_zipped is a scalar flag, so a plain if/else is used (not ifelse())
    filename <- if (is_zipped) filenames_zip[i] else filenames_csv[i]

    url <- sprintf("%s/%s", base_url_download(), filename)

    if (is_zipped) {
      # Extract into target_dir by temporarily changing the working directory
      withr::with_dir(
        new = target_dir,
        code = {
          archive::archive_extract(url) %>%
            readr::read_csv()
        }
      )
    } else {
      target_path <- file.path(target_dir, filenames_csv[i])

      download_result <- try(
        utils::download.file(url = url, destfile = target_path)
      )

      # Fail with a message naming the url instead of letting read_csv()
      # complain about a missing file. A file that already exists at
      # target_path is still read, as before.
      if (inherits(download_result, "try-error") &&
          !file.exists(target_path)) {
        stop(
          sprintf(
            "Download of '%s' failed and there is no file '%s' to read.",
            url, target_path
          ),
          call. = FALSE
        )
      }

      readr::read_csv(file = target_path)
    }
  }

  stats::setNames(
    lapply(seq_along(filenames_base), download_and_read),
    nm = filenames_base
  )
}
76 changes: 76 additions & 0 deletions R/wp_time-series_data_to_list.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#' Helper function: base url for download
#'
#' Returns the fixed base url that the download functions prepend to the
#' csv/zip file names.
#' @keywords internal
#' @noMd
#' @noRd
#' @return base url for download of csv/zip files prepared by R package
#' @export
#'
base_url_download <- function() {
  # NOTE(review): the roxygen block combines @noRd with @export, so the
  # exported function gets no Rd file - confirm that this is intended.
  download_root <- "https://kwb-r.github.io/wasserportal"
  download_root
}

#' Wasserportal Time Series Data: download and Import in R List
#'
#' Builds one file name per element of \code{overview_list_names}, downloads
#' the corresponding file from the url returned by \code{base_url_download()}
#' and reads it with \code{\link[readr]{read_csv}}.
#'
#' @param overview_list_names names of "overview_list" as retrieved by
#' \code{\link{get_stations}}
#' @param target_dir target directory for downloading data (default:
#' tempdir())
#' @param is_zipped are the data to be downloaded zipped (default:
#' TRUE)
#'
#' @return named list with one element per entry of
#' \code{overview_list_names}. The names are the file names without extension,
#' each element is the result of \code{\link[readr]{read_csv}}
#' @export
#' @importFrom archive archive_extract
#' @importFrom fs dir_create
#' @importFrom utils download.file
#' @importFrom readr read_csv
#' @importFrom stringr str_replace str_replace_all
#' @importFrom withr with_dir
#' @examples
#' \dontrun{
#' stations <- wasserportal::get_stations()
#' overview_list_names <- names(stations$overview_list)
#' wp_timeseries_data_list <- wp_timeseries_data_to_list(overview_list_names)
#' }
wp_timeseries_data_to_list <- function(overview_list_names,
                                       target_dir = tempdir(),
                                       is_zipped = TRUE) {

  fs::dir_create(target_dir, recurse = TRUE)

  # Same name mangling as used when the files were written: all underscores
  # become hyphens, then the first dot becomes an underscore
  filenames_base <- overview_list_names %>%
    stringr::str_replace_all("_", "-") %>%
    stringr::str_replace("\\.", "_")

  # Names starting with "surface" get the prefix "daily_"
  filenames_base <- stringr::str_replace_all(
    filenames_base,
    "^surface",
    "daily_surface"
  )

  filenames_csv <- paste0(filenames_base, ".csv")
  filenames_zip <- paste0(filenames_base, ".zip")

  # Download (and, if zipped, extract) the i-th file and read it as csv
  download_and_read <- function(i) {

    # is_zipped is a scalar flag, so a plain if/else is used (not ifelse())
    filename <- if (is_zipped) filenames_zip[i] else filenames_csv[i]

    url <- sprintf("%s/%s", base_url_download(), filename)

    if (is_zipped) {
      # Extract into target_dir by temporarily changing the working directory
      withr::with_dir(
        new = target_dir,
        code = {
          archive::archive_extract(url) %>%
            readr::read_csv()
        }
      )
    } else {
      target_path <- file.path(target_dir, filenames_csv[i])

      download_result <- try(
        utils::download.file(url = url, destfile = target_path)
      )

      # Fail with a message naming the url instead of letting read_csv()
      # complain about a missing file. A file that already exists at
      # target_path is still read, as before.
      if (inherits(download_result, "try-error") &&
          !file.exists(target_path)) {
        stop(
          sprintf(
            "Download of '%s' failed and there is no file '%s' to read.",
            url, target_path
          ),
          call. = FALSE
        )
      }

      readr::read_csv(file = target_path)
    }
  }

  stats::setNames(
    lapply(seq_along(filenames_base), download_and_read),
    nm = filenames_base
  )
}
25 changes: 25 additions & 0 deletions man/list_masters_data_to_csv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/list_timeseries_data_to_zip.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1cce450

Please sign in to comment.