Skip to content

Commit

Permalink
Work on small archive approach for forecast data
Browse files Browse the repository at this point in the history
  • Loading branch information
jcoliver committed Dec 1, 2018
1 parent 8cea5b5 commit e672b93
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 7 deletions.
1 change: 0 additions & 1 deletion .gitattributes

This file was deleted.

5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
.Ruserdata
biodiversity-sdm-lesson.Rproj
data/wc2-5
data/cmip5/2_5m/forecast-raster.gri
data/cmip5/2_5m/forecast-raster.grd
data/cmip5/2_5m/*.tif
data/cmip5/2_5m/*.gri
data/cmip5/2_5m/*.grd
output/*
!*.gitkeep
!output/*/
Expand Down
Binary file added data/cmip5/2_5m/forecast1.zip
Binary file not shown.
Binary file added data/cmip5/2_5m/forecast2.zip
Binary file not shown.
Binary file added data/cmip5/2_5m/forecast3.zip
Binary file not shown.
Binary file added data/cmip5/2_5m/forecast4.zip
Binary file not shown.
37 changes: 35 additions & 2 deletions dev/archive-forecast-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ rm(list = ls())
#' 6. Update functions/sdm-functions.R to appropriately load in raster
#' (remember to deal with names, i.e.
#' `names(forecast.data) <- names(bioclim.data)`)
#' 7. remove rgdal
#' 7. Clean up by removing rgdal

########################################
# LOAD DEPENDENCIES
Expand All @@ -47,10 +47,43 @@ forecast.data <- getData(name = "CMIP5", # forecast
########################################
# WRITE EACH LAYER TO RASTER FORMAT FILE
# Write one file per layer (bylayer = TRUE) using format = "raster"; the
# archiving step later in this script collects the resulting .grd/.gri files.
# NOTE(review): two `filename` arguments appear below. The first looks like the
# pre-change line of this diff and the second (with the data/cmip5/2_5m/
# prefix) its replacement -- confirm only one is present in the real file.
writeRaster(x = forecast.data,
filename = names(forecast.data),
filename = paste0("data/cmip5/2_5m/", names(forecast.data)),
bylayer = TRUE,
format = "raster")

########################################
# CREATE MULTIPLE ZIP FILES
# Gather every .grd/.gri file in the forecast directory. list.files() returns
# names in sorted order, so the two files that share a layer name sit next to
# each other. `pattern` is a regular expression (not a glob), hence the
# escaped dot.
raster.files <- list.files(path = "data/cmip5/2_5m",
                           pattern = "\\.gr[id]$",
                           full.names = TRUE)

# Nothing to archive: fail loudly instead of handing zip() empty/NA file names
if (length(raster.files) == 0) {
  stop("No .grd/.gri files found in data/cmip5/2_5m; nothing to archive",
       call. = FALSE)
}

# Aiming for (at most) four archives, see how many files go in each
num.archives <- 4
archive.size <- ceiling(length(raster.files) / num.archives)
# Ensure archive has even number of files (to keep .grd and .gri files together)
if (archive.size %% 2 != 0) {
  archive.size <- archive.size + 1
}

# Assign consecutive runs of `archive.size` files to archives 1, 2, ...; the
# last archive receives whatever is left. Splitting the file vector (rather
# than indexing inside a 1:num.archives loop) guarantees that no archive is
# built from out-of-range indexes when there are too few files to fill all of
# the requested archives.
archive.ids <- ceiling(seq_along(raster.files) / archive.size)
archive.contents <- split(x = raster.files, f = archive.ids)

for (i in seq_along(archive.contents)) {
  cat(paste0("==== Archive ", i, " ===="), archive.contents[[i]], sep = "\n")
  # NOTE(review): no extension is given here; the archives are expected to end
  # up as forecast<i>.zip (suffix added by the zip utility) -- confirm.
  zip(zipfile = paste0("data/cmip5/2_5m/forecast", i),
      files = archive.contents[[i]])
}



########################################
# REMOVE ALL tif, gri, AND grd FILES
# Collect the downloaded/intermediate forecast files for deletion.
# `pattern` is a regular expression, not a glob: "gd45bi*" would mean "gd45b"
# followed by zero or more "i" anywhere in the file name. Anchoring the prefix
# restricts the match to files whose names start with "gd45bi".
obsolete.files <- list.files(path = "data/cmip5/2_5m",
                             pattern = "^gd45bi",
                             full.names = TRUE)

# file.remove() returns one logical per file (TRUE where deletion succeeded)
file.remove(obsolete.files)
8 changes: 7 additions & 1 deletion functions/sdm-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,13 @@ SDMForecast <- function(data, padding = 0.1) {
bioclim.data <- crop(x = bioclim.data, y = geographic.extent)

# Load forecast data
forecast.data <- raster::stack(x = "data/cmip5/2_5m/forecast-raster.gri")
forecast.files <- list.files(path = "data/cmip5/2_5m",
pattern = "*.gri$",
full.names = TRUE)

# forecast.data <- raster::stack(x = "data/cmip5/2_5m/forecast-raster.gri")
forecast.data <- raster::stack(forecast.files)
names(forecast.data) <- names(bioclim.data)
forecast.data <- crop(x = forecast.data, y = geographic.extent)

# Predict presence probability from model and bioclim data
Expand Down
6 changes: 5 additions & 1 deletion scripts/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ bioclim.data <- getData(name = "worldclim",

# Unzip forecast data
message("Extracting forecast climate data (this may take a moment)")
# NOTE(review): the single-archive call on the next line looks like the
# pre-change line of this diff (it reappears, commented out, below) -- confirm
# it is absent from the real file.
unzip(zipfile = "data/cmip5/2_5m/forecast-data.zip")
# Find every zip archive in the forecast data directory, with full paths.
# NOTE(review): `pattern` is interpreted as a regular expression, not a glob;
# "\\.zip$" would state the intent more precisely.
forecast.archives <- list.files(path = "data/cmip5/2_5m",
pattern = "*.zip$",
full.names = TRUE)
# Extract each archive in turn.
# NOTE(review): unzip() is called without `exdir`, so members are extracted
# relative to the current working directory -- confirm the archives store
# project-relative paths (data/cmip5/2_5m/...).
lapply(X = forecast.archives, FUN = unzip)
# unzip(zipfile = "data/cmip5/2_5m/forecast-data.zip")

# NOPE archive is too big (> 100 MB) for GitHub. But there might be a solution
# GitHub large file storage https://git-lfs.github.com/
Expand Down

0 comments on commit e672b93

Please sign in to comment.