Work on small archive approach for forecast data

jcoliver · Dec 1, 2018 · e672b93 · e672b93
1 parent 8cea5b5
commit e672b93
Show file tree

Hide file tree

Showing 9 changed files with 50 additions and 7 deletions.
diff --git a/.gitattributes b/.gitattributes
diff --git a/.gitignore b/.gitignore
@@ -4,8 +4,9 @@
 .Ruserdata
 biodiversity-sdm-lesson.Rproj
 data/wc2-5
-data/cmip5/2_5m/forecast-raster.gri
-data/cmip5/2_5m/forecast-raster.grd
+data/cmip5/2_5m/*.tif
+data/cmip5/2_5m/*.gri
+data/cmip5/2_5m/*.grd
 output/*
 !*.gitkeep
 !output/*/

diff --git a/data/cmip5/2_5m/forecast1.zip b/data/cmip5/2_5m/forecast1.zip
diff --git a/data/cmip5/2_5m/forecast2.zip b/data/cmip5/2_5m/forecast2.zip
diff --git a/data/cmip5/2_5m/forecast3.zip b/data/cmip5/2_5m/forecast3.zip
diff --git a/data/cmip5/2_5m/forecast4.zip b/data/cmip5/2_5m/forecast4.zip
diff --git a/dev/archive-forecast-data.R b/dev/archive-forecast-data.R
@@ -26,7 +26,7 @@ rm(list = ls())
 #' 6. Update functions/sdm-functions.R to appropriately load in raster 
 #'    (remember to deal with names, i.e. 
 #'    `names(forecast.data) <- names(bioclim.data)`)
-#' 7. remove rgdal
+#' 7. Clean up by removing rgdal
 
 ########################################
 # LOAD DEPENDENCIES
@@ -47,10 +47,43 @@ forecast.data <- getData(name = "CMIP5", # forecast
 ########################################
 # WRITE EACH LAYER TO RASTER FORMAT FILE
 writeRaster(x = forecast.data, 
-            filename = names(forecast.data), 
+            filename = paste0("data/cmip5/2_5m/", names(forecast.data)), 
             bylayer = TRUE,
             format = "raster")
 
 ########################################
 # CREATE MULTIPLE ZIP FILES
+# Escaped-dot regex (not a glob): only names ending in .gri or .grd
+raster.files <- list.files(path = "data/cmip5/2_5m",
+                           pattern = "\\.gr[id]$",
+                           full.names = TRUE)
 
+# Aiming for four archives, see how many files go in each
+num.archives <- 4
+archive.size <- ceiling(length(raster.files) / num.archives)
+# Ensure archive has even number of files (to keep .grd and .gri files together)
+if (archive.size %% 2 != 0) {
+  archive.size <- archive.size + 1
+}
+
+# Chunk files in listing order; split() yields only non-empty groups, so a
+# short file list gives fewer archives rather than out-of-range (NA) indexes
+archive.id <- ceiling(seq_along(raster.files) / archive.size)
+archive.files <- split(x = raster.files, f = archive.id)
+
+for (i in seq_along(archive.files)) {
+  # Report the contents of archive i, then write it next to the raster files
+  cat(paste0("====  Archive ", i, "  ===="), archive.files[[i]], sep = "\n")
+  zip(zipfile = paste0("data/cmip5/2_5m/forecast", i),
+      files = archive.files[[i]])
+}
+
+
+
+########################################
+# REMOVE ALL tif, gri, AND grd FILES
+# pattern is a regular expression: any file whose name starts with "gd45bi"
+obsolete.files <- list.files(path = "data/cmip5/2_5m",
+                             pattern = "^gd45bi",
+                             full.names = TRUE)
+file.remove(obsolete.files)
diff --git a/functions/sdm-functions.R b/functions/sdm-functions.R
@@ -181,7 +181,13 @@ SDMForecast <- function(data, padding = 0.1) {
   bioclim.data <- crop(x = bioclim.data, y = geographic.extent)
 
   # Load forecast data
-  forecast.data <- raster::stack(x = "data/cmip5/2_5m/forecast-raster.gri")
+  forecast.files <- list.files(path = "data/cmip5/2_5m",
+                               pattern = "*.gri$",
+                               full.names = TRUE)
+
+  # forecast.data <- raster::stack(x = "data/cmip5/2_5m/forecast-raster.gri")
+  forecast.data <- raster::stack(forecast.files)
+  names(forecast.data) <- names(bioclim.data)
   forecast.data <- crop(x = forecast.data, y = geographic.extent)
 
   # Predict presence probability from model and bioclim data

diff --git a/scripts/setup.R b/scripts/setup.R
@@ -45,7 +45,11 @@ bioclim.data <- getData(name = "worldclim",
 
 # Unzip forecast data
 message("Extracting forecast climate data (this may take a moment)")
-unzip(zipfile = "data/cmip5/2_5m/forecast-data.zip")
+forecast.archives <- list.files(path = "data/cmip5/2_5m",
+                                pattern = "\\.zip$",  # regex, not a glob
+                                full.names = TRUE)
+invisible(lapply(X = forecast.archives, FUN = unzip))  # extract each; no print
+# unzip(zipfile = "data/cmip5/2_5m/forecast-data.zip")
 
 # NOPE: the archive is too big (> 100 MB) for GitHub, but there might be a
 # solution: GitHub Large File Storage, https://git-lfs.github.com/