From 25fd5b6254cb55d9d983bb80108b7e78b91d14ce Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:43:03 +0530 Subject: [PATCH 01/15] Update downscale_function.R Add L2 regularization to CNN model in SDA_downscale function - Introduced L2 kernel regularization to convolutional and dense layers - Set L2 factor to 0.01 for all regularized layers - Aim to reduce overfitting and improve model generalization - Updated model architecture in the CNN branch of SDA_downscale function --- modules/assim.sequential/R/downscale_function.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 27645f9dd7..99677466ec 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -145,11 +145,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) for (i in seq_along(carbon_data)) { + # L2 regularization factor + l2_factor <- 0.01 # Define the CNN model architecture # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. model <- keras3::keras_model_sequential() |> # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |> + keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift keras3::layer_batch_normalization() |> # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization @@ -157,13 +159,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Flatten: Converts 3D output to 1D for dense layer input keras3::layer_flatten() |> # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu') |> + keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> # Second batch normalization: Further stabilizes learning in deeper layers keras3::layer_batch_normalization() |> # Second dropout: Additional regularization to prevent overfitting in final layers keras3::layer_dropout(rate = 0.3) |> # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1) + keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) # Learning rate scheduler lr_schedule <- keras3::learning_rate_schedule_exponential_decay( From d4696e6083ed5167609e8a238c5d75faabdec562 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:52:40 +0530 Subject: [PATCH 02/15] Define k-folds and create cross-validation indices - Set k_folds variable to 5 for k-fold cross-validation - Use caret::createFolds to generate fold indices for training data --- modules/assim.sequential/R/downscale_function.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 99677466ec..fede48920a 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,9 +140,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if (model_type == "cnn") { + # Define k_folds within the function + k_folds <- 5 + # Reshape input data for CNN x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) + + # Create k-fold indices for cross-validation (only on training data) + fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) for (i in seq_along(carbon_data)) { # L2 regularization factor From 6a34666c849f70b22e9f2d5623c542c19ee02679 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:59:04 +0530 Subject: [PATCH 03/15] Implement cross-validation loop for ensemble modeling - Initialize cv_results list to store results - Create loop to process each fold - Split data into training and validation sets for each fold - Print progress message for each fold --- modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index fede48920a..311a90794b 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -151,6 +151,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) for (i in seq_along(carbon_data)) { + + cv_results <- list() + + for (fold in 1:k_folds) { + cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) + + # Split training data into training and validation sets for this fold + val_indices <- fold_indices[[fold]] + train_indices <- setdiff(1:nrow(x_train), val_indices) + + x_train_fold <- x_train[train_indices, , drop = FALSE] + y_train_fold <- y_train[train_indices, i] + x_val_fold <- x_train[val_indices, , drop = FALSE] + y_val_fold <- y_train[val_indices, i] # L2 regularization factor l2_factor <- 0.01 # Define the CNN model architecture From 4b16839a3d826745c917b8478cfba1ec559b5fda Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:02:56 +0530 Subject: [PATCH 04/15] Implement model training and evaluation within cross-validation - Use tryCatch for error handling during model fitting - Evaluate model on validation set - Store results in cv_results list - Handle potential errors and log them --- .../assim.sequential/R/downscale_function.R | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 311a90794b..61612948ff 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -208,16 +208,24 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ restore_best_weights = TRUE ) - # Train the model - model |> keras3::fit( - x = x_train, - y = 
y_train[, i], - epochs = 500, # Increased max epochs - batch_size = 32, - validation_split = 0.2, - callbacks = list(early_stopping), - verbose = 0 - ) + tryCatch({ + model |> keras3::fit( + x = x_train_fold, + y = y_train_fold, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + + # Evaluate model on validation set + val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) + cv_results[[fold]] <- val_results + }, error = function(e) { + cat("Error in fold", fold, ":", conditionMessage(e), "\n") + cv_results[[fold]] <- c(NA, NA) + }) + } # Store the trained model models[[i]] <- model From b22c84e4b598877fddd6f6c14bc20874ce411bde Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:06:49 +0530 Subject: [PATCH 05/15] Calculate and display average cross-validation performance - Compute mean MSE and MAE across all folds - Use sapply for efficient calculation - Handle potential NA values - Print results for the current ensemble --- modules/assim.sequential/R/downscale_function.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 61612948ff..b4f71a662d 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -227,6 +227,12 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ }) } + # Calculate average performance across folds + mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) + mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) + + cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) + # Store the trained model models[[i]] <- model From dce5d953d56a1006141ce06df1e26ca0e41a6b5c Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:26:03 +0530 Subject: [PATCH 06/15] Implement bagging for CNN in SDA_downscale function - Add num_bags parameter to control number of bagged models - Create and train multiple CNN models for each ensemble - Implement bagged prediction function for CNN - Update prediction process to use bagged models - Adjust cross-validation to incorporate bagging --- .../assim.sequential/R/downscale_function.R | 138 +++++++++--------- 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index b4f71a662d..f58b51763e 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -167,74 +167,82 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ y_val_fold <- y_train[val_indices, i] # L2 regularization factor l2_factor <- 0.01 - # Define the CNN model architecture - # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
- model <- keras3::keras_model_sequential() |> - # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> - # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift - keras3::layer_batch_normalization() |> - # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization - keras3::layer_dropout(rate = 0.3) |> - # Flatten: Converts 3D output to 1D for dense layer input - keras3::layer_flatten() |> - # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> - # Second batch normalization: Further stabilizes learning in deeper layers - keras3::layer_batch_normalization() |> - # Second dropout: Additional regularization to prevent overfitting in final layers - keras3::layer_dropout(rate = 0.3) |> - # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) - - # Learning rate scheduler - lr_schedule <- keras3::learning_rate_schedule_exponential_decay( - initial_learning_rate = 0.001, - decay_steps = 1000, - decay_rate = 0.9 - ) - - # Compile the model - model |> keras3::compile( - loss = 'mean_squared_error', - optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), - metrics = c('mean_absolute_error') - ) - - # Early stopping callback - early_stopping <- keras3::callback_early_stopping( - monitor = 'val_loss', - patience = 10, - restore_best_weights = TRUE - ) - - tryCatch({ - model |> keras3::fit( - x = x_train_fold, - y = y_train_fold, - epochs = 500, - batch_size = 32, - callbacks = list(early_stopping), - verbose = 0 - ) - - # Evaluate model on validation set - val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) - cv_results[[fold]] <- val_results - }, error = function(e) { - cat("Error in fold", fold, ":", conditionMessage(e), "\n") - cv_results[[fold]] <- c(NA, NA) - }) + # Train final bagged models on all training data + final_bagged_models <- list() + for (bag in 1:num_bags) { + bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE) + x_train_bag <- x_train[bootstrap_indices, ] + y_train_bag <- y_train[bootstrap_indices, i] + # Define the CNN model architecture + # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
+ model <- keras3::keras_model_sequential() |> + # 1D Convolutional layer: Extracts local features from input data + keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> + # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift + keras3::layer_batch_normalization() |> + # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization + keras3::layer_dropout(rate = 0.3) |> + # Flatten: Converts 3D output to 1D for dense layer input + keras3::layer_flatten() |> + # Dense layer: Learns complex combinations of features + keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> + # Second batch normalization: Further stabilizes learning in deeper layers + keras3::layer_batch_normalization() |> + # Second dropout: Additional regularization to prevent overfitting in final layers + keras3::layer_dropout(rate = 0.3) |> + # Output layer: Single neuron for regression prediction + keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) + + # Learning rate scheduler + lr_schedule <- keras3::learning_rate_schedule_exponential_decay( + initial_learning_rate = 0.001, + decay_steps = 1000, + decay_rate = 0.9 + ) + + # Compile the model + model |> keras3::compile( + loss = 'mean_squared_error', + optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), + metrics = c('mean_absolute_error') + ) + + # Early stopping callback + early_stopping <- keras3::callback_early_stopping( + monitor = 'val_loss', + patience = 10, + restore_best_weights = TRUE + ) + + tryCatch({ + model |> keras3::fit( + x = x_train_fold, + y = y_train_fold, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + final_bagged_models[[bag]] <- final_model + + # Evaluate model on validation set + val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) + cv_results[[fold]] <- val_results + }, error = function(e) { + cat("Error in fold", fold, ":", conditionMessage(e), "\n") + cv_results[[fold]] <- c(NA, NA) + }) + } + + # Calculate average performance across folds + mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) + mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) + + cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) } - # Calculate average performance across folds - mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) - mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) - - cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) - # Store the trained model - models[[i]] <- model + models[[i]] <- final_bagged_models #CNN predictions cnn_predict <- function(model, newdata, scaling_params) { From e95536c965345fdfc1f0208d0865159e357e0fc2 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:33:12 +0530 Subject: [PATCH 07/15] Improve bagged CNN evaluation and standardize nomenclature - Add final test set evaluation for each bagged CNN ensemble - Calculate and display Test MSE and MAE for ensembles - Standardize variable names for clarity and suiting the flow of the code avoiding conflicts . 
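Illustrative sketch (not part of any patch in this series): the bagging added in these commits follows the usual recipe of bootstrap-resampling the training rows, fitting one model per bag, and averaging the per-bag predictions. The minimal R example below shows that recipe with toy data and lm() standing in for the per-bag CNN; all variable names are hypothetical and do not appear in downscale_function.R.

set.seed(42)
x_train_toy <- matrix(rnorm(200), ncol = 2)                           # toy covariate matrix (100 x 2)
y_train_toy <- as.vector(x_train_toy %*% c(1.5, -0.5)) + rnorm(100)  # toy response
num_bags <- 5
bagged_models <- vector("list", num_bags)
for (bag in seq_len(num_bags)) {
  # Bootstrap sample: draw rows with replacement, same size as the training set
  idx <- sample(seq_len(nrow(x_train_toy)), size = nrow(x_train_toy), replace = TRUE)
  dat <- data.frame(y = y_train_toy[idx], x_train_toy[idx, , drop = FALSE])
  bagged_models[[bag]] <- lm(y ~ ., data = dat)                       # stand-in for one bagged CNN
}
# Bagged prediction: average predictions across all bagged models
# (the same rowMeans() averaging appears in the ensemble predictor added later in this series)
newdata <- data.frame(matrix(rnorm(20), ncol = 2))
pred_matrix <- sapply(bagged_models, function(m) stats::predict(m, newdata = newdata))
ensemble_prediction <- rowMeans(pred_matrix)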
--- modules/assim.sequential/R/downscale_function.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index f58b51763e..259ea30401 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -256,12 +256,18 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ prediction_rast <- terra::rast(covariates) # Generate spatial predictions using the trained model - maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], + maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models, fun = cnn_predict, scaling_params = scaling_params) # Make predictions on held-out test data - predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params) + predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params) + + # Evaluate final bagged ensemble on test set + test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params) + test_mse <- mean((test_predictions - y_test[, i])^2) + test_mae <- mean(abs(test_predictions - y_test[, i])) + cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) } } else { stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.") From 85065566c9633d086ee6f18f58cf3a28cd209059 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:37:18 +0530 Subject: [PATCH 08/15] Added number of bags for bagging - instead of passing as function arg , passed inside function - set the number of bags to 5 --- modules/assim.sequential/R/downscale_function.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 259ea30401..8a5504f672 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -142,6 +142,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ } else if (model_type == "cnn") { # Define k_folds within the function k_folds <- 5 + #Number of bags + num_bags <- 5 # Reshape input data for CNN x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) From ced7975bc97c31ea54ad1f5e1a1120ef5d12554c Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:28:45 +0530 Subject: [PATCH 09/15] Refactor CNN model with k-fold cross-validation and bagging - Implement k-fold cross-validation for CNN models - Add bagging to improve model robustness - Restructure model creation and training process - Introduce ensemble prediction function - Update evaluation metrics for each fold and final ensemble - Improve code organization and readability --- .../assim.sequential/R/downscale_function.R | 216 +++++++++--------- 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 8a5504f672..3838d1e3e0 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,133 +140,127 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if 
(model_type == "cnn") { - # Define k_folds within the function + # Define k_folds and num_bags k_folds <- 5 - #Number of bags num_bags <- 5 - # Reshape input data for CNN - x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) - x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) - - # Create k-fold indices for cross-validation (only on training data) - fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - for (i in seq_along(carbon_data)) { - - cv_results <- list() - - for (fold in 1:k_folds) { - cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) + all_models <- list() - # Split training data into training and validation sets for this fold - val_indices <- fold_indices[[fold]] - train_indices <- setdiff(1:nrow(x_train), val_indices) + # Create k-fold indices + fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - x_train_fold <- x_train[train_indices, , drop = FALSE] - y_train_fold <- y_train[train_indices, i] - x_val_fold <- x_train[val_indices, , drop = FALSE] - y_val_fold <- y_train[val_indices, i] - # L2 regularization factor - l2_factor <- 0.01 - # Train final bagged models on all training data - final_bagged_models <- list() - for (bag in 1:num_bags) { - bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE) - x_train_bag <- x_train[bootstrap_indices, ] - y_train_bag <- y_train[bootstrap_indices, i] - # Define the CNN model architecture - # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
- model <- keras3::keras_model_sequential() |> - # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> - # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift - keras3::layer_batch_normalization() |> - # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization - keras3::layer_dropout(rate = 0.3) |> - # Flatten: Converts 3D output to 1D for dense layer input - keras3::layer_flatten() |> - # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> - # Second batch normalization: Further stabilizes learning in deeper layers - keras3::layer_batch_normalization() |> - # Second dropout: Additional regularization to prevent overfitting in final layers - keras3::layer_dropout(rate = 0.3) |> - # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) + for (fold in 1:k_folds) { + cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) - # Learning rate scheduler - lr_schedule <- keras3::learning_rate_schedule_exponential_decay( - initial_learning_rate = 0.001, - decay_steps = 1000, - decay_rate = 0.9 - ) - - # Compile the model - model |> keras3::compile( - loss = 'mean_squared_error', - optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), - metrics = c('mean_absolute_error') - ) + # Split data into training and validation sets for this fold + train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]]) + val_indices <- fold_indices[[fold]] - # Early stopping callback - early_stopping <- keras3::callback_early_stopping( - monitor = 'val_loss', - patience = 10, - restore_best_weights = TRUE - ) - - tryCatch({ - model |> keras3::fit( - x = x_train_fold, - y = y_train_fold, - epochs = 500, - batch_size = 32, - callbacks = list(early_stopping), - verbose = 0 + x_train_fold <- x_train[train_indices, , drop = FALSE] + y_train_fold <- y_train[train_indices, i] + x_val_fold <- x_train[val_indices, , drop = FALSE] + y_val_fold <- y_train[val_indices, i] + + # Create bagged models for this fold + fold_models <- list() + for (bag in 1:num_bags) { + # Create bootstrap sample + bootstrap_indices <- sample(1:nrow(x_train_fold), size = nrow(x_train_fold), replace = TRUE) + x_train_bag <- x_train_fold[bootstrap_indices, ] + y_train_bag <- y_train_fold[bootstrap_indices] + + # Create and train model + model <- keras3::keras_model_sequential() |> + keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |> + keras3::layer_conv_2d( + filters = 32, + kernel_size = c(3, 1), + activation = 'relu', + padding = 'same' + ) |> + keras3::layer_flatten() |> + keras3::layer_dense( + units = 64, + activation = 'relu', + kernel_regularizer = keras3::regularizer_l2(0.01) + ) |> + keras3::layer_batch_normalization() |> + keras3::layer_dropout(rate = 0.3) |> + keras3::layer_dense( + units = 32, + activation = 'relu', + kernel_regularizer = keras3::regularizer_l2(0.01) + ) |> + keras3::layer_batch_normalization() |> + keras3::layer_dropout(rate = 0.3) |> + keras3::layer_dense( + units = 1, + kernel_regularizer = keras3::regularizer_l2(0.01) ) - final_bagged_models[[bag]] <- final_model - - # 
Evaluate model on validation set - val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) - cv_results[[fold]] <- val_results - }, error = function(e) { - cat("Error in fold", fold, ":", conditionMessage(e), "\n") - cv_results[[fold]] <- c(NA, NA) - }) + + # Learning rate scheduler + lr_schedule <- keras3::learning_rate_schedule_exponential_decay( + initial_learning_rate = 0.001, + decay_steps = 1000, + decay_rate = 0.9 + ) + + # Early stopping callback + early_stopping <- keras3::callback_early_stopping( + monitor = 'loss', + patience = 10, + restore_best_weights = TRUE + ) + + model |> keras3::compile( + loss = 'mean_squared_error', + optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), + metrics = c('mean_absolute_error') + ) + + model |> keras3::fit( + x = x_train_bag, + y = y_train_bag, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + + fold_models[[bag]] <- model } - - # Calculate average performance across folds - mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) - mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) - cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) + # Add fold models to all_models list + all_models <- c(all_models, fold_models) + + # Evaluate fold performance + val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold)) + val_predictions_mean <- rowMeans(val_predictions) + val_mse <- mean((val_predictions_mean - y_val_fold)^2) + val_mae <- mean(abs(val_predictions_mean - y_val_fold)) + cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae)) } - - # Store the trained model - models[[i]] <- final_bagged_models - - #CNN predictions - cnn_predict <- function(model, newdata, scaling_params) { + + # Store all models for this ensemble + models[[i]] <- all_models + + # Use all models for predictions + cnn_ensemble_predict <- function(models, newdata, scaling_params) { newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd) - newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata))) - predictions <- stats::predict(model, newdata) - return(as.vector(predictions)) + predictions <- sapply(models, function(m) stats::predict(m, newdata)) + return(rowMeans(predictions)) } - - # Create a prediction raster from covariates - prediction_rast <- terra::rast(covariates) - # Generate spatial predictions using the trained model - maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models, - fun = cnn_predict, + prediction_rast <- terra::rast(covariates) + maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], + fun = cnn_ensemble_predict, scaling_params = scaling_params) - - # Make predictions on held-out test data - predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params) - - # Evaluate final bagged ensemble on test set - test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params) + + predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) + + # Evaluate final ensemble on test set + test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params) test_mse <- mean((test_predictions - y_test[, i])^2) test_mae <- mean(abs(test_predictions - y_test[, i])) cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) From 871d78ee53df2ba34111ea1a00b3d5fad5fca0d4 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit 
<94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:46:05 +0530 Subject: [PATCH 10/15] Added comments some snippets require comments to explain their purpose , they have been added as well , as well as some past comments have been modified to aptly suit the changes . --- .../assim.sequential/R/downscale_function.R | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 3838d1e3e0..6de48ba3a1 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,16 +140,21 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if (model_type == "cnn") { - # Define k_folds and num_bags + # Define k_folds and num_bags k_folds <- 5 num_bags <- 5 - + + # Reshape input data for CNN + x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) + x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) + for (i in seq_along(carbon_data)) { all_models <- list() # Create k-fold indices fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - + + #initialise operations for each fold for (fold in 1:k_folds) { cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) @@ -170,30 +175,41 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ x_train_bag <- x_train_fold[bootstrap_indices, ] y_train_bag <- y_train_fold[bootstrap_indices] - # Create and train model + # Define the CNN model architecture + # Used dual batch normalization and dropout as the first set of batch normalization and model <- keras3::keras_model_sequential() |> + # Layer Reshape : Reshape to fit target shape for the convolutional layer keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |> + # 1D Convolutional layer: Extracts local features from input data keras3::layer_conv_2d( filters = 32, kernel_size = c(3, 1), activation = 'relu', padding = 'same' ) |> + # Flatten: Converts 3D output to 1D for dense layer input keras3::layer_flatten() |> + # Dense layer: Learns complex combinations of features keras3::layer_dense( units = 64, activation = 'relu', kernel_regularizer = keras3::regularizer_l2(0.01) ) |> + # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift keras3::layer_batch_normalization() |> + # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization keras3::layer_dropout(rate = 0.3) |> + # Dense layer: Learns complex combinations of features keras3::layer_dense( units = 32, activation = 'relu', kernel_regularizer = keras3::regularizer_l2(0.01) ) |> + # Batch normalization: Further stabilizes learning in deeper layers keras3::layer_batch_normalization() |> + # Dropout: Additional regularization to prevent overfitting in final layer keras3::layer_dropout(rate = 0.3) |> + # Output layer: Single neuron for regression prediction keras3::layer_dense( units = 1, kernel_regularizer = keras3::regularizer_l2(0.01) @@ -212,13 +228,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ patience = 10, restore_best_weights = TRUE ) - + + # Compile the model model |> keras3::compile( loss = 'mean_squared_error', optimizer = 
keras3::optimizer_adam(learning_rate = lr_schedule), metrics = c('mean_absolute_error') ) - + + # Train the model model |> keras3::fit( x = x_train_bag, y = y_train_bag, @@ -227,7 +245,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ callbacks = list(early_stopping), verbose = 0 ) - + + # Store the trained model for this bag in the fold_models list fold_models[[bag]] <- model } @@ -251,12 +270,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions <- sapply(models, function(m) stats::predict(m, newdata)) return(rowMeans(predictions)) } - + + # Create a prediction raster from covariates prediction_rast <- terra::rast(covariates) + + # Generate spatial predictions using the trained model maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], fun = cnn_ensemble_predict, scaling_params = scaling_params) - + + # Make predictions on held-out test data predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) # Evaluate final ensemble on test set From b73290314fb334908950cea73258088998be0192 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:13:37 +0530 Subject: [PATCH 11/15] Removed MAE ,MSE from downscale - MAE , MSE already in metrics function - reimplementing it makes it redundant - Removed redundant implementation from downscale function --- modules/assim.sequential/R/downscale_function.R | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 6de48ba3a1..ced55e56c6 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -252,13 +252,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Add fold models to all_models list all_models <- c(all_models, fold_models) - - # Evaluate fold performance - val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold)) - val_predictions_mean <- rowMeans(val_predictions) - val_mse <- mean((val_predictions_mean - y_val_fold)^2) - val_mae <- mean(abs(val_predictions_mean - y_val_fold)) - cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae)) } # Store all models for this ensemble @@ -282,11 +275,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Make predictions on held-out test data predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) - # Evaluate final ensemble on test set - test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params) - test_mse <- mean((test_predictions - y_test[, i])^2) - test_mae <- mean(abs(test_predictions - y_test[, i])) - cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) } } else { stop("Invalid model_type. 
Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.") From bf024780296e8c05680ad19b9abaa3d61c0b4611 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:40:41 +0530 Subject: [PATCH 12/15] Improve robustness of train-validation split in SDA_downscale function - Replace 1:nrow(x_train) with seq_len(nrow(x_train)) for indexing - Ensures proper handling of edge cases, including empty datasets - Addresses potential "subscript out of bounds" error in CNN model training --- modules/assim.sequential/R/downscale_function.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index ced55e56c6..439de001a2 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -152,14 +152,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ all_models <- list() # Create k-fold indices - fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) + fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) #initialise operations for each fold for (fold in 1:k_folds) { cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) # Split data into training and validation sets for this fold - train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]]) + train_indices <- setdiff(seq_len(nrow(x_train)), fold_indices[[fold]]) val_indices <- fold_indices[[fold]] x_train_fold <- x_train[train_indices, , drop = FALSE] From 4ae1974d7aa78fa27e188d222e85ab0eba14b773 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:10:07 +0530 Subject: [PATCH 13/15] Implement custom create_folds function to replace caret dependency - Add create_folds function for k-fold cross-validation - Removes dependency on caret::createFolds - Supports both training and test set index generation - Allows flexible output as list or vector --- .../assim.sequential/R/downscale_function.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 439de001a2..f4d3700f18 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -62,6 +62,24 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data)) } +create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) { + n <- length(y) + indices <- seq_len(n) + folds <- split(indices, cut(seq_len(n), breaks = k, labels = FALSE)) + + if (!returnTrain) { + folds <- folds # Test indices are already what we want + } else { + folds <- lapply(folds, function(x) indices[-x]) # Return training indices + } + + if (!list) { + folds <- unlist(folds) + } + + return(folds) +} + ##' @title SDA Downscale Function ##' @name SDA_downscale ##' @author Joshua Ploshay, Sambhav Dixit From 5d86312d92fe03adeb3a77665b96db4ec2dcda45 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:10:57 +0530 Subject: [PATCH 14/15] Add comprehensive roxygen documentation for create_folds function 
- Include title, name, and author tags - Detail all function parameters - Provide clear description and details sections - Specify return value format - Align documentation style with project standards --- modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index f4d3700f18..dca7181718 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -62,6 +62,20 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data)) } +##' @title Create folds function +##' @name create_folds +##' @author Sambhav Dixit +##' +##' @param y Vector. A vector of outcome data or indices. +##' @param k Numeric. The number of folds to create. +##' @param list Logical. If TRUE, returns a list of fold indices. If FALSE, returns a vector. +##' @param returnTrain Logical. If TRUE, returns indices for training sets. If FALSE, returns indices for test sets. +##' @details This function creates k-fold indices for cross-validation. It can return either training or test set indices, and the output can be in list or vector format. +##' +##' @description This function generates k-fold indices for cross-validation, allowing for flexible output formats. +##' +##' @return A list of k elements (if list = TRUE), each containing indices for a fold, or a vector of indices (if list = FALSE). + create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) { n <- length(y) indices <- seq_len(n) From 70bfdaf5ac40a69309d78b063c28d41576d5603d Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:16:08 +0530 Subject: [PATCH 15/15] Replace carets - Replace caretFolds -> caret_folds - Remove carets:: dependency --- modules/assim.sequential/R/downscale_function.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index dca7181718..54ce13d2e5 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -184,7 +184,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ all_models <- list() # Create k-fold indices - fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) + fold_indices <- create_folds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) #initialise operations for each fold for (fold in 1:k_folds) {
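Illustrative usage sketch (not part of any patch in this series) for the create_folds() helper introduced in PATCH 13 and wired into the CNN branch by PATCH 15; the numbers below are made up for demonstration.

# Assumes create_folds() from modules/assim.sequential/R/downscale_function.R has been sourced
folds <- create_folds(y = seq_len(20), k = 5, list = TRUE, returnTrain = FALSE)
str(folds)                                   # list of 5; folds[[1]] is 1 2 3 4 (held-out indices)
val_idx   <- folds[[1]]
train_idx <- setdiff(seq_len(20), val_idx)   # matching training indices, as in the CV loop
# Note: because the helper splits seq_len(n) with cut(), each fold is a contiguous block of
# row indices rather than a random partition (unlike caret::createFolds); shuffling the
# training rows beforehand restores random fold membership if that behaviour is wanted.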