diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 27645f9dd7..54ce13d2e5 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -62,6 +62,38 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
   return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
 }
+##' @title Create folds function
+##' @name create_folds
+##' @author Sambhav Dixit
+##'
+##' @param y Vector. A vector of outcome data or indices.
+##' @param k Numeric. The number of folds to create.
+##' @param list Logical. If TRUE, returns a list of fold indices. If FALSE, returns a vector.
+##' @param returnTrain Logical. If TRUE, returns indices for training sets. If FALSE, returns indices for test sets.
+##' @details This function creates k-fold indices for cross-validation. It can return either training or test set indices, and the output can be in list or vector format.
+##'
+##' @description This function generates k-fold indices for cross-validation, allowing for flexible output formats.
+##'
+##' @return A list of k elements (if list = TRUE), each containing indices for a fold, or a vector of indices (if list = FALSE).
+
+create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) {
+  n <- length(y)
+  indices <- seq_len(n)
+  folds <- split(indices, cut(seq_len(n), breaks = k, labels = FALSE))
+
+  if (!returnTrain) {
+    folds <- folds # Test indices are already what we want
+  } else {
+    folds <- lapply(folds, function(x) indices[-x]) # Return training indices
+  }
+
+  if (!list) {
+    folds <- unlist(folds)
+  }
+
+  return(folds)
+}
+
 ##' @title SDA Downscale Function
 ##' @name SDA_downscale
 ##' @author Joshua Ploshay, Sambhav Dixit
@@ -140,84 +172,141 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
+    # Define k_folds and num_bags
+    k_folds <- 5
+    num_bags <- 5
+
     # Reshape input data for CNN
     x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
     x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
-
+
     for (i in seq_along(carbon_data)) {
-      # Define the CNN model architecture
-      # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
-      model <- keras3::keras_model_sequential() |>
-        # 1D Convolutional layer: Extracts local features from input data
-        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
-        # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
-        keras3::layer_batch_normalization() |>
-        # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization
-        keras3::layer_dropout(rate = 0.3) |>
-        # Flatten: Converts 3D output to 1D for dense layer input
-        keras3::layer_flatten() |>
-        # Dense layer: Learns complex combinations of features
-        keras3::layer_dense(units = 64, activation = 'relu') |>
-        # Second batch normalization: Further stabilizes learning in deeper layers
-        keras3::layer_batch_normalization() |>
-        # Second dropout: Additional regularization to prevent overfitting in final layers
-        keras3::layer_dropout(rate = 0.3) |>
-        # Output layer: Single neuron for regression prediction
-        keras3::layer_dense(units = 1)
+      all_models <- list()
 
-      # Learning rate scheduler
-      lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
-        initial_learning_rate = 0.001,
-        decay_steps = 1000,
-        decay_rate = 0.9
-      )
+      # Create k-fold indices
+      fold_indices <- create_folds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE)
 
-      # Compile the model
-      model |> keras3::compile(
-        loss = 'mean_squared_error',
-        optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
-        metrics = c('mean_absolute_error')
-      )
-
-      # Early stopping callback
-      early_stopping <- keras3::callback_early_stopping(
-        monitor = 'val_loss',
-        patience = 10,
-        restore_best_weights = TRUE
-      )
+      # Initialise operations for each fold
+      for (fold in 1:k_folds) {
+        cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))
+
+        # Split data into training and validation sets for this fold
+        train_indices <- setdiff(seq_len(nrow(x_train)), fold_indices[[fold]])
+        val_indices <- fold_indices[[fold]]
+
+        x_train_fold <- x_train[train_indices, , drop = FALSE]
+        y_train_fold <- y_train[train_indices, i]
+        x_val_fold <- x_train[val_indices, , drop = FALSE]
+        y_val_fold <- y_train[val_indices, i]
+
+        # Create bagged models for this fold
+        fold_models <- list()
+        for (bag in 1:num_bags) {
+          # Create bootstrap sample
+          bootstrap_indices <- sample(1:nrow(x_train_fold), size = nrow(x_train_fold), replace = TRUE)
+          x_train_bag <- x_train_fold[bootstrap_indices, ]
+          y_train_bag <- y_train_fold[bootstrap_indices]
+
+          # Define the CNN model architecture
+          # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer.
+          model <- keras3::keras_model_sequential() |>
+            # Reshape layer: reshapes the flat covariate vector to the shape expected by the convolutional layer
+            keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |>
+            # Convolutional layer (3x1 kernel, acting as a 1D convolution over covariates): extracts local features from input data
+            keras3::layer_conv_2d(
+              filters = 32,
+              kernel_size = c(3, 1),
+              activation = 'relu',
+              padding = 'same'
+            ) |>
+            # Flatten: Converts 3D output to 1D for dense layer input
+            keras3::layer_flatten() |>
+            # Dense layer: Learns complex combinations of features
+            keras3::layer_dense(
+              units = 64,
+              activation = 'relu',
+              kernel_regularizer = keras3::regularizer_l2(0.01)
+            ) |>
+            # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
+            keras3::layer_batch_normalization() |>
+            # Dropout: Randomly sets some inputs to 0, reducing overfitting and improving generalization
+            keras3::layer_dropout(rate = 0.3) |>
+            # Dense layer: Learns complex combinations of features
+            keras3::layer_dense(
+              units = 32,
+              activation = 'relu',
+              kernel_regularizer = keras3::regularizer_l2(0.01)
+            ) |>
+            # Batch normalization: Further stabilizes learning in deeper layers
+            keras3::layer_batch_normalization() |>
+            # Dropout: Additional regularization to prevent overfitting in final layer
+            keras3::layer_dropout(rate = 0.3) |>
+            # Output layer: Single neuron for regression prediction
+            keras3::layer_dense(
+              units = 1,
+              kernel_regularizer = keras3::regularizer_l2(0.01)
+            )
+
+          # Learning rate scheduler
+          lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
+            initial_learning_rate = 0.001,
+            decay_steps = 1000,
+            decay_rate = 0.9
+          )
+
+          # Early stopping callback
+          early_stopping <- keras3::callback_early_stopping(
+            monitor = 'loss',
+            patience = 10,
+            restore_best_weights = TRUE
+          )
 
-      # Train the model
-      model |> keras3::fit(
-        x = x_train,
-        y = y_train[, i],
-        epochs = 500, # Increased max epochs
-        batch_size = 32,
-        validation_split = 0.2,
-        callbacks = list(early_stopping),
-        verbose = 0
-      )
+          # Compile the model
+          model |> keras3::compile(
+            loss = 'mean_squared_error',
+            optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
+            metrics = c('mean_absolute_error')
+          )
 
-      # Store the trained model
-      models[[i]] <- model
+          # Train the model
+          model |> keras3::fit(
+            x = x_train_bag,
+            y = y_train_bag,
+            epochs = 500,
+            batch_size = 32,
+            callbacks = list(early_stopping),
+            verbose = 0
+          )
 
-      #CNN predictions
-      cnn_predict <- function(model, newdata, scaling_params) {
+          # Store the trained model for this bag in the fold_models list
+          fold_models[[bag]] <- model
+        }
+
+        # Add fold models to all_models list
+        all_models <- c(all_models, fold_models)
+      }
+
+      # Store all models for this ensemble
+      models[[i]] <- all_models
+
+      # Use all models for predictions
+      cnn_ensemble_predict <- function(models, newdata, scaling_params) {
         newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-        newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
-        predictions <- stats::predict(model, newdata)
-        return(as.vector(predictions))
+        predictions <- sapply(models, function(m) stats::predict(m, newdata))
+        return(rowMeans(predictions))
       }
 
       # Create a prediction raster from covariates
       prediction_rast <- terra::rast(covariates)
-
+
       # Generate spatial predictions using the trained model
       maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
-                                  fun = cnn_predict,
+                                  fun = cnn_ensemble_predict,
                                   scaling_params = scaling_params)
 
       # Make predictions on held-out test data
-      predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
+      predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params)
+    }
   } else {
     stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
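
For review purposes, a minimal usage sketch of the new pieces (not part of the patch; the demo_* objects are hypothetical and exist only for illustration):

# Illustrate what create_folds() returns: with returnTrain = FALSE each list
# element holds the held-out (test) indices for one fold.
demo_folds <- create_folds(y = seq_len(10), k = 5, list = TRUE, returnTrain = FALSE)
str(demo_folds)
# List of 5, each an integer vector of 2 indices: $`1` = 1 2, $`2` = 3 4, ...

# Illustrate the averaging step inside cnn_ensemble_predict(): each column of
# demo_preds stands for one bagged model's predictions for the same rows;
# rowMeans() returns the ensemble-mean prediction per observation.
demo_preds <- cbind(c(1.0, 2.0), c(1.2, 1.8))
rowMeans(demo_preds)  # 1.1 1.9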