Commit
modified namespaces
added keras3:: and removed base:: namespaces
sambhavnoobcoder authored Jul 26, 2024
1 parent 71c1013 commit 38c9e7a
Showing 1 changed file with 69 additions and 69 deletions.
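For context, a minimal before/after sketch of the convention this commit applies (condensed from the diff below; base R calls lose their explicit namespace, while keras calls gain the keras3:: prefix):

# Before this commit: base R calls were fully qualified, keras calls were not
carbon_data <- base::as.data.frame(base::t(data))
model <- keras_model_sequential()

# After this commit: base R calls are unqualified, keras3 calls are qualified
carbon_data <- as.data.frame(t(data))
model <- keras3::keras_model_sequential()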
138 changes: 69 additions & 69 deletions modules/assim.sequential/R/downscale_function.R
@@ -14,52 +14,52 @@

SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) {
# Read the input data and site coordinates
-input_data <- base::readRDS(data_path)
+input_data <- readRDS(data_path)
site_coordinates <- readr::read_csv(coords_path)

# Convert input_data names to Date objects
-input_date_names <- lubridate::ymd(base::names(input_data))
-base::names(input_data) <- input_date_names
+input_date_names <- lubridate::ymd(names(input_data))
+names(input_data) <- input_date_names

# Convert the input date to a Date object
standard_date <- lubridate::ymd(date)

# Ensure the date exists in the input data
if (!standard_date %in% input_date_names) {
-base::stop(base::paste("Date", date, "not found in the input data."))
+stop(paste("Date", date, "not found in the input data."))
}

# Extract the carbon data for the specified focus year
-index <- base::which(input_date_names == standard_date)
+index <- which(input_date_names == standard_date)
data <- input_data[[index]]

# Ensure the carbon pool exists in the input data
-if (!carbon_pool %in% base::names(data)) {
-base::stop(base::paste("Carbon pool", carbon_pool, "not found in the input data."))
+if (!carbon_pool %in% names(data)) {
+stop(paste("Carbon pool", carbon_pool, "not found in the input data."))
}

-carbon_data <- base::as.data.frame(base::t(data[base::which(base::names(data) == carbon_pool)]))
-base::names(carbon_data) <- base::paste0("ensemble", base::seq(base::ncol(carbon_data)))
+carbon_data <- as.data.frame(t(data[which(names(data) == carbon_pool)]))
+names(carbon_data) <- paste0("ensemble", seq(ncol(carbon_data)))

# Ensure site coordinates have 'lon' and 'lat' columns
-if (!base::all(c("lon", "lat") %in% base::names(site_coordinates))) {
-base::stop("Site coordinates must contain 'lon' and 'lat' columns.")
+if (!all(c("lon", "lat") %in% names(site_coordinates))) {
+stop("Site coordinates must contain 'lon' and 'lat' columns.")
}

# Ensure the number of rows in site coordinates matches the number of rows in carbon data
-if (base::nrow(site_coordinates) != base::nrow(carbon_data)) {
-base::message("Number of rows in site coordinates does not match the number of rows in carbon data.")
-if (base::nrow(site_coordinates) > base::nrow(carbon_data)) {
-base::message("Truncating site coordinates to match carbon data rows.")
-site_coordinates <- site_coordinates[1:base::nrow(carbon_data), ]
+if (nrow(site_coordinates) != nrow(carbon_data)) {
+message("Number of rows in site coordinates does not match the number of rows in carbon data.")
+if (nrow(site_coordinates) > nrow(carbon_data)) {
+message("Truncating site coordinates to match carbon data rows.")
+site_coordinates <- site_coordinates[1:nrow(carbon_data), ]
} else {
base::message("Truncating carbon data to match site coordinates rows.")
carbon_data <- carbon_data[1:base::nrow(site_coordinates), ]
message("Truncating carbon data to match site coordinates rows.")
carbon_data <- carbon_data[1:nrow(site_coordinates), ]
}
}

base::message("Preprocessing completed successfully.")
base::return(base::list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
message("Preprocessing completed successfully.")
return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
}

##' @title SDA Downscale Function
@@ -85,34 +85,34 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
site_coordinates <- terra::vect(preprocessed$site_coordinates, geom = c("lon", "lat"), crs = "EPSG:4326")

# Extract predictors from covariates raster using site coordinates
-predictors <- base::as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))
+predictors <- as.data.frame(terra::extract(covariates, site_coordinates, ID = FALSE))

# Dynamically get covariate names
-covariate_names <- base::names(predictors)
+covariate_names <- names(predictors)

# Create a single data frame with all predictors and ensemble data
-full_data <- base::cbind(carbon_data, predictors)
+full_data <- cbind(carbon_data, predictors)

# Split the observations into training and testing sets
-if (!base::is.null(seed)) {
-base::set.seed(seed) # Only set seed if provided
+if (!is.null(seed)) {
+set.seed(seed) # Only set seed if provided
}
-sample <- base::sample(1:base::nrow(full_data), size = base::round(0.75 * base::nrow(full_data)))
+sample <- sample(1:nrow(full_data), size = round(0.75 * nrow(full_data)))
train_data <- full_data[sample, ]
test_data <- full_data[-sample, ]

# Prepare data for both RF and CNN
-x_data <- base::as.matrix(full_data[, covariate_names])
-y_data <- base::as.matrix(carbon_data)
+x_data <- as.matrix(full_data[, covariate_names])
+y_data <- as.matrix(carbon_data)

# Calculate scaling parameters from all data
-scaling_params <- base::list(
-mean = base::colMeans(x_data),
-sd = base::apply(x_data, 2, stats::sd)
+scaling_params <- list(
+mean = colMeans(x_data),
+sd = apply(x_data, 2, stats::sd)
)

# Normalize the data
-x_data_scaled <- base::scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)
+x_data_scaled <- scale(x_data, center = scaling_params$mean, scale = scaling_params$sd)

# Split into training and testing sets
x_train <- x_data_scaled[sample, ]
@@ -121,14 +121,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
y_test <- y_data[-sample, ]

# Initialize lists for outputs
-models <- base::list()
-maps <- base::list()
-predictions <- base::list()
+models <- list()
+maps <- list()
+predictions <- list()

if (model_type == "rf") {
-for (i in base::seq_along(carbon_data)) {
-ensemble_col <- base::paste0("ensemble", i)
-formula <- stats::as.formula(base::paste(ensemble_col, "~", base::paste(covariate_names, collapse = " + ")))
+for (i in seq_along(carbon_data)) {
+ensemble_col <- paste0("ensemble", i)
+formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
models[[i]] <- randomForest::randomForest(formula,
data = train_data,
ntree = 1000,
@@ -140,23 +140,23 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
predictions[[i]] <- stats::predict(models[[i]], test_data)
}
} else if (model_type == "cnn") {
-x_train <- array_reshape(x_train, c(base::nrow(x_train), 1, base::ncol(x_train)))
-x_test <- array_reshape(x_test, c(base::nrow(x_test), 1, base::ncol(x_test)))
+x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
+x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))

-for (i in base::seq_along(carbon_data)) {
-model <- keras_model_sequential() |>
-layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, base::length(covariate_names))) |>
-layer_flatten() |>
-layer_dense(units = 64, activation = 'relu') |>
-layer_dense(units = 1)
+for (i in seq_along(carbon_data)) {
+model <- keras3::keras_model_sequential() |>
+keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+keras3::layer_flatten() |>
+keras3::layer_dense(units = 64, activation = 'relu') |>
+keras3::layer_dense(units = 1)

-model |> compile(
+model |> keras3::compile(
loss = 'mean_squared_error',
-optimizer = optimizer_adam(),
+optimizer = keras3::optimizer_adam(),
metrics = c('mean_absolute_error')
)

-model |> fit(
+model |> keras3::fit(
x = x_train,
y = y_train[, i],
epochs = 100,
@@ -168,10 +168,10 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
models[[i]] <- model

cnn_predict <- function(model, newdata, scaling_params) {
-newdata <- base::scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
-newdata <- array_reshape(newdata, c(base::nrow(newdata), 1, base::ncol(newdata)))
+newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
+newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
predictions <- stats::predict(model, newdata)
-base::return(base::as.vector(predictions))
+return(as.vector(predictions))
}

prediction_rast <- terra::rast(covariates)
@@ -182,26 +182,26 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
}
} else {
base::stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
}

# Organize the results into a single output list
-downscale_output <- base::list(
-data = base::list(training = train_data, testing = test_data),
+downscale_output <- list(
+data = list(training = train_data, testing = test_data),
models = models,
maps = maps,
predictions = predictions,
scaling_params = scaling_params
)

# Rename each element of the output list with appropriate ensemble numbers
-for (i in base::seq_along(carbon_data)) {
-base::names(downscale_output$models)[i] <- base::paste0("ensemble", i)
-base::names(downscale_output$maps)[i] <- base::paste0("ensemble", i)
-base::names(downscale_output$predictions)[i] <- base::paste0("ensemble", i)
+for (i in seq_along(carbon_data)) {
+names(downscale_output$models)[i] <- paste0("ensemble", i)
+names(downscale_output$maps)[i] <- paste0("ensemble", i)
+names(downscale_output$predictions)[i] <- paste0("ensemble", i)
}

-base::return(downscale_output)
+return(downscale_output)
}

##' @title Calculate Metrics for Downscaling Results
@@ -218,20 +218,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
##' @return A list of metrics for each ensemble, where each element contains MAE, MSE, R_squared, the actual values from the testing data, and the predicted values for the testing data

SDA_downscale_metrics <- function(downscale_output, carbon_pool) {
-metrics <- base::list()
+metrics <- list()

-for (i in 1:base::length(downscale_output$data)) {
-actual <- downscale_output$data[[i]]$testing[[base::paste0(carbon_pool, "_ens", i)]]
+for (i in 1:length(downscale_output$data)) {
+actual <- downscale_output$data[[i]]$testing[[paste0(carbon_pool, "_ens", i)]]
predicted <- downscale_output$predictions[[i]]

-mse <- base::mean((actual - predicted)^2)
-mae <- base::mean(base::abs(actual - predicted))
-r_squared <- 1 - base::sum((actual - predicted)^2) / base::sum((actual - base::mean(actual))^2)
+mse <- mean((actual - predicted)^2)
+mae <- mean(abs(actual - predicted))
+r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)

-metrics[[i]] <- base::list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
+metrics[[i]] <- list(MSE = mse, MAE = mae, R_squared = r_squared, actual = actual, predicted = predicted)
}

-base::names(metrics) <- base::paste0("ensemble", base::seq_along(metrics))
+names(metrics) <- paste0("ensemble", seq_along(metrics))

-base::return(metrics)
+return(metrics)
}
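For reference, a hypothetical usage sketch of the three functions defined in this file; the file paths, date, carbon pool name, and covariate raster below are placeholders, not values taken from this repository:

# Preprocess the SDA ensemble output and site coordinates (paths are placeholders)
preprocessed <- SDA_downscale_preprocess(
  data_path   = "sda_output.rds",
  coords_path = "site_coords.csv",
  date        = "2020-07-15",
  carbon_pool = "TotalCarbon"
)

# Covariate raster assumed to be readable with terra (placeholder file name)
covariates <- terra::rast("covariates.tif")

# Fit one model per ensemble member; seed is optional, as the function body suggests
downscaled <- SDA_downscale(preprocessed, date = "2020-07-15",
                            carbon_pool = "TotalCarbon", covariates = covariates,
                            model_type = "rf", seed = 123)

# Summarise MAE, MSE, and R_squared on the held-out test rows
metrics <- SDA_downscale_metrics(downscaled, carbon_pool = "TotalCarbon")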
