-
Notifications
You must be signed in to change notification settings - Fork 5
/
data_KFA.R
143 lines (116 loc) · 4.13 KB
/
data_KFA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
library(readr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ecoforecastR) # Ensure this package is installed and loaded
# Define DLM function
DLM_function <- function() {
# Load target data
target <- read_csv("https://data.ecoforecast.org/neon4cast-targets/aquatics/aquatics-targets.csv.gz", guess_max = 1e6)
site_data <- read_csv("https://raw.githubusercontent.com/eco4cast/neon4cast-targets/main/NEON_Field_Site_Metadata_20220412.csv")
# Filter for aquatic sites
site_data <- site_data %>% filter(aquatics == 1)
site <- "BARC"
oxygen <- target %>% filter(variable == "oxygen", site_id == site) %>% arrange(datetime)
# Prepare data for DLM
y <- oxygen$observation
data <- list(y = y, n = length(y))
# Define the DLM model; simplified model since `X` isn't used
model <- list(obs = "y", fixed = "~ 1", n.iter = 10000)
# Run the DLM
ef.out <- ecoforecastR::fit_dlm(model = model, data = data)
# Extract and process results from DLM
params <- window(ef.out$params, start = 1000)
summary_params <- summary(params)
# Initial conditions for Kalman Filter
mu0 <- as.numeric(head(y, 1)) # First observation of y
P0 <- diag(10, length(mu0)) # Large initial covariance
# Return relevant data and DLM results
return(list(
Y = y,
mu0 = mu0,
P0 = P0,
time = oxygen$datetime,
DLM_results = ef.out,
params = summary_params
))
}
# Kalman Filter functions
KalmanAnalysis <- function(mu.f, P.f, Y, R, H, I){
obs = !is.na(Y)
if (any(obs)) {
K <- P.f * H / (H * P.f * H + R)
mu.a <- mu.f + K * (Y - H * mu.f)
P.a <- (I - K * H) * P.f
} else {
mu.a = mu.f
P.a = P.f
}
return(list(mu.a=mu.a, P.a=P.a))
}
KalmanForecast <- function(mu.a, P.a, M, Q){
mu.f = M * mu.a
P.f = Q + M * P.a * M
return(list(mu.f=mu.f, P.f=P.f))
}
# Running the Kalman Filter
KalmanFilter <- function(M, mu0, P0, Q, R, Y){
nt = length(Y)
mu.f = numeric(nt + 1)
mu.a = numeric(nt)
P.f = numeric(nt + 1)
P.a = numeric(nt)
# Initialization
mu.f[1] = mu0
P.f[1] = P0
H = 1
I = 1
# Sequential updates
for(t in 1:nt){
KA <- KalmanAnalysis(mu.f[t], P.f[t], Y[t], R, H, I)
mu.a[t] <- KA$mu.a
P.a[t] <- KA$P.a
KF <- KalmanForecast(mu.a[t], P.a[t], M, Q)
mu.f[t + 1] <- KF$mu.f
P.f[t + 1] <- KF$P.f
}
return(list(mu.f=mu.f, mu.a=mu.a, P.f=P.f, P.a=P.a))
}
# Run the DLM function to get initial parameters
results <- DLM_function()
# Define a simple state transition matrix M, assuming simple evolution without spatial interactions
alpha <- 0.05
M <- diag(1 - alpha, length(results$Y))
# Define process and observation error
tau_proc <- rep(0.01, length(results$Y))
Q <- diag(diag(tau_proc))
tau_obs <- var(results$Y, na.rm = TRUE)
R <- diag(tau_obs, length(results$Y))
# Prepare the observation vector Y
Y <- matrix(results$Y, ncol = 1)
# Run the Kalman Filter
KF_results <- KalmanFilter(M, results$mu0, results$P0, Q, R, Y)
# Forecasting for the next 16 days
last_mu <- tail(KF_results$mu.a, 1)
last_P <- tail(KF_results$P.a, 1)
forecast_length <- 30
forecast_mu <- numeric(forecast_length)
forecast_P <- numeric(forecast_length)
for (i in 1:forecast_length) {
forecast_result <- KalmanForecast(last_mu, last_P, M, Q)
forecast_mu[i] <- forecast_result$mu.f
last_mu <- forecast_result$mu.f
last_P <- forecast_result$P.f
}
forecast_dates <- seq(max(results$time) + 1, by = "day", length.out = forecast_length)
# Combining actual and forecasted results for plotting
full_mu <- c(KF_results$mu.a, forecast_mu)
full_time <- c(results$time, forecast_dates)
full_p <- c(sqrt(KF_results$P.a), sqrt(forecast_P)) * 1.96 # 95% CI
ci <- cbind(full_mu - full_p, full_mu + full_p)
# Plotting actual and forecasted results with confidence intervals
plot(as.Date(full_time), full_mu, type = 'n', xlab = "Time", ylab = "Oxygen Level", main = "Forecast and Analysis with Confidence Intervals")
ecoforecastR::ciEnvelope(as.Date(full_time), ci[, 1], ci[, 2], col = "lightBlue")
lines(as.Date(full_time), full_mu, lwd = 2)
points(as.Date(results$time), results$Y, pch = 19) # Actual observations
points(as.Date(forecast_dates), forecast_mu, pch = 19, col = 'red') # Forecasted points