-
Notifications
You must be signed in to change notification settings - Fork 5
/
KalmanFilter.R
163 lines (128 loc) · 4.38 KB
/
KalmanFilter.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
library(readr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ecoforecastR) # Ensure this package is installed and loaded
# Define DLM function
DLM_function <- function() {
# Load target data
target <- read_csv("https://data.ecoforecast.org/neon4cast-targets/aquatics/aquatics-targets.csv.gz", guess_max = 1e6)
site_data <- read_csv("https://raw.githubusercontent.com/eco4cast/neon4cast-targets/main/NEON_Field_Site_Metadata_20220412.csv")
# Filter for aquatic sites
site_data <- site_data %>% filter(aquatics == 1)
site <- "BARC"
oxygen <- target %>% filter(variable == "oxygen", site_id == site) %>% arrange(datetime)
# Prepare data for DLM
y <- oxygen$observation
data <- list(y = y, n = length(y))
# Define the DLM model; simplified model since `X` isn't used
model <- list(obs = "y", fixed = "~ 1 + X", n.iter = 2000)
# Run the DLM
ef.out <- ecoforecastR::fit_dlm(model = model, data = data)
# Extract and process results from DLM
params <- window(ef.out$params, start = 1000)
summary_params <- summary(params)
# Initial conditions for Kalman Filter
mu0 <- as.numeric(head(y, 1)) # First observation of y
P0 <- diag(10, length(mu0)) # Large initial covariance
# Return relevant data and DLM results
return(list(
Y = y,
mu0 = mu0,
P0 = P0,
time = oxygen$datetime,
DLM_results = ef.out,
params = summary_params
))
}
# Kalman Filter functions
KalmanAnalysis <- function(mu.f, P.f, Y, R, H, I){
obs = !is.na(Y)
if (any(obs)) {
K <- P.f * H / (H * P.f * H + R)
mu.a <- mu.f + K * (Y - H * mu.f)
P.a <- (I - K * H) * P.f
} else {
mu.a = mu.f
P.a = P.f
}
return(list(mu.a=mu.a, P.a=P.a))
}
KalmanForecast <- function(mu.a, P.a, M, Q){
mu.f = M * mu.a
P.f = Q + M * P.a * M
return(list(mu.f=mu.f, P.f=P.f))
}
# Running the Kalman Filter
KalmanFilter <- function(M, mu0, P0, Q, R, Y){
nt = length(Y)
mu.f = numeric(nt + 1)
mu.a = numeric(nt)
P.f = numeric(nt + 1)
P.a = numeric(nt)
# Initialization
mu.f[1] = mu0
P.f[1] = P0
H = 1
I = 1
# Sequential updates
for(t in 1:nt){
KA <- KalmanAnalysis(mu.f[t], P.f[t], Y[t], R, H, I)
mu.a[t] <- KA$mu.a
P.a[t] <- KA$P.a
KF <- KalmanForecast(mu.a[t], P.a[t], M, Q)
mu.f[t + 1] <- KF$mu.f
P.f[t + 1] <- KF$P.f
}
return(list(mu.f=mu.f, mu.a=mu.a, P.f=P.f, P.a=P.a))
}
# Run the DLM function to get initial parameters
results <- DLM_function()
# Define the number of states based on your data
nstates <- length(results$Y) # This should actually be 1 since we have one state variable
# Define a simple state transition matrix M, assuming simple evolution without spatial interactions
alpha <- 0.05
M <- diag(1 - alpha, nstates)
# Define process and observation error
tau_proc <- rep(0.01, nstates)
Q <- diag(diag(tau_proc))
tau_obs <- results$DLM_results$data$OBS# var(results$Y, na.rm = TRUE)
R <- diag(tau_obs, nstates)
# Initial conditions based on historical data or estimated from DLM
mu0 <- results$mu0
P0 <- results$P0
# Prepare the observation vector Y
Y <- matrix(results$Y, ncol = 1)
KF_results <- KalmanFilter(M, mu0, P0, Q, R, Y)
# Plot the actual and predicted oxygen levels
time <- results$time
actual <- results$Y
predicted <- matrix(KF_results$mu.a, ncol = 1) # Ensure predicted is a column matrix
df <- data.frame(Time = time, Actual = actual, Predicted = predicted)
# Assume `KF_results` and `time` are from the previous code
time <- results$time
mu.f <- KF_results$mu.f
mu.a <- KF_results$mu.a
P.f <- KF_results$P.f
P.a <- KF_results$P.a
# Convert time to Date format if not already
time <- as.Date(time)
## Subset time
time2 <- time[time > as.Date("2015-01-01")]
tsel <- which(time %in% time2)
time <- as.Date(time) # Convert time to Date if not already
time2 <- time[time > as.Date("2015-01-01")]
tsel <- which(time %in% time2)
n = length(time2) * 2
mu = p = rep(NA, n)
mu[seq(1, n, by = 2)] = mu.f[tsel]
mu[seq(2, n, by = 2)] = mu.a[tsel]
p[seq(1, n, by = 2)] = 1.96 * sqrt(P.f[tsel])
p[seq(2, n, by = 2)] = 1.96 * sqrt(P.a[tsel])
ci = cbind(mu - p, mu + p)
time3 = sort(c(time2, time2 + 1))
# Plotting
plot(time3, mu, ylim = range(ci), type = 'n', xlab = "Time", ylab = "Oxygen Level", main = "Forecast and Analysis with Confidence Intervals")
ecoforecastR::ciEnvelope(time3, ci[, 1], ci[, 2], col = "lightBlue")
lines(time3, mu, lwd = 2)
points(time[tsel], results$Y[tsel], pch = 19) # add actual observations