add overestimates code as example
cboettig committed Jan 30, 2018
1 parent a5f27f7 commit 6671232
Showing 2 changed files with 99 additions and 1 deletion.
DESCRIPTION (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
 Package: sarsop
 Type: Package
 Title: Approximate POMDP Planning Software
-Version: 0.4.0
+Version: 0.5.0
 Authors@R: c(
     person("Carl", "Boettiger", email = "[email protected]", role = c("cre", "aut")),
     person("Jeroen", "Ooms", email = "[email protected]", role = c("aut")),
inst/examples/pomdp_overestimates.R (98 changes: 98 additions & 0 deletions)
@@ -0,0 +1,98 @@
## A helper function illustrating POMDP simulations in which the true observation matrix used in the
## simulation may differ from the one the POMDP policy is based upon. This can illustrate the impact
## of different assumptions about the measurement error process, such as overestimating the
## measurement error or choosing a different error distribution.
pomdp_overestimates <-
  function(transition,
           model_observation,
           reward,
           discount,
           true_observation,
           x0,
           a0 = 1,
           Tmax = 20,
           alpha,
           reps = 100)
  {
    ## Run `reps` independent replicates in parallel (forked worker processes)
    sims <- parallel::mclapply(1:reps, function(i) {
      sim <- pomdp_overestimate(transition,
                                model_observation,
                                reward,
                                discount,
                                true_observation,
                                x0,
                                a0,
                                Tmax,
                                alpha)
      sim$df$rep <- i
      sim
    })
    ## Bind the per-replicate results into single data frames
    list(df = do.call(rbind, lapply(sims, `[[`, "df")),
         state_posterior = do.call(rbind, lapply(sims, `[[`, "state_posterior")))
  }

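## Simulate a single replicate: actions and belief updates rely on the
## model's observation matrix, while observations are drawn from the true one.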
pomdp_overestimate <-
  function(transition,
           model_observation,
           reward,
           discount,
           true_observation,
           x0,
           a0 = 1,
           Tmax = 20,
           alpha)
  {
    observation <- model_observation

    ## The true and model observation arrays must share dimensions
    stopifnot(identical(dim(true_observation), dim(model_observation)))

    n_states <- dim(observation)[1]
    n_obs <- dim(observation)[2]
    value <- obs <- action <- state <- numeric(Tmax + 1)
    state_posterior <- array(NA, dim = c(Tmax + 1, n_states))
    state_prior <- rep(1, n_states) / n_states  # assume a uniform prior

    state[2] <- x0
    action[1] <- a0
    state_posterior[2, ] <- state_prior

    for (t in 2:Tmax) {
      ## Choose the action from the policy implied by the model's observation matrix
      out <- sarsop::compute_policy(alpha,
                                    transition,
                                    model_observation,
                                    reward,
                                    state_posterior[t, ],
                                    action[t - 1])

      ## Simulate the observation from the true observation matrix
      obs[t] <- sample(1:n_obs,
                       1,
                       prob = true_observation[state[t], , action[t - 1]])

      action[t] <- out$policy[obs[t]]
      value[t] <- reward[state[t], action[t]] * discount^(t - 1)
      state[t + 1] <- sample(1:n_states,
                             1,
                             prob = transition[state[t], , action[t]])

      ## Update the belief state using the model's observation matrix
      state_posterior[t + 1, ] <-
        sarsop:::update_belief(state_posterior[t, ],
                               transition,
                               model_observation,
                               obs[t],
                               action[t - 1])
    }

    ## Collect results, dropping the unused first row and the final
    ## partially-filled row
    df <- data.frame(time = 0:Tmax,
                     state,
                     obs,
                     action,
                     value)[2:Tmax, ]
    list(df = df,
         state_posterior = state_posterior[2:(Tmax + 1), ])
  }
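
## ---------------------------------------------------------------------------
## A minimal usage sketch. The toy 2-state, 2-observation, 2-action model
## below is hypothetical, chosen only to illustrate the calling convention;
## sarsop::sarsop() additionally requires the APPL 'pomdpsol' solver binary.
n_s <- 2; n_z <- 2; n_a <- 2
transition <- array(c(0.8, 0.2, 0.2, 0.8), dim = c(n_s, n_s, n_a))         # transition[s, s', a]
model_observation <- array(c(0.6, 0.4, 0.4, 0.6), dim = c(n_s, n_z, n_a))  # noisier than truth
true_observation <- array(c(0.9, 0.1, 0.1, 0.9), dim = c(n_s, n_z, n_a))   # observation[s, z, a]
reward <- matrix(c(1, 0, 0, 1), nrow = n_s, ncol = n_a)                    # reward[s, a]
alpha <- sarsop::sarsop(transition, model_observation, reward, discount = 0.95)
sims <- pomdp_overestimates(transition, model_observation, reward,
                            discount = 0.95,
                            true_observation = true_observation,
                            x0 = 1, Tmax = 20, alpha = alpha, reps = 10)
head(sims$df)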
