Skip to content

Commit

Permalink
Fix bug in Issue #49
Browse files Browse the repository at this point in the history
Also fix a bug that becomes apparent after fixing the original bug:
Not all partitions in the bootstrap replication are necessarily be filled, yielding NaN values when the statistic is calculated. Add data validation to ensure only calculating statistic on partitions that contain values.
  • Loading branch information
MeganFantes committed Jun 7, 2019
1 parent a94cd77 commit bfc6b85
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions R/mechanism-bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,29 @@
#' @param sensitivity Sensitivity of the function
#' @param epsilon Numeric differential privacy parameter
#' @param fun Function to evaluate
#' @param inputObject the Bootstrap mechanism object on which the input function will be evaluated
#' @return Value of the function applied to one bootstrap sample
#' @import stats
#' @export

bootstrap.replication <- function(x, n, sensitivity, epsilon, fun) {
bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) {
partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n))
max.appearances <- max(partition)
probs <- sapply(1:max.appearances, dbinom, size=n, prob=(1 / n))
stat.partitions <- vector('list', max.appearances)
for (i in 1:max.appearances) {
variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon)
stat.i <- fun(x[partition == i])
noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian')
stat.partitions[[i]] <- i * stat.i + noise.i
# make a sorted vector of the partitions of the data
# because it is not guaranteed that every partition from 1:max.appearances will have a value in it
# so we need to loop through only the partitions that have data
validPartitions <- sort(unique(partition[,1]))
# we do not want the 0 partition, so we remove it from the list
validPartitions <- validPartitions[2:length(validPartitions)]
# print the unique values of the partition, to track which entries may result in NaN
print(validPartitions)
probs <- sapply(1:length(validPartitions), dbinom, size=n, prob=(1 / n))
stat.partitions <- vector('list', length(validPartitions))
for (i in 1:length(validPartitions)) {
currentPartition <- validPartitions[i]
variance.currentPartition <- (currentPartition * probs[i] * (sensitivity^2)) / (2 * epsilon)
stat.currentPartition <- inputObject$bootStatEval(x[partition == currentPartition], fun, ...)
noise.currentPartition <- dpNoise(n=length(stat.currentPartition), scale=sqrt(variance.currentPartition), dist='gaussian')
stat.partitions[[i]] <- currentPartition * stat.currentPartition + noise.currentPartition
}
stat.out <- do.call(rbind, stat.partitions)
return(apply(stat.out, 2, sum))
Expand All @@ -39,10 +48,10 @@ mechanismBootstrap <- setRefClass(
)

mechanismBootstrap$methods(
bootStatEval = function(xi) {
bootStatEval = function(xi, fun, ...) {
fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
input.vals = c(list(x=x), fun.args)
stat <- do.call(boot.fun, input.vals)
input.vals = c(list(x=xi), fun.args)
stat <- do.call(fun, input.vals)
return(stat)
})

Expand All @@ -58,11 +67,11 @@ mechanismBootstrap$methods(
})

mechanismBootstrap$methods(
evaluate = function(fun, x, sens, postFun) {
evaluate = function(fun, x, sens, postFun, ...) {
x <- censordata(x, .self$var.type, .self$rng)
x <- fillMissing(x, .self$var.type, .self$impute.rng[0], .self$impute.rng[1])
epsilon.part <- epsilon / .self$n.boot
release <- replicate(.self$n.boot, bootstrap.replication(x, n, sens, epsilon.part, fun=.self$bootStatEval))
release <- replicate(.self$n.boot, bootstrap.replication(x, n, sens, epsilon.part, fun=fun, inputObject = .self, ...))
std.error <- .self$bootSE(release, .self$n.boot, sens)
out <- list('release' = release, 'std.error' = std.error)
out <- postFun(out)
Expand Down

0 comments on commit bfc6b85

Please sign in to comment.