From 9cca11f90f98799a89efe90a734a0af469cf220f Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Tue, 4 Jun 2019 17:22:56 -0400 Subject: [PATCH 01/18] added .Rproj to the gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b9240e7..8b307e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .Rproj.user +.Rproj .Rhistory .RData .Ruserdata vignettes/*.html -vignettes/*.pdf \ No newline at end of file +vignettes/*.pdf From 74a3b3837b4db7b335137688bac9b0edb762915b Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Tue, 4 Jun 2019 17:29:14 -0400 Subject: [PATCH 02/18] added * to paths in gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 8b307e4..c4438a2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -.Rproj.user -.Rproj +*.Rproj.user +*.Rproj .Rhistory .RData .Ruserdata From 9b8a1ef43626de458090dd4ad11abc2cb6334e7a Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Tue, 4 Jun 2019 17:31:25 -0400 Subject: [PATCH 03/18] setting up PSIlence on my computer --- .Rbuildignore | 2 + .Rhistory | 512 --------------------------------------- NAMESPACE | 2 +- R/mechanism-laplace.R | 1 - man/Laplace-Mechanism.Rd | 27 --- vignettes/dp-mean.Rmd | 10 +- 6 files changed, 9 insertions(+), 545 deletions(-) delete mode 100644 .Rhistory delete mode 100644 man/Laplace-Mechanism.Rd diff --git a/.Rbuildignore b/.Rbuildignore index c68bc5a..48935ad 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,5 @@ images README.md README_files .DS_Store +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.Rhistory b/.Rhistory deleted file mode 100644 index 98963e9..0000000 --- a/.Rhistory +++ /dev/null @@ -1,512 +0,0 @@ -population_minimum <- min(population) -population_maximum <- max(population) -population_range <- range(population) -population_max_difference <- diff(range(population)) -print(summary(population)) -growth_rate <- 1.08 -print(population * growth_rate) -growth_rates <- c(1.03, 1.05, 1.02, 1.10, 1.05) -print(population * growth_rates) -first_element <- population[1] -first_three_elements <- population[1:3] -last_element <- population[length(population)] -large_population_indicator <- population > 6 -print(large_population_indicator) -print(state[large_population_indicator]) -data <- list( -"population" = population, -"state" = state, -"total_population" = population_total, -"average_population" = population_average -) -print(data) -print(data$total_population) -print(data[["total_population"]]) -print(data[[3]]) -population_data <- data.frame(state, population, growth_rates) -print(population_data) -print(population_data$population) -print(dim(population_data)) -print(nrow(population_data)) -print(ncol(population_data)) -population_data$counter <- 1:nrow(population_data) -population_data$population_log <- log(population_data$population) -population_data$population_t2 <- population_data$population * population_data$growth_rates -print(population_data) -height <- c(71, 64, 68, 73, 61, 75, 65, 67, 68, 70) -n <- length(height) -height_mean <- sum(height) / n # mean(height) -height_variance <- sum((height - height_mean)^2) / (n - 1) # var(height) -snake_case <- 1 -period.separation <- 2 -lowerCamelCase <- 3 -salaries <- read.table('../data/salary.dat', header=TRUE) -print(head(salaries, 10)) -print(summary(salaries)) -print(salaries[10, ]) -print(salaries[6:10, ]) -female_indicator <- salaries$sx == 'female' -female_faculty <- salaries[female_indicator, ] -print(sum(female_indicator)) # this matches what we saw in the summary -male_faculty <- salaries[!female_indicator, ] # should have 38 rows -salary2 <- salaries[, c('sx', 'rk', 'sl')] -print(dim(salary2)) -salary_female <- salaries[female_indicator, c('rk', 'sl')] -print(dim(salary_female)) -salary_female <- salaries[female_indicator, 'sl'] -salary_male <- salaries[!female_indicator, 'sl'] -summary_female <- summary(salary_female) -summary_male <- summary(salary_male) -data.frame(rbind(summary_female, summary_male)) -census <- read.csv('../data/PUMS5extract10000.csv') -print(dim(census)) -print(head(census)) -for (i in 1:5) { -cat('Number:', i, ' Square:', i^2, '\n') -} -length(unique(census$puma)) -income_by_puma <- c() # empty vector -for (puma_code in unique(census$puma)) { -code_indicator <- census$puma == puma_code # boolean vector -mean_this_code <- mean(census[code_indicator, 'income']) # mean on subset -income_by_puma <- c(income_by_puma, mean_this_code) # concatenate -} -print(summary(income_by_puma)) -odds <- c() -for (i in 1:10) { -if (i %% 2 != 0) { # note: modulo operator yields remainder -odds <- c(odds, i) # concatenate -} -} -print(odds) -evens <- c() -odds <- c() -for (i in 1:10) { -if (i %% 2 != 0) { -odds <- c(odds, i) -} else { -evens <- c(evens, i) -} -} -print(odds) -print(evens) -education <- c() -for (puma_code in unique(census$puma)) { -code_indicator <- census$puma == puma_code -puma_subset <- census[code_indicator, ] -if (nrow(puma_subset) >= 30 && mean(puma_subset$income) > 40000) { -education <- c(education, mean(puma_subset$educ)) -} -} -print(length(education)) -print(summary(education)) -vector_summary <- function(vec) { -n <- length(vec) -vector_mean <- mean(vec) -vector_sd <- sd(vec) -return(list( -'n' = n, -'mean' = vector_mean, -'stdev' = vector_sd)) -} -print(vector_summary(census$income)) -print(vector_summary(census$income)$stdev) -print(cor(census$educ, census$income)) -print(cor(census[, c('age', 'educ', 'income')])) -print(t.test(income ~ sex, data=census)) -linear_model <- lm(income ~ age + sex + educ + married + latino + black + asian, data=census) -linear_model$coefficients -coefficients(linear_model) -summary(linear_model)[[2]] -summary(linear_model)[[1]] -summary(linear_model)[[3]] -summary(linear_model) -summary(linear_model)[[4]] -o -logit <- glm(form, data=data, family=binomial) -form <- as.formula('y ~ x1 + x2 + x3 + x4') -logit <- glm(form, data=data, family=binomial) -p <- 1 / (1 + exp(-z)) -set.seed(84723) -n <- 1000 -x1 <- rnorm(n) -x2 <- rnorm(n) -x3 <- rnorm(n) -x4 <- rnorm(n) -z <- -0.3 + (-1.6 * x1) + (0.3 * x2) + (0.15 * x3) + (0.75 * x4) -p <- 1 / (1 + exp(-z)) -y <- rbinom(n, 1, p) -data <- data.frame(y, x1, x2, x3, x4) -form <- as.formula('y ~ x1 + x2 + x3 + x4') -logit <- glm(form, data=data, family=binomial) -print(summary(logit)$coefficients) -X <- cbind(1, x1, x2, x3, x4) -scaler <- mapMatrixUnit(X, p=2) -#source('../PSI-Library/R/mechanisms.R') -source('../PSI-Library/R/utilities.R') -X <- cbind(1, x1, x2, x3, x4) -scaler <- mapMatrixUnit(X, p=2) -X <- scaler$matrix -X <- cbind(1, x1, x2, x3, x4) -scaler <- mapMatrixUnit(X, p=2) -X <- scaler$matrix -objective <- function(theta, X, y, b, n, Delta) { -xb <- as.matrix(X) %*% as.matrix(theta) -p <- as.numeric(1 / (1 + exp(-xb))) -noise <- (b %*% as.matrix(theta)) / n -llik <- sum(y * log(p) + ((1 - y) * log(1 - p))) / n -penalty <- 0.5 * Delta * sum(theta^2) -llik.noisy <- noise + llik + penalty -return(-llik.noisy) -} -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -# regularizer -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = u -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -# draw noise vector b -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -# fit -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -return(release) -} -check <- mechanism(objective, X, y, n, epsilon=0.5, c-0.25, Lambda=0.001) -check <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001) -check <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001) -traceback() -check <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001) -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -# regularizer -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -# draw noise vector b -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -# fit -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -return(release) -} -check <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001) -print(check / scaler$max.norm) -mechanism <- function(fun, X, y, n, epsilon, c, Lambda) { -# regularizer -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -# draw noise vector b -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -# fit -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -return(release) -} -X <- cbind(1, x1, x2, x3, x4) -scaler <- mapMatrixUnit(X, p=2) -X <- scaler$matrix -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001) / scaler$max.norm -print(estimates) -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -if (is.null(n.boot)) { -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -} else { -epsilon <- epsilon / n.boot -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -boot.ests <- vector('list', n.boot) -for (i in 1:n.boot) { -index <- sample(1:n, n, replace=TRUE) -X.star <- X[index, ] -y.star <- y[index, ] -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -boot.ests[[i]] <- optim(par=start.params), fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -if (is.null(n.boot)) { -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -} else { -epsilon <- epsilon / n.boot -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -boot.ests <- vector('list', n.boot) -for (i in 1:n.boot) { -index <- sample(1:n, n, replace=TRUE) -X.star <- X[index, ] -y.star <- y[index, ] -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -boot.ests[[i]] <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -} -release <- data.frame(do.call(rbind, boot.ests)) -} -return(release) -} -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -y -#estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -#print(estimates) -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -if (is.null(n.boot)) { -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -start.params <- rep(0, ncol(X)) -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -} else { -epsilon <- epsilon / n.boot -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -boot.ests <- vector('list', n.boot) -for (i in 1:n.boot) { -index <- sample(1:n, n, replace=TRUE) -X.star <- X[index, ] -y.star <- y[index] -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -boot.ests[[i]] <- optim(par=start.params, fn=fun, X=X.star, y=y.star, b=b, n=n, Delta=Delta)$par -} -release <- data.frame(do.call(rbind, boot.ests)) -} -return(release) -} -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -mechanism <- function(fun, X, y, n, epsilon, c, Lambda, n.boot=NULL) { -start.params <- rep(0, ncol(X)) -if (is.null(n.boot)) { -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -release <- optim(par=start.params, fn=fun, X=X, y=y, b=b, n=n, Delta=Delta)$par -} else { -epsilon <- epsilon / n.boot -epsilon.prime <- epsilon - log(1 + (((2 * c) / (n * Lambda)) + (c^2 / (n^2 * Lambda^2)))) -if (epsilon.prime > 0) { -Delta = 0 -} else { -Delta = c / (n * (exp(epsilon.prime / 4) - 1)) -} -epsilon.prime = epsilon / 2 -boot.ests <- vector('list', n.boot) -for (i in 1:n.boot) { -index <- sample(1:n, n, replace=TRUE) -X.star <- X[index, ] -y.star <- y[index] -b.norm <- dpNoise(n=1, scale=(2 / epsilon.prime), dist='gamma', shape=length(start.params)) -b <- dpNoise(n=length(start.params), scale=(-(epsilon.prime / 2) * b.norm), dist='laplace') -boot.ests[[i]] <- optim(par=start.params, fn=fun, X=X.star, y=y.star, b=b, n=n, Delta=Delta)$par -} -release <- data.frame(do.call(rbind, boot.ests)) -} -return(release) -} -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -print(estimates) -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -print(apply(estimates, 2, mean)) -print(apply(estimates, 2, sd)) -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -print(apply(estimates, 2, mean)) -print(apply(estimates, 2, sd)) -estimates <- mechanism(objective, X, y, n, epsilon=0.5, c=0.25, Lambda=0.001, n.boot=20) / scaler$max.norm -est <- apply(estimates, 2, mean) -se <- apply(estimates, 2, sd) -out <- data.frame(est, se) -names(out) <- c('Estimate', 'Std. Error') -print(out) -install.packages("haven") -install.packages(c("apaTables", "assertthat", "backports", "bayesplot", "BH", "boot", "car", "caret", "checkmate", "cluster", "colorspace", "colourpicker", "crayon", "curl", "data.table", "DBI", "devtools", "digest", "dplyr", "dygraphs", "evaluate", "flexmix", "foreign", "formatR", "Formula", "gdata", "git2r", "glmnet", "gmp", "gridExtra", "Hmisc", "hms", "htmltools", "htmlwidgets", "httpuv", "httr", "knitr", "lattice", "lazyeval", "lme4", "lmtest", "loo", "markdown", "MASS", "MatchIt", "Matrix", "matrixStats", "MBESS", "mcmc", "MCMCpack", "memoise", "mgcv", "mvtnorm", "nlme", "OpenMx", "pbkrtest", "PKI", "quantreg", "R6", "Rcpp", "RcppArmadillo", "RcppEigen", "rmarkdown", "Rmpfr", "rockchalk", "roxygen2", "rpart", "rpf", "rprojroot", "rsconnect", "rstan", "rstanarm", "rstantools", "rstudioapi", "Rvmmin", "sandwich", "scales", "scatterplot3d", "sem", "shiny", "shinyjs", "shinystan", "sourcetools", "SparseM", "StanHeaders", "stringi", "stringr", "survey", "survival", "threejs", "tibble", "tidyr", "viridis", "withr", "xts", "Zelig", "zoo")) -install.packages(c("Matrix", "mgcv")) -library(PSIlence) -devtools::install() -install.packages('devtools') -getwd() -setwd("~/Dropbox/iqss/differential_privacy/PSI-Library/) -"" -"") -setwd("~/Dropbox/iqss/differential_privacy/PSI-Library/") -getwd() -devtools::install() -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -model <- dpGLM$new(mechanism='mechanismObjective', var.type='numeric', n=n, epsilon=epsilon, form=form, rng=rng) -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -model <- dpGLM$new(mechanism='mechanismObjective', var.type='numeric', n=n, epsilon=epsilon, formula=form, rng=rng) -rng -help("dpGLM") -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -model <- dpGLM$new(mechanism='mechanismObjective', var.type='numeric', n=n, epsilon=epsilon, -formula=form, rng=rng, objective='ols') -model$release(PUMS5extract10000) -print(model$result) -help("mechanism") -knitr::opts_knit$set( -stop_on_error = 2L -) -knitr::opts_chunk$set( -fig.height = 7, -fig.width = 7 -) -n.boot <- 50 -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -mechanism <- 'mechanismObjective' -var.type <- 'numeric' -objective <- 'ols' -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 50 -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 20 -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 20 -form <- 'sex ~ income + educ' -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective='logit', n.boot=n.boot) -model$release(PUMS5extract10000) -form -model -epsilon -rng -n.boot <- 20 -form <- 'sex ~ income + educ' -rng <- matrix(c(0, 1, 0, 200000, 1, 16), ncol=2, byrow=TRUE) -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective='logit', n.boot=n.boot) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 20 -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -model$release(PUMS5extract10000) -print(model$result) -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -mechanism <- 'mechanismObjective' -var.type <- 'numeric' -objective <- 'ols' -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 20 -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -model$release(PUMS5extract10000) -print(model$result) -library(PSIlence) -data(PUMS5extract10000) -n <- 10000 -epsilon <- 0.5 -form <- 'income ~ educ + sex' -rng <- matrix(c(0, 200000, 1, 16, 0, 1), ncol=2, byrow=TRUE) -mechanism <- 'mechanismObjective' -var.type <- 'numeric' -objective <- 'ols' -model <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective) -model$release(PUMS5extract10000) -print(model$result) -n.boot <- 20 -model.boot <- dpGLM$new(mechanism=mechanism, var.type=var.type, n=n, epsilon=epsilon, -formula=form, rng=rng, objective=objective, n.boot=n.boot) -model.boot$release(PUMS5extract10000) -print(model.boot$result) -devtools::build() -devtools::install() -q() diff --git a/NAMESPACE b/NAMESPACE index 9b1f41f..95ffd9b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -export("Laplace Mechanism") +export() export(bootstrap.replication) export(censordata) export(check_histogram_mechanism) diff --git a/R/mechanism-laplace.R b/R/mechanism-laplace.R index d3f4b21..94221da 100644 --- a/R/mechanism-laplace.R +++ b/R/mechanism-laplace.R @@ -17,7 +17,6 @@ mechanismLaplace$methods( #' Differentially private evaluation of input function "fun" with sensitivity "sens" on input data #' "x" using the Laplace mechanism. #' - #' @name Laplace Mechanism #' @references C. Dwork, A. Roth The Algorithmic Foundations of Differential Privacy, Chapter 3.3 The Laplace Mechanism p.30-37. August 2014. #' #' @param fun function of input x to add Laplace noise to. diff --git a/man/Laplace-Mechanism.Rd b/man/Laplace-Mechanism.Rd deleted file mode 100644 index 531d9e6..0000000 --- a/man/Laplace-Mechanism.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mechanism-laplace.R -\name{Laplace Mechanism} -\alias{Laplace Mechanism} -\title{Laplace Mechanism} -\arguments{ -\item{fun}{function of input x to add Laplace noise to.} - -\item{x}{input that function fun will be evaluated on.} - -\item{sens}{sensitivity of fun. Sensitivity is defined in above citation.} - -\item{postFun}{post-processing function. Takes differentially private release as input -and returns some form of output in principal based on the differentially private release.} - -\item{...}{any additional (optional) parameters} -} -\value{ -result of post-processing on input function "fun" evaluated on database "x", assuming sensitivity of fun is "sens". -} -\description{ -Differentially private evaluation of input function "fun" with sensitivity "sens" on input data -"x" using the Laplace mechanism. -} -\references{ -C. Dwork, A. Roth The Algorithmic Foundations of Differential Privacy, Chapter 3.3 The Laplace Mechanism p.30-37. August 2014. -} diff --git a/vignettes/dp-mean.Rmd b/vignettes/dp-mean.Rmd index bce171b..7790842 100644 --- a/vignettes/dp-mean.Rmd +++ b/vignettes/dp-mean.Rmd @@ -37,16 +37,18 @@ Syntax ```{r, eval = FALSE} x1 <- c(3, 12, 20, 42, 33, 65, 70, 54, 33, 45) -x2 <- c(TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE) +x2 <- c(TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE) data <- data.frame(x1, x2) dp.mean <- dpMean$new(mechanism='mechanismLaplace', var.type='numeric', - variable='x1', epsilon=0.1, n=10, rng=c(0, 70)) -dp.mean$release(data) + variable='x1', epsilon=10, n=10, rng=c(0, 70)) +out1 <- dp.mean$release(data) +print(out1) dp.mean2 <- dpMean$new(mechanism='mechanismLaplace', var.type='logical', variable='x2', epsilon=0.1, n=10, rng=c(0, 1)) -dp.mean2$release(data) +out2 <- dp.mean2$release(data) +print(out2) ``` Arguments From a94cd773346029e47e9c17a8974cb59a1196aa31 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Wed, 5 Jun 2019 12:34:38 -0400 Subject: [PATCH 04/18] modified gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c4438a2..656bf21 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .Ruserdata vignettes/*.html vignettes/*.pdf +.Rproj.user From bfc6b8538a3b240659c7ecc36739ba58e0ae341e Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Thu, 6 Jun 2019 22:57:51 -0400 Subject: [PATCH 05/18] Fix bug in Issue #49 Also fix a bug that becomes apparent after fixing the original bug: Not all partitions in the bootstrap replication are necessarily be filled, yielding NaN values when the statistic is calculated. Add data validation to ensure only calculating statistic on partitions that contain values. --- R/mechanism-bootstrap.R | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 3fb36b0..35c2363 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -5,20 +5,29 @@ #' @param sensitivity Sensitivity of the function #' @param epsilon Numeric differential privacy parameter #' @param fun Function to evaluate +#' @param inputObject the Bootstrap mechanism object on which the input function will be evaluated #' @return Value of the function applied to one bootstrap sample #' @import stats #' @export -bootstrap.replication <- function(x, n, sensitivity, epsilon, fun) { +bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) - max.appearances <- max(partition) - probs <- sapply(1:max.appearances, dbinom, size=n, prob=(1 / n)) - stat.partitions <- vector('list', max.appearances) - for (i in 1:max.appearances) { - variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon) - stat.i <- fun(x[partition == i]) - noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian') - stat.partitions[[i]] <- i * stat.i + noise.i + # make a sorted vector of the partitions of the data + # because it is not guaranteed that every partition from 1:max.appearances will have a value in it + # so we need to loop through only the partitions that have data + validPartitions <- sort(unique(partition[,1])) + # we do not want the 0 partition, so we remove it from the list + validPartitions <- validPartitions[2:length(validPartitions)] + # print the unique values of the partition, to track which entries may result in NaN + print(validPartitions) + probs <- sapply(1:length(validPartitions), dbinom, size=n, prob=(1 / n)) + stat.partitions <- vector('list', length(validPartitions)) + for (i in 1:length(validPartitions)) { + currentPartition <- validPartitions[i] + variance.currentPartition <- (currentPartition * probs[i] * (sensitivity^2)) / (2 * epsilon) + stat.currentPartition <- inputObject$bootStatEval(x[partition == currentPartition], fun, ...) + noise.currentPartition <- dpNoise(n=length(stat.currentPartition), scale=sqrt(variance.currentPartition), dist='gaussian') + stat.partitions[[i]] <- currentPartition * stat.currentPartition + noise.currentPartition } stat.out <- do.call(rbind, stat.partitions) return(apply(stat.out, 2, sum)) @@ -39,10 +48,10 @@ mechanismBootstrap <- setRefClass( ) mechanismBootstrap$methods( - bootStatEval = function(xi) { + bootStatEval = function(xi, fun, ...) { fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self) - input.vals = c(list(x=x), fun.args) - stat <- do.call(boot.fun, input.vals) + input.vals = c(list(x=xi), fun.args) + stat <- do.call(fun, input.vals) return(stat) }) @@ -58,11 +67,11 @@ mechanismBootstrap$methods( }) mechanismBootstrap$methods( - evaluate = function(fun, x, sens, postFun) { + evaluate = function(fun, x, sens, postFun, ...) { x <- censordata(x, .self$var.type, .self$rng) x <- fillMissing(x, .self$var.type, .self$impute.rng[0], .self$impute.rng[1]) epsilon.part <- epsilon / .self$n.boot - release <- replicate(.self$n.boot, bootstrap.replication(x, n, sens, epsilon.part, fun=.self$bootStatEval)) + release <- replicate(.self$n.boot, bootstrap.replication(x, n, sens, epsilon.part, fun=fun, inputObject = .self, ...)) std.error <- .self$bootSE(release, .self$n.boot, sens) out <- list('release' = release, 'std.error' = std.error) out <- postFun(out) From 58455821a50e6c6f445248ca9aeaaa1123087363 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Fri, 7 Jun 2019 10:20:38 -0400 Subject: [PATCH 06/18] Add 2 options to handle empty partitions in Issue #49 Will discuss with Ira which option is best --- R/mechanism-bootstrap.R | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 35c2363..ed8cf58 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -10,6 +10,10 @@ #' @import stats #' @export +# There are 2 options for handling empty partitions: + +# 1: skip it entirely, and say the total number of partitions is just the number of partitions that are not empty + bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) # make a sorted vector of the partitions of the data @@ -33,6 +37,34 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, return(apply(stat.out, 2, sum)) } +# 2: treat it as a partition with a mean of 0 and keep it in the calculation, adding noise and adding it to the final calculation + +# bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { +# partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) +# # make a sorted vector of the partitions of the data +# # because it is not guaranteed that every partition from 1:max.appearances will have a value in it +# validPartitions <- validPartitions <- sort(unique(partition[,1])) +# # print the unique values of the partition, to track which entries may result in NaN +# print(validPartitions) +# max.appearances <- max(partition) +# probs <- sapply(1:max.appearances, dbinom, size=n, prob=(1 / n)) +# stat.partitions <- vector('list', max.appearances) +# for (i in 1:max.appearances) { +# variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon) +# if (i %in% validPartitions) { +# stat.i <- fun(x[partition == i]) +# noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian') +# stat.partitions[[i]] <- i * stat.i + noise.i +# } else { +# stat.i <- 0 +# noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian') +# stat.partitions[[i]] <- i * stat.i + noise.i +# } +# } +# stat.out <- do.call(rbind, stat.partitions) +# return(apply(stat.out, 2, sum)) +# } + #' Bootstrap mechanism #' From 32d61d53e0fc40980c7e24cb836e60cf9dfba33f Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Fri, 7 Jun 2019 14:12:57 -0400 Subject: [PATCH 07/18] Change the return statement of bootstrap.replication to return the mean of the partition means (instead of the sum) --- R/mechanism-bootstrap.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index ed8cf58..7bff952 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -34,7 +34,10 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, stat.partitions[[i]] <- currentPartition * stat.currentPartition + noise.currentPartition } stat.out <- do.call(rbind, stat.partitions) - return(apply(stat.out, 2, sum)) + # return(apply(stat.out, 2, sum)) + # returnedBootstrappedResult = apply(stat.out, 2, sum) + returnedBootstrappedResult = apply(X = stat.out, MARGIN = 2, FUN = fun) + return(returnedBootstrappedResult) } # 2: treat it as a partition with a mean of 0 and keep it in the calculation, adding noise and adding it to the final calculation @@ -52,7 +55,7 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, # for (i in 1:max.appearances) { # variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon) # if (i %in% validPartitions) { -# stat.i <- fun(x[partition == i]) +# stat.i <- inputObject$bootStatEval(x[partition == currentPartition], fun, ...) # noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian') # stat.partitions[[i]] <- i * stat.i + noise.i # } else { From 8f11bfb73512176e4cc1229169319ace14921281 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Fri, 7 Jun 2019 14:18:38 -0400 Subject: [PATCH 08/18] fix assignment operator from = to <- --- R/mechanism-bootstrap.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 7bff952..3439c99 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -35,8 +35,8 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, } stat.out <- do.call(rbind, stat.partitions) # return(apply(stat.out, 2, sum)) - # returnedBootstrappedResult = apply(stat.out, 2, sum) - returnedBootstrappedResult = apply(X = stat.out, MARGIN = 2, FUN = fun) + # returnedBootstrappedResult <- apply(stat.out, 2, sum) + returnedBootstrappedResult <- apply(X = stat.out, MARGIN = 2, FUN = fun) return(returnedBootstrappedResult) } From 74b99fe0e37cf24b5638b2d5b70372d346e3fb07 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Fri, 7 Jun 2019 14:47:53 -0400 Subject: [PATCH 09/18] create test file for Issue #49 --- tests/testthat/test-bootstrap.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/testthat/test-bootstrap.R diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R new file mode 100644 index 0000000..7be4294 --- /dev/null +++ b/tests/testthat/test-bootstrap.R @@ -0,0 +1,15 @@ +library(PSIlence) +context("bootstrap") + +test_that('bootstrap did not run, then thre NaNs, now produces result that is way too big', { + data(PUMS5extract10000, package = "PSIlence") + + n.boot <- 25 + boot_mean <- dpMean$new(mechanism='mechanismBootstrap', var.type='numeric', + variable='income', n=10000, epsilon=0.1, rng=c(0, 750000), + n.boot=n.boot) + boot_mean$release(PUMS5extract10000) + print(boot_mean$result) + + print(mean(boot_mean$result$release)) +}) \ No newline at end of file From e2fb65caba5b9fc4452a0b0c257684f331f948d7 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Fri, 7 Jun 2019 17:43:27 -0400 Subject: [PATCH 10/18] Change function applied to partitions in bootstrap.replication to mean instead of sum. This addresses the huge standard error that results from bootstrapping. This was another bug found after fixing Issue #49. --- R/mechanism-bootstrap.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 3439c99..081bb93 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -65,7 +65,7 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, # } # } # stat.out <- do.call(rbind, stat.partitions) -# return(apply(stat.out, 2, sum)) +# return(apply(stat.out, 2, mean)) # } From 173450c307a69e813beaa7acc839f3328b2a5176 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Thu, 13 Jun 2019 13:33:39 -0400 Subject: [PATCH 11/18] change function in apply() in bootstrap.replication back to sum --- R/mechanism-bootstrap.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 081bb93..de686c9 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -35,8 +35,7 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, } stat.out <- do.call(rbind, stat.partitions) # return(apply(stat.out, 2, sum)) - # returnedBootstrappedResult <- apply(stat.out, 2, sum) - returnedBootstrappedResult <- apply(X = stat.out, MARGIN = 2, FUN = fun) + returnedBootstrappedResult <- apply(stat.out, 2, sum) return(returnedBootstrappedResult) } From e8b448c74cd551b0013fbc40f40c28986ea80c00 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Thu, 13 Jun 2019 16:55:58 -0400 Subject: [PATCH 12/18] Change function in bootStatEval to boot.fun (this was the reason for the HUGE standard error on the differentially private bootstrapped mean.) --- R/mechanism-bootstrap.R | 4 ++-- tests/testthat/test-bootstrap.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index de686c9..c67eae2 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -83,9 +83,9 @@ mechanismBootstrap <- setRefClass( mechanismBootstrap$methods( bootStatEval = function(xi, fun, ...) { - fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self) + fun.args <- getFuncArgs(boot.fun, inputList=list(...), inputObject=.self) input.vals = c(list(x=xi), fun.args) - stat <- do.call(fun, input.vals) + stat <- do.call(boot.fun, input.vals) return(stat) }) diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R index 7be4294..f4958c0 100644 --- a/tests/testthat/test-bootstrap.R +++ b/tests/testthat/test-bootstrap.R @@ -12,4 +12,4 @@ test_that('bootstrap did not run, then thre NaNs, now produces result that is wa print(boot_mean$result) print(mean(boot_mean$result$release)) -}) \ No newline at end of file +}) From 0fb770761f5b262ca4f94ce9b003e257ad60ff55 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Mon, 8 Jul 2019 11:03:16 -0400 Subject: [PATCH 13/18] remove lines mistakenly added to .Rbuildignore --- .Rbuildignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 48935ad..c68bc5a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,5 +3,3 @@ images README.md README_files .DS_Store -^.*\.Rproj$ -^\.Rproj\.user$ From de4a8b6d49ff145e1b2018c2dd71a1ac3010d7db Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Mon, 8 Jul 2019 11:05:09 -0400 Subject: [PATCH 14/18] remove line mistakenly added to .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 656bf21..3589ba9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -*.Rproj.user *.Rproj .Rhistory .RData From c7301c3e7b0c53078cc4917f41f20adbb207a4d2 Mon Sep 17 00:00:00 2001 From: Megan Fantes Date: Mon, 8 Jul 2019 11:16:26 -0400 Subject: [PATCH 15/18] update comments --- .Rbuildignore | 2 ++ R/mechanism-bootstrap.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index c68bc5a..48935ad 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,5 @@ images README.md README_files .DS_Store +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index c67eae2..0924a18 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -39,7 +39,7 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, return(returnedBootstrappedResult) } -# 2: treat it as a partition with a mean of 0 and keep it in the calculation, adding noise and adding it to the final calculation +# 2: treat it as a partition with a statistic of value 0 and keep it in the calculation, adding noise and adding it to the final calculation # bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { # partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) From f8a06e94c31ae76f2bf3ae8ab5c6be0e6962b898 Mon Sep 17 00:00:00 2001 From: Ira Globus-Harris Date: Tue, 1 Oct 2019 13:57:25 -0400 Subject: [PATCH 16/18] Updated mean vignette and NAMESPACE --- NAMESPACE | 12 ------------ vignettes/dp-mean.Rmd | 5 ----- 2 files changed, 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9450f68..cad9640 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,23 +1,11 @@ # Generated by roxygen2: do not edit by hand -<<<<<<< HEAD -export() export(bootstrap.replication) -export(censordata) -export(check_histogram_mechanism) -export(check_variable_type) -export(checkepsilon) -export(checkrange) -export(coefficient.release) -export(dlap) -======= -export(bootstrapReplication) export(censorData) export(checkEpsilon) export(checkRange) export(coefficientRelease) export(dLap) ->>>>>>> 83ca45d09aad2da93b57163f48978eec43dda0c0 export(dpCovariance) export(dpGLM) export(dpHeavyHitters) diff --git a/vignettes/dp-mean.Rmd b/vignettes/dp-mean.Rmd index 55a5bc8..1ec1075 100644 --- a/vignettes/dp-mean.Rmd +++ b/vignettes/dp-mean.Rmd @@ -46,12 +46,7 @@ dpMeanExample$release(data) dpMeanExample2 <- dpMean$new(mechanism='mechanismLaplace', varType='logical', variable='x2', epsilon=0.1, n=10, rng=c(0, 1)) -<<<<<<< HEAD -out2 <- dp.mean2$release(data) -print(out2) -======= dpMeanExample2$release(data) ->>>>>>> 83ca45d09aad2da93b57163f48978eec43dda0c0 ``` Arguments From 90bcb8b16ca445232cb5a63625024fc52f044a34 Mon Sep 17 00:00:00 2001 From: Ira Globus-Harris Date: Tue, 1 Oct 2019 15:31:08 -0400 Subject: [PATCH 17/18] Fixed post-upstream merge bugs --- .gitignore | 1 + R/mechanism-bootstrap.R | 15 +++++++-------- R/mechanism-laplace.R | 4 ++-- R/statistic-mean.R | 8 +++++++- tests/testthat/test-bootstrap.R | 6 ++---- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 3d66525..74816fd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ vignettes/*.pdf *.aux *.log *.synctex.gz +.Rproj.user diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index 49805c3..d7df1ce 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -13,7 +13,7 @@ # 1: skip it entirely, and say the total number of partitions is just the number of partitions that are not empty -bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { +bootstrapReplication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) # make a sorted vector of the partitions of the data # because it is not guaranteed that every partition from 1:max.appearances will have a value in it @@ -40,7 +40,7 @@ bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, # 2: treat it as a partition with a statistic of value 0 and keep it in the calculation, adding noise and adding it to the final calculation -# bootstrap.replication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { +# bootstrapReplication <- function(x, n, sensitivity, epsilon, fun, inputObject, ...) { # partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n)) # # make a sorted vector of the partitions of the data # # because it is not guaranteed that every partition from 1:max.appearances will have a value in it @@ -81,10 +81,9 @@ mechanismBootstrap <- setRefClass( ) mechanismBootstrap$methods( - - bootStatEval = function(xi,...) { + bootStatEval = function(xi, fun,...) { funArgs <- getFuncArgs(fun, inputList=list(...), inputObject=.self) - inputVals = c(list(x=x), funArgs) + inputVals = c(list(x=xi), funArgs) stat <- do.call(bootFun, inputVals) return(stat) }) @@ -101,11 +100,11 @@ mechanismBootstrap$methods( }) mechanismBootstrap$methods( - evaluate = function(fun, x, sens, postFun) { - x <- censorData(x, .self$varType, .self$rng) + evaluate = function(fun, x, sens, postFun, ...) { + x <- censorData(x, .self$varType, .self$rng, rngFormat=.self$rngFormat) x <- fillMissing(x, .self$varType, .self$imputeRng[0], .self$imputeRng[1]) epsilonPart <- epsilon / .self$nBoot - release <- replicate(.self$nBoot, bootstrapReplication(x, n, sens, epsilonPart, fun=.self$bootStatEval)) + release <- replicate(.self$nBoot, bootstrapReplication(x, .self$n, sens, epsilonPart, fun, .self)) stdError <- .self$bootSE(release, .self$nBoot, sens) out <- list('release' = release, 'stdError' = stdError) out <- postFun(out) diff --git a/R/mechanism-laplace.R b/R/mechanism-laplace.R index 0159a21..9e4a7a9 100644 --- a/R/mechanism-laplace.R +++ b/R/mechanism-laplace.R @@ -58,8 +58,8 @@ mechanismLaplace$methods( evaluate = function(fun, x, sens, postFun, ...) { x <- censorData(x, .self$varType, .self$rng, .self$bins, .self$rngFormat) x <- fillMissing(x, .self$varType, imputeRng=.self$rng, categories=.self$imputeBins) - fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self) - inputVals = c(list(x=x), fun.args) + funArgs <- getFuncArgs(fun, inputList=list(...), inputObject=.self) + inputVals = c(list(x=x), funArgs) trueVal <- do.call(fun, inputVals) # Concern: are we confident that the environment this is happening in is getting erased. scale <- sens / .self$epsilon release <- trueVal + dpNoise(n=length(trueVal), scale=scale, dist='laplace') diff --git a/R/statistic-mean.R b/R/statistic-mean.R index e2d86d2..96ef895 100644 --- a/R/statistic-mean.R +++ b/R/statistic-mean.R @@ -81,7 +81,13 @@ dpMean$methods( #' the \code{dpMean$release} function. release = function(data, ...) { x <- data[, variable] - .self$result <- export(mechanism)$evaluate(mean, x, .self$sens, .self$postProcess, ...) + if (mechanism=='mechanismLaplace'){ + .self$result <- export(mechanism)$evaluate(mean, x, .self$sens, .self$postProcess, ...) + } + else if (mechanism=='mechanismBootstrap'){ + .self$result <- export(mechanism)$evaluate(bootMean, x, .self$sens, .self$postProcess, .self$n) + } + }) dpMean$methods( diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R index f4958c0..9135097 100644 --- a/tests/testthat/test-bootstrap.R +++ b/tests/testthat/test-bootstrap.R @@ -1,15 +1,13 @@ -library(PSIlence) context("bootstrap") -test_that('bootstrap did not run, then thre NaNs, now produces result that is way too big', { +test_that('bootstrap did not run, then three NaNs, now produces result that is way too big', { data(PUMS5extract10000, package = "PSIlence") n.boot <- 25 - boot_mean <- dpMean$new(mechanism='mechanismBootstrap', var.type='numeric', + boot_mean <- dpMean$new(mechanism='mechanismBootstrap', varType='numeric', variable='income', n=10000, epsilon=0.1, rng=c(0, 750000), n.boot=n.boot) boot_mean$release(PUMS5extract10000) print(boot_mean$result) - print(mean(boot_mean$result$release)) }) From 25b97d1219f9c54b785fe011a5914e2bbe199f8b Mon Sep 17 00:00:00 2001 From: Ira Globus-Harris Date: Tue, 1 Oct 2019 15:33:36 -0400 Subject: [PATCH 18/18] Fixed option #2 and reran docs --- NAMESPACE | 2 +- R/mechanism-bootstrap.R | 3 ++- man/{bootstrap.replication.Rd => bootstrapReplication.Rd} | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) rename man/{bootstrap.replication.Rd => bootstrapReplication.Rd} (81%) diff --git a/NAMESPACE b/NAMESPACE index cad9640..c298730 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(bootstrap.replication) +export(bootstrapReplication) export(censorData) export(checkEpsilon) export(checkRange) diff --git a/R/mechanism-bootstrap.R b/R/mechanism-bootstrap.R index d7df1ce..30bf86d 100644 --- a/R/mechanism-bootstrap.R +++ b/R/mechanism-bootstrap.R @@ -22,7 +22,7 @@ bootstrapReplication <- function(x, n, sensitivity, epsilon, fun, inputObject, . # we do not want the 0 partition, so we remove it from the list validPartitions <- validPartitions[2:length(validPartitions)] # print the unique values of the partition, to track which entries may result in NaN - print(validPartitions) + #print(validPartitions) probs <- sapply(1:length(validPartitions), dbinom, size=n, prob=(1 / n)) stat.partitions <- vector('list', length(validPartitions)) for (i in 1:length(validPartitions)) { @@ -53,6 +53,7 @@ bootstrapReplication <- function(x, n, sensitivity, epsilon, fun, inputObject, . # for (i in 1:max.appearances) { # variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon) # if (i %in% validPartitions) { +# currentPartition <- validPartitions[i] # stat.i <- inputObject$bootStatEval(x[partition == currentPartition], fun, ...) # noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian') # stat.partitions[[i]] <- i * stat.i + noise.i diff --git a/man/bootstrap.replication.Rd b/man/bootstrapReplication.Rd similarity index 81% rename from man/bootstrap.replication.Rd rename to man/bootstrapReplication.Rd index 1843465..8ccc312 100644 --- a/man/bootstrap.replication.Rd +++ b/man/bootstrapReplication.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mechanism-bootstrap.R -\name{bootstrap.replication} -\alias{bootstrap.replication} +\name{bootstrapReplication} +\alias{bootstrapReplication} \title{Bootstrap replication for a function} \usage{ -bootstrap.replication(x, n, sensitivity, epsilon, fun, inputObject, ...) +bootstrapReplication(x, n, sensitivity, epsilon, fun, inputObject, ...) } \arguments{ \item{x}{Vector}