diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..0167d8c --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,9 @@ +misc +^.*\.Rproj$ +^.*\.github$ +^\.Rproj\.user$ +^cran-comments.md$ +^doc$ +^Meta$ +^CRAN-RELEASE$ +^CRAN-SUBMISSION$ diff --git a/.github/workflows/R-cmd-check.yaml b/.github/workflows/R-cmd-check.yaml new file mode 100644 index 0000000..c90f5a0 --- /dev/null +++ b/.github/workflows/R-cmd-check.yaml @@ -0,0 +1,78 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}- + + - name: Install system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + Rscript -e "remotes::install_github('r-hub/sysreqs')" + sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") + sudo -s eval "$sysreqs" + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..1f9d0d2 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,43 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: test-coverage + +jobs: + test-coverage: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@master + + - uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: macOS-r-3.6-${{ hashFiles('.github/depends.Rds') }} + restore-keys: macOS-r-3.6- + + - name: Install dependencies + run: | + install.packages(c("remotes")) + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8c990fa --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.Rproj.user +.Rhistory +.RData +inst/doc +doc +Meta diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..eae5b9d --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,36 @@ +Package: rddtools +Version: 2.0.0 +Title: Toolbox for Regression Discontinuity Design ('RDD') +Description: Set of functions for Regression Discontinuity Design ('RDD'), for + data visualisation, estimation and testing. +Authors@R: c( + person("Matthieu", "Stigler", role = "aut", comment=c(ORCID="0000-0002-6802-4290"), + email="Matthieu.Stigler@gmail.com"), + person("Bastiaan", "Quast", email = "bquast@gmail.com", role=c("aut", "cre"), comment=c(ORCID="0000-0002-2951-3577") ) + ) +Maintainer: Bastiaan Quast +Imports: + KernSmooth, + ggplot2, + rdd, + sandwich, + lmtest, + Formula, + locpol, + methods, + rdrobust, + rmarkdown +Depends: + AER, + np +Suggests: + stats4, + car, + knitr, + testthat +License: GPL (>= 2) +URL: https://github.com/bquast/rddtools +BugReports: https://github.com/bquast/rddtools/issues +VignetteBuilder: knitr +RoxygenNote: 7.2.3 +Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..3717789 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,75 @@ +# Generated by roxygen2: do not edit by hand + +S3method("[",rdd_data) +S3method(as.data.frame,rdd_data) +S3method(as.lm,rdd_reg) +S3method(as.lm,rdd_reg_np) +S3method(bread,rdd_reg_np) +S3method(covarTest_dis,rdd_data) +S3method(covarTest_dis,rdd_reg) +S3method(covarTest_mean,rdd_data) +S3method(covarTest_mean,rdd_reg) +S3method(estfun,rdd_reg_np) +S3method(getCall,rdd_reg) +S3method(model.frame,rdd_reg_np) +S3method(model.matrix,rdd_data) +S3method(plot,rdd_data) +S3method(plot,rdd_reg_lm) +S3method(plot,rdd_reg_np) +S3method(plotPlacebo,PlaceboVals) +S3method(plotPlacebo,rdd_reg) +S3method(plotPlaceboDens,PlaceboVals) +S3method(plotPlaceboDens,rdd_reg) +S3method(plotSensi,rdd_reg_lm) +S3method(plotSensi,rdd_reg_np) +S3method(print,rdd_reg_lm) +S3method(print,rdd_reg_np) +S3method(print,summary.rdd_reg_np) +S3method(rdd_coef,default) +S3method(rdd_coef,rdd_reg_np) +S3method(rdd_coef,rdd_reg_npreg) +S3method(subset,rdd_data) +S3method(summary,rdd_data) +S3method(summary,rdd_reg_np) +S3method(vcov,rdd_reg_np) +export(as.lm) +export(as.npreg) +export(as.npregbw) +export(clusterInf) +export(computePlacebo) +export(covarTest_dis) +export(covarTest_mean) +export(dens_test) +export(gen_mc_ik) +export(plotPlacebo) +export(plotPlaceboDens) +export(plotSensi) +export(rdd_bw_cct_estim) +export(rdd_bw_cct_plot) +export(rdd_bw_ik) +export(rdd_bw_rsw) +export(rdd_coef) +export(rdd_data) +export(rdd_gen_reg) +export(rdd_pred) +export(rdd_reg_lm) +export(rdd_reg_np) +export(rot_bw) +export(vcovCluster) +export(vcovCluster2) +import(Formula) +import(KernSmooth) +import(ggplot2) +import(lmtest) +import(methods) +import(np) +import(rdd) +import(rdrobust) +import(sandwich) +importFrom(AER,ivreg) +importFrom(locpol,gaussK) +importFrom(locpol,locpol) +importFrom(rdrobust,rdbwselect) +importFrom(rdrobust,rdplot) +importFrom(stats,getCall) +importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..256356b --- /dev/null +++ b/NEWS.md @@ -0,0 +1,128 @@ +rddtools 1.8.0 +===================== +* redo documentation + + +rddtools 1.6.1 +===================== +* Fix bug #12 (https://github.com/bquast/rddtools/issues/12) reported by @PhilipSpechler + + +rddtools 1.6.0 +===================== +* documentation update + +rddtools 1.4.0 +===================== +Published on 2020-08-07 + +* fix CRAN error + + +rddtools 1.2.0 +===================== +Published on 2020-07-22 + +* fix CRAN error + +* documentation cleanup + +* switch to GitHub Actions + +* switch to codecov + +* test using R 4.0.0 + + +rddtools 1.0.0 +===================== + +* stable release + +* various maintenance updates + +* documentation updates + + +rddtools 0.5.0 +===================== + +* cleanup documentation + + +rddtools 0.3.0 +===================== + +* development taken over by Bastiaan + +* rename package to rddtools (from RDDtools) + +* rename functions to lower case + +* move package from subdir to repo root directory + +* change S3class method to export for roxygen + +* connect method functions with . in stead of white space + +* classify default functions as RDDcoef.default etc. + +* update DESCRIPTION with CRAN guidelines + +* change .onLoad to .onAttach + +* remove old lyx vignette in several places + +* move examples from README.Rmd to Rmd vignettes + +* fix empty package dependency bug + + +rddtools 0.22 +=========== +Updated on 21/5/14 + +* RDDdata: change arg z to covar, add new argument z for sharp, currently unused. + +* dens_test: work now on RDDreg, return object htest + +* Multiple changes in help files + +* Correct import, suggests, calls to ::: + + +rddtools 0.21 +=========== +Updated on 25/7/13 + +* Add new function RDDpred + +* Add new model.matrix.RDDdata, preparing all output, now used by all RDDreg_np, RDDreg_lm, RDDgenre... + +* Add method vcov.RDDreg, as.lm.RDDreg + +* Add enw function vcovCluster2, complement doc, add M Arai, + +* Add data STAR_MHE + +* Many small fixes + + +rddtools 0.2 +=========== +Updated on 16/7/13 + +* Add new option to have separate or same covariates + +* Add as.nprg, to convert to a np regression from package np + +* Add RDDcoef, working on multiple models (lm, np, npreg). + +* Many fixes... + + +rddtools 0.1 +=========== +Initial commit on 29/04/2013 + +* Initial commit, containing RDDdata, RDDreg_lm, RDDreg_np, plotSensi, plotPlacebo, etc... diff --git a/R/as.npreg.R b/R/as.npreg.R new file mode 100644 index 0000000..a6219df --- /dev/null +++ b/R/as.npreg.R @@ -0,0 +1,129 @@ +#' Convert an rdd_reg object to a \code{npreg} object +#' +#' Convert an rdd_object to a non-parametric regression \code{npreg} from package \code{np} +#' @param x Object of class \code{rdd_reg} created by \code{\link{rdd_reg_np}} or \code{\link{rdd_reg_lm}} +#' @param \ldots Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}} +#' @details This function converts an rdd_reg object into an \code{npreg} object from package \code{np} +#' Note that the output won't be the same, since \code{npreg} does not offer a triangular kernel, but a Gaussian or Epanechinkov one. +#' Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while rdd_reg +#' proceeds as if the kernel was univariate. A simple solution to make the multivariate kernel similar to the univariate one +#' is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. +#' @export +#' @return An object of class \code{npreg} or \code{npregbw} +#' @seealso \code{\link{as.lm}} which converts \code{rdd_reg} objects into \code{lm}. +#' @examples +#' # Estimate ususal rdd_reg: +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd) +#' +#' ## Convert to npreg: +#' reg_nonpara_np <- as.npreg(reg_nonpara) +#' reg_nonpara_np +#' rdd_coef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) +#' +#' ## Compare with result obtained with a Gaussian kernel: +#' bw_lm <- dnorm(house_rdd$x, sd=rddtools:::getBW(reg_nonpara)) +#' reg_nonpara_gaus <- rdd_reg_lm(rdd_object=house_rdd, w=bw_lm) +#' all.equal(rdd_coef(reg_nonpara_gaus),rdd_coef(reg_nonpara_np)) + + +as.npregbw <- function(x, ...) { + res <- as.npregbw_low(x = x, npreg = FALSE, ...) + res +} + +#' @rdname as.npregbw +#' @export +as.npreg <- function(x, ...) { + res <- as.npregbw_low(x = x, npreg = TRUE, ...) + res +} + + +as.npregbw_low <- function(x, npreg = FALSE, adjustik_bw = TRUE, ...) { + + dat <- getOriginalData(x) + bw <- getBW(x) + cutpoint <- getCutpoint(x) + + ## Specify inputs to npregbw: + + ## data: + x <- dat$x + dat_np <- data.frame(y = dat$y, x = x, D = ifelse(x >= cutpoint, 1, 0), Dx = ifelse(x >= cutpoint, x, 0)) + dataPoints <- data.frame(x = c(cutpoint, cutpoint), D = c(0, 1), Dx = c(0, cutpoint)) + + ## bw: + range.x <- range(dat$x, na.rm = TRUE, finite = TRUE) + if (adjustik_bw) { + ## & names(bw) =='h_opt' + bw <- rdd_bw_ik(dat, kernel = "Normal") + } + bw_other <- 9999 * diff(range.x) + bws <- c(bw, rep(bw_other, 2)) + + + ## start npregbw + res <- np::npregbw(bws = bws, formula = y ~ x + D + Dx, data = dat_np, regtype = "ll", eval = dataPoints, bandwidth.compute = FALSE, + gradients = TRUE, ...) + class(res) <- c("rdd_reg_npregbw", class(res)) + + ## if npreg, return instead model_np <- npreg(bw_np, newdata=dataPoints, gradients=TRUE) + if (npreg == TRUE) { + + # check if np is installed + if (!requireNamespace("np", quietly = TRUE)) { + stop("The package 'np' is needed for this function to work. Please install it.", call. = FALSE) + } + + # require('np') requireNamespace('np', quietly = TRUE) + options(np.messages = TRUE) ## otherwise got warnings messages... probably because comes only if loaded! + res <- np::npreg(res, newdata = dataPoints, gradients = TRUE, ...) + class(res) <- c("rdd_reg_npreg", class(res)) + } + + attr(res, "RDDdf") <- dat_np + attr(res, "cutpoint") <- cutpoint + res +} + + +#' @export +rdd_coef.rdd_reg_npreg <- function(object, allInfo = FALSE, allCo = FALSE, ...) { + + co <- diff(object$mean) + if (allInfo) { + se <- sum(object$merr) + zval <- co/se + pval <- 2 * pnorm(abs(zval), lower.tail = FALSE) + res <- cbind(co, se, zval, pval) + colnames(res) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)") + rownames(res) <- "D" + } else { + res <- co + } + + if (allCo) { + cos <- c(object$mean[1], object$grad) + ses <- c(object$merr[1], object$gerr) + + ## X_right: + dataPoints_Xr <- data.frame(x = 0, D = 0, Dx = c(0, 1)) + Xr <- diff(predict(object, newdata = dataPoints_Xr)) + + estimates <- c(cos[1], co, cos[2], Xr) + + if (allInfo) { + zvals <- cos/ses + pvals <- 2 * pnorm(abs(zvals), lower.tail = FALSE) + res <- data.frame(Estimate = estimates, `Std. Error` = c(ses[1], se, ses[2:3]), `z value` = c(zvals[1], zval, zvals[2:3]), + `Pr(>|z|)` = c(pvals[1], pval, pvals[2:3]), check.names = FALSE) + rownames(res) <- c("(Intercept)", "D", "x_left", "x_right") + } else { + res <- estimates + } + } + + res +} diff --git a/R/bw_cct_estim.R b/R/bw_cct_estim.R new file mode 100644 index 0000000..4f3a99a --- /dev/null +++ b/R/bw_cct_estim.R @@ -0,0 +1,42 @@ +#' Bandwidth selection for Regression Discontinuity estimators, CTT 2014 +#' +#' Simple wrapper of the Calonico-Cattaneo-Titiunik (2014) bandwidth selection procedures +#' for RDD estimators \code{\link[rdrobust]{rdbwselect}}. +#' +#' @param rdd_object of class rdd_data created by \code{\link{rdd_data}} +#' @param kernel The type of kernel used: either \code{Triangular}, \code{Uniform} or \code{Epanechnikov}. +#' @param method The type of method used. See +#' @param \ldots further arguments passed to \code{\link[rdrobust]{rdbwselect}}. +#' @return See documentation of \code{\link[rdrobust]{rdbwselect}} +#' @references Calonico, S., M. D. Cattaneo, and R. Titiunik. 2014a. Robust Nonparametric Confidence Intervals for Regression-Discontinuity Designs. Econometrica 82(6): 2295-2326. +#' \url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}. +#' @seealso \code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +#' @author Original code written by Calonico, Cattaneo, Farrell and Titiuni, see \code{\link[rdrobust]{rdbwselect}} +#' @importFrom rdrobust rdbwselect +#' @export +#' @examples +#' data(house) +#' rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +#' rdd_bw_cct_estim(rd) +#' + + + +rdd_bw_cct_estim <- function(rdd_object, + method=c("mserd", "msetwo", "msesum", "msecomb1", "msecomb2", "cerrd", "certwo", "cersum", "cercomb1"), + kernel = c("Triangular", "Uniform", "Epanechnikov"), ...) { + + kernel <- tolower(match.arg(kernel)) + method <- match.arg(method) + + checkIsRDD(rdd_object) + + rdd_data <- getOriginalData(rdd_object) + + res <- rdrobust::rdbwselect(y=rdd_data$y, x=rdd_data$x, + c = getCutpoint(rdd_object), + kernel = "tri", + bwselect = method, + ...) + return(res) +} diff --git a/R/bw_cct_plot.R b/R/bw_cct_plot.R new file mode 100644 index 0000000..48f1a42 --- /dev/null +++ b/R/bw_cct_plot.R @@ -0,0 +1,44 @@ +#' Bandwidth selection for Regression Discontinuity visualisation, CTT 2015 +#' +#' Simple wrapper of the Calonico-Cattaneo-Titiunik (2015) bandwidth selection procedures +#' for RDD visualisation \code{\link[rdrobust]{rdplot}}. +#' +#' @param rdd_object of class rdd_data created by \code{\link{rdd_data}} +#' @param method The type of method used. See \code{\link[rdrobust]{rdplot}}. +#' Default is \code{esmv}, the variance mimicking evenly-spaced method. +#' @param \ldots further arguments passed to \code{\link[rdrobust]{rdplot}}. +#' @return See documentation of \code{\link[rdrobust]{rdplot}} +#' @references Calonico, S., M. D. Cattaneo, and R. Titiunik. 2015a. Optimal Data-Driven Regression Discontinuity Plots. Journal of the American Statistical Association 110(512): 1753-1769. +#' \url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}. +#' @seealso \code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +#' @author Original code written by Calonico, Cattaneo, Farrell and Titiuni, see \code{\link[rdrobust]{rdplot}} +#' @importFrom rdrobust rdplot +#' @export +#' @examples +#' data(house) +#' rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +#' rdd_bw_cct_plot(rd) +#' + + +rdd_bw_cct_plot <- function(rdd_object, method=c("esmv", "es", "espr", "esmvpr", "qs", "qspr", "qsmv", "qsmvpr"), ...) { + method <- match.arg(method) + checkIsRDD(rdd_object) + + rdd_data <- getOriginalData(rdd_object) + rdp <- rdrobust::rdplot(y=rdd_data$y, x=rdd_data$x, + c = getCutpoint(rdd_object), hide=TRUE, + ...) + rdp +} + + +if(FALSE){ + # data(house) + rd <- rdd_data(x=x, y=y, data=house, cutpoint=0) + + rdd_bw_cct_plot(rdd_object=rd) + + reg_np <- rdd_reg_np(rd) + rdd_bw_cct_plot(reg_np) +} diff --git a/R/bw_ik.R b/R/bw_ik.R new file mode 100644 index 0000000..c74ed82 --- /dev/null +++ b/R/bw_ik.R @@ -0,0 +1,208 @@ +#' Imbens-Kalyanaraman Optimal Bandwidth Calculation +#' +#' Imbens-Kalyanaraman optimal bandwidth +#' for local linear regression in Regression discontinuity designs. +#' +#' @param rdd_object of class rdd_data created by \code{\link{rdd_data}} +#' @param kernel The type of kernel used: either \code{triangular} or \code{uniform}. +#' @return The optimal bandwidth +#' @references Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,' +#' Review of Economic Studies (2012) 79, 933-959 +#' @seealso \code{\link{rdd_bw_rsw}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @export +#' @examples +#' data(house) +#' rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +#' rdd_bw_ik(rd) + + +rdd_bw_ik <- function(rdd_object, kernel = c("Triangular", "Uniform", "Normal")) { + + kernel <- match.arg(kernel) + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + + rdd_data <- getOriginalData(rdd_object) + res <- rdd_bw_ik_low(X = rdd_data$x, Y = rdd_data$y, threshold = cutpoint, + verbose = FALSE, type = "RES", returnBig = FALSE, + kernel = kernel) + return(res) + +} + +ik_bias <- function(rdd_object, kernel = c("Triangular", "Uniform", "Normal"), bw) { + + kernel <- match.arg(kernel) + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + + resB <- rdd_bw_ik_low(X = rdd_object$x, Y = rdd_object$y, threshold = cutpoint, verbose = FALSE, type = "RES", returnBig = TRUE, + kernel = kernel) + + ## compute C1: see ik equ 5, and Fan Jijbels (1996, 3.23) is done in R with locpol, computeMu(i=2, equivKernel(TrianK, nu=0, + ## deg=1, lower=0, upper=Inf), lower=0, upper=Inf) + C1 <- switch(kernel, Triangular = -0.1, Uniform = -0.1666667, Normal = -0.7519384) ## from: + + ## Compute bias as in ik equ:5, note here 1/4 is outside C1 + if (missing(bw)) + bw <- resB$h_opt + res <- C1 * 1/2 * bw^2 * (resB$m2_right - resB$m2_left) + return(res) + +} + +ik_var <- function(rdd_object, kernel = c("Triangular", "Uniform", "Normal"), bw) { + + kernel <- match.arg(kernel) + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + + resB <- rdd_bw_ik_low(X = rdd_object$x, Y = rdd_object$y, threshold = cutpoint, verbose = FALSE, type = "RES", returnBig = TRUE, + kernel = kernel) + + ## compute C2: see ik equ 5, and Fan Jijbels (1996, 3.23) is done in R with locpol, computeRK(equivKernel(TrianK, nu=0, deg=1, + ## lower=0, upper=Inf), lower=0, upper=Inf) + C2 <- switch(kernel, Triangular = 4.8, Uniform = 4, Normal = 1.785961) ## from: + + ## Compute var as in ik equ:5, + if (missing(bw)) + bw <- resB$h_op + elem1 <- (resB$var_inh_left + resB$var_inh_right)/resB$f_cu + elem2 <- C2/(nrow(rdd_object) * bw) + res <- elem1 * elem2 + res +} + +ik_amse <- function(rdd_object, kernel = c("Triangular", "Uniform", "Normal"), bw) { + + var <- ik_var(rdd_object = rdd_object, kernel = kernel, bw = bw) + bias <- ik_bias(rdd_object = rdd_object, kernel = kernel, bw = bw) + res <- bias^2 + var + res +} + + +rdd_bw_ik_low <- function(X, Y, threshold = 0, verbose = FALSE, type = c("RES", "RES_imp", "WP"), returnBig = FALSE, kernel = c("Triangular", + "Uniform", "Normal")) { + + type <- match.arg(type) + kernel <- match.arg(kernel) + + + N <- length(X) + N_left <- sum(X < threshold, na.rm = TRUE) + N_right <- sum(X >= threshold, na.rm = TRUE) + + + ########## STEP 1 + + ## Silverman bandwidth + h1 <- 1.84 * sd(X) * N^(-1/5) + if (verbose) + cat("\n-h1:", h1) + + ## f(cut) + isIn_h1_left <- X >= (threshold - h1) & X < threshold + isIn_h1_right <- X >= threshold & X <= (threshold + h1) + + NisIn_h1_left <- sum(isIn_h1_left, na.rm = TRUE) + NisIn_h1_right <- sum(isIn_h1_right, na.rm = TRUE) + if (verbose) + cat("\n-N left /right:", NisIn_h1_left, NisIn_h1_right) + + + f_cut <- (NisIn_h1_left + NisIn_h1_right)/(2 * N * h1) + if (verbose) + cat("\n-f(threshold):", f_cut) + + ## Variances : Equ (13) + + var_inh_left <- var(Y[isIn_h1_left], na.rm = TRUE) + var_inh_right <- var(Y[isIn_h1_right], na.rm = TRUE) + + # problem with working pap0er: Equ 4.9 is different! + if (type == "WP") { + denom <- 1/(NisIn_h1_left + NisIn_h1_right) + var_inh_global <- denom * ((NisIn_h1_left - 1) * var_inh_left + (NisIn_h1_right - 1) * var_inh_right) + } + + if (verbose) { + cat("\n-Sigma^2 left:", var_inh_left, "\n-Sigma^2 right:", var_inh_right) + } + ########## STEP 2 + + + ## Global function of order 3: Equ (14) + reg <- lm(Y ~ I(X >= threshold) + I(X - threshold) + I((X - threshold)^2) + I((X - threshold)^3)) + m3 <- 6 * coef(reg)[5] + if (verbose) + cat("\n-m3:", m3) + + + ## left and right bandwidths: Equ (15) + Ck_h2 <- 3.556702 # 7200^(1/7) + h2_left <- Ck_h2 * (var_inh_left/(f_cut * m3^2))^(1/7) * N_left^(-1/7) + h2_right <- Ck_h2 * (var_inh_right/(f_cut * m3^2))^(1/7) * N_right^(-1/7) + + if (verbose) + cat("\n-h2 left:", h2_left, "\n-h2 right:", h2_right) + + ## second derivatives right/left + isIn_h2_left <- X >= (threshold - h2_left) & X < threshold + isIn_h2_right <- X >= threshold & X <= (threshold + h2_right) + + N_h2_left <- sum(isIn_h2_left, na.rm = TRUE) + N_h2_right <- sum(isIn_h2_right, na.rm = TRUE) + + reg2_left <- lm(Y ~ I(X - threshold) + I((X - threshold)^2), subset = isIn_h2_left) + reg2_right <- lm(Y ~ I(X - threshold) + I((X - threshold)^2), subset = isIn_h2_right) + + m2_left <- as.numeric(2 * coef(reg2_left)[3]) + m2_right <- as.numeric(2 * coef(reg2_right)[3]) + + if (verbose) + cat("\n-m2 left:", m2_left, "\n-m2 right:", m2_right) + + ########## STEP 3 + + ## Regularization: Equ (16) + if (type == "RES") { + r_left <- (2160 * var_inh_left)/(N_h2_left * h2_left^4) + r_right <- (2160 * var_inh_right)/(N_h2_right * h2_right^4) + } else { + r_left <- (2160 * var_inh_global)/(N_h2_left * h2_left^4) + r_right <- (2160 * var_inh_global)/(N_h2_right * h2_right^4) + } + + + if (verbose) + cat("\n-Reg left:", r_left, "\n-Reg right:", r_right) + + ## Compute kernel dependent constant: (see file ~/Dropbox/HEI/rdd/Rcode/ik bandwidth/bandwidth_comput.R) + Ck <- switch(kernel, Triangular = 3.4375, Uniform = 2.70192, Normal = 1.25864) # is not 5.4 as in paper since our kernel is on I(|x|<1), not <1/2 + + ## Final bandwidth: Equ (17) + h_opt <- Ck * ((var_inh_left + var_inh_right)/(f_cut * ((m2_right - m2_left)^2 + r_left + r_right)))^(1/5) * N^(-1/5) + names(h_opt) <- "h_opt" + + if (verbose) + cat("\n\n") + + ### + if (returnBig) { + res <- list() + res$h_opt <- as.numeric(h_opt) + res$var_inh_left <- var_inh_left + res$var_inh_right <- var_inh_right + res$m2_right <- m2_right + res$m2_left <- m2_left + res$f_cut <- f_cut + res$h2_left <- h2_left + res$h2_right <- h2_right + } else { + res <- h_opt + } + + return(res) +} diff --git a/R/bw_rot.R b/R/bw_rot.R new file mode 100644 index 0000000..ad58e2c --- /dev/null +++ b/R/bw_rot.R @@ -0,0 +1,85 @@ +#' Bandwidth selector +#' +#' implements dpill +#' +#' @param object object of class rdd_data +#' @references McCrary, Justin. (2008) 'Manipulation of the running variable in the regression discontinuity design: A density test,' \emph{Journal of Econometrics}. 142(2): 698-714. +#' @export +#' @examples +#' #No discontinuity + +### Crary bw + +rot_bw <- function(object) { + + if (!inherits(object, "rdd_data")) + stop("Only works for rdd_data objects") + cutpoint <- getCutpoint(object) + x <- object$x + y <- object$y + + ##### first step + n <- length(y) + sd_x <- sd(x, na.rm = TRUE) + bw_pilot <- (2 * sd_x)/sqrt(n) + + ## hist + his <- plotBin(x = x, y = y, h = bw_pilot, cutpoint = cutpoint, plot = FALSE, type = "number") + # his2 <- hist(x, breaks=c(min(x), his[['x']], max(x))) + x1 <- his$x + y1 <- his[, "y.Freq"] + + ##### second step + + ## regs: + reg_left <- lm(y1 ~ poly(x1, degree = 4, raw = TRUE), subset = x1 < cutpoint) + reg_right <- lm(y1 ~ poly(x1, degree = 4, raw = TRUE), subset = x1 >= cutpoint) + + + +} + + +#' Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth} +#' +#' Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) +#' either to the whole function, or to the functions below and above the cutpoint. +#' +#' @param object object of class rdd_data created by \code{\link{rdd_data}} +#' @param type Whether to choose a global bandwidth for the whole function (\code{global}) +#' or for each side (\code{sided}) +#' @return One (or two for \code{sided}) bandwidth value. +#' @references See \code{\link[KernSmooth]{dpill}} +#' @seealso \code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +#' @import KernSmooth +#' @export +#' @examples +#' data(house) +#' rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +#' rdd_bw_rsw(rd) + + +rdd_bw_rsw <- function(object, type = c("global", "sided")) { + + type <- match.arg(type) + + if (!inherits(object, "rdd_data")) + stop("Only works for rdd_data objects") + cutpoint <- getCutpoint(object) + x <- object$x + y <- object$y + + if (type == "global") { + bw <- dpill(x = x, y = y) + } else { + dat_left <- subset(object, x < cutpoint) + dat_right <- subset(object, x >= cutpoint) + + bw_left <- dpill(x = dat_left$x, y = dat_left$y) + bw_right <- dpill(x = dat_right$x, y = dat_right$y) + bw <- c(bw_left, bw_right) + } + + ## result + bw +} diff --git a/R/clusterInf.R b/R/clusterInf.R new file mode 100644 index 0000000..9e83b48 --- /dev/null +++ b/R/clusterInf.R @@ -0,0 +1,146 @@ +#' Post-inference for clustered data +#' +#' Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix +#' possibly on the range specified by bandwidth +#' @param object Object of class lm, from which rdd_reg also inherits. +#' @param clusterVar The variable containing the cluster attributions. +#' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich +#' @param type The type of cluster correction to use: either the degrees of freedom, or a HC matrix. +#' @param \ldots Further arguments passed to coeftest +#' @return The output of the coeftest function, which is itself of class \code{coeftest} +#' @seealso \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf} +#' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. +#' \emph{AmericanEconomic Review}, 93, p. 133-138 +#' @export +#' @import sandwich +#' @import lmtest +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' reg_para <- rdd_reg_lm(rdd_object=house_rdd) +#' +#' # here we just generate randomly a cluster variable: +#' nlet <- sort(c(outer(letters, letters, paste, sep=''))) +#' clusRandom <- sample(nlet[1:60], size=nrow(house_rdd), replace=TRUE) +#' +#' # now do post-inference: +#' clusterInf(reg_para, clusterVar=clusRandom) +#' clusterInf(reg_para, clusterVar=clusRandom, type='HC') + + +clusterInf <- function(object, clusterVar, vcov. = NULL, type = c("df-adj", "HC"), ...) { + + if (is.null(clusterVar)) + stop("clusterVar seems to be NULL?") + type <- match.arg(type) + + if (type == "df-adj") { + nClus <- if (is.factor(clusterVar)) + nlevels(clusterVar) else length(unique(clusterVar)) + res <- coeftest(object, vcov. = vcov., df = nClus, ...) + } else { + if (!is.null(vcov.)) + warning("arg 'vcov.' not used when 'type=HC' (default vcovCluster used)") + res <- coeftest(object, vcov. = function(x) vcovCluster(x, clusterVar = clusterVar), ...) + } + + return(res) +} + +#' @export +estfun.rdd_reg_np <- function(x, ...) { + inf_met <- infType(x) ## def in Misc.R + if (inf_met == "se") + stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") + estfun(x$RDDslot$model) +} + +#' @export +bread.rdd_reg_np <- function(x, ...) { + inf_met <- infType(x) ## def in Misc.R + if (inf_met == "se") + stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") + bread(x$RDDslot$model) +} + + +# sandwich.rdd_reg_np <- function (x, bread. = bread, meat. = meat, ...){ inf_met <- infType(x) ## def in Misc.R +# if(inf_met=='se') stop('No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference +# estimators') sandwich(x$RDDslot$model, bread.=bread., meat.=meat., ...) } + +#' @export +model.frame.rdd_reg_np <- function(formula, ...) model.frame(formula$RDDslot$model) + +#' Cluster Heteroskedasticity-consistent estimation of the covariance matrix. +#' +#' Offer a cluster variant of the usual Heteroskedasticity-consistent +#' @param object Object of class lm, from which rdd_reg also inherits. +#' @param clusterVar The variable containing the cluster attributions. +#' @return A matrix containing the covariance matrix estimate. +#' @author Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html} +#' @references Cameron, C., Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering, +#' \emph{Journal of Business and Economic Statistics}, vol. 29(2), pages 238-249. +#' #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. +#' \emph{American Economic Review}, 93, p. 133-138 +#' @references Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}. +#' @export +#' @seealso \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods. +#' @examples +#' data(STAR_MHE) +#' if(all(c(require(sandwich), require(lmtest)))){ +#' +#' # Run simple regression: +#' reg_krug <- lm(pscore~cs, data=STAR_MHE) +#' +#' # Row 1 of Table 8.2.1, inference with standard vcovHC: +#' coeftest(reg_krug,vcov.=vcovHC(reg_krug, 'HC1'))[2,2] +#' +#' # Row 4 of Table 8.2.1, inference with cluster vcovHC: +#' coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2] +#' } + +vcovCluster <- function(object, clusterVar) { + M <- length(unique(clusterVar)) + N <- length(clusterVar) + K <- getModelRank(object) + dfc <- (M/(M - 1)) * ((N - 1)/(N - K)) + uj <- apply(estfun(object), 2, function(x) tapply(x, clusterVar, sum)) + # require('sandwich') + dfc * sandwich::sandwich(object, meat. = crossprod(uj)/N) +} + +#' @rdname vcovCluster +#' @param clusterVar1,clusterVar2 The two cluster variables for the 2-cluster case. +#' @export +vcovCluster2 <- function(object, clusterVar1, clusterVar2) { + # R-codes (www.r-project.org) for computing multi-way clustered-standard errors. Mahmood Arai, Jan 26, 2008. See: Thompson + # (2006), Cameron, Gelbach and Miller (2006) and Petersen (2006). reweighting the var-cov matrix for the within model + + K <- getModelRank(object) + estF <- estfun(object) + + clusterVar12 <- paste(clusterVar1, clusterVar2, sep = "") + M1 <- length(unique(clusterVar1)) + M2 <- length(unique(clusterVar2)) + M12 <- length(unique(clusterVar12)) + N <- length(clusterVar1) + + dfc1 <- (M1/(M1 - 1)) * ((N - 1)/(N - K)) + dfc2 <- (M2/(M2 - 1)) * ((N - 1)/(N - K)) + dfc12 <- (M12/(M12 - 1)) * ((N - 1)/(N - K)) + + u1j <- apply(estF, 2, function(x) tapply(x, clusterVar1, sum)) + u2j <- apply(estF, 2, function(x) tapply(x, clusterVar2, sum)) + u12j <- apply(estF, 2, function(x) tapply(x, clusterVar12, sum)) + vc1 <- dfc1 * sandwich(object, meat. = crossprod(u1j)/N) + vc2 <- dfc2 * sandwich(object, meat. = crossprod(u2j)/N) + vc12 <- dfc12 * sandwich(object, meat. = crossprod(u12j)/N) + vcovMCL <- vc1 + vc2 - vc12 + vcovMCL +} + +getModelRank <- function(object, ...) UseMethod("getModelRank") + +getModelRank.default <- function(object, ...) object$rank + +getModelRank.rdd_reg_np <- function(object, ...) getModelRank.default(object$RDDslot$model) diff --git a/R/covarTests.R b/R/covarTests.R new file mode 100644 index 0000000..9d27434 --- /dev/null +++ b/R/covarTests.R @@ -0,0 +1,213 @@ +#' Testing for balanced covariates: equality of means with t-test +#' +#' Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold +#' +#' @param object object of class rdd_data +#' @param bw a bandwidth +#' @param paired Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests. +#' @param var.equal Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal +#' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function +#' @return A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @seealso \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution +#' @examples +#' data(house) +#' +#' ## Add randomly generated covariates +#' set.seed(123) +#' n_Lee <- nrow(house) +#' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), +#' z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)), +#' z3 = sample(letters, size = n_Lee, replace = TRUE)) +#' house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0) +#' +#' ## test for equality of means around cutoff: +#' covarTest_mean(house_rdd_Z, bw=0.3) +#' +#' ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: +#' covarTest_dis(house_rdd_Z, bw=0.3) +#' +#' ## covarTest_mean works also on regression outputs (bw will be taken from the model) +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z) +#' covarTest_mean(reg_nonpara) + + + + + +#' @export +covarTest_mean <- function(object, bw = NULL, paired = FALSE, var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg", + "hommel", "bonferroni")) UseMethod("covarTest_mean") + +#' @rdname covarTest_mean +#' @export +covarTest_mean.rdd_data <- function(object, bw = NULL, paired = FALSE, var.equal = FALSE, p.adjust = c("none", "holm", "BH", + "BY", "hochberg", "hommel", "bonferroni")) { + + cutpoint <- getCutpoint(object) + covar <- getCovar(object) + cutvar <- object$x + + covarTest_mean_low(covar = covar, cutvar = cutvar, cutpoint = cutpoint, + bw = bw, paired = paired, var.equal = var.equal, + p.adjust = p.adjust) + +} + + +#' @rdname covarTest_mean +#' @export +covarTest_mean.rdd_reg <- function(object, bw = NULL, paired = FALSE, var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", + "hochberg", "hommel", "bonferroni")) { + + cutpoint <- getCutpoint(object) + dat <- object$RDDslot$rdd_data + covar <- getCovar(dat) + cutvar <- dat$x + if (is.null(bw)) + bw <- getBW(object) + + covarTest_mean_low(covar = covar, cutvar = cutvar, cutpoint = cutpoint, bw = bw, paired = paired, var.equal = var.equal, + p.adjust = p.adjust) + +} + + +covarTest_mean_low <- function(covar, cutvar, cutpoint, bw = NULL, paired = FALSE, var.equal = FALSE, p.adjust = c("none", "holm", + "BH", "BY", "hochberg", "hommel", "bonferroni")) { + + p.adjust <- match.arg(p.adjust) + + ## subset + if (!is.null(bw)) { + isInH <- cutvar >= cutpoint - bw & cutvar <= cutpoint + bw + covar <- covar[isInH, ] + cutvar <- cutvar[isInH] + } + regime <- cutvar < cutpoint + + ## Split data + covar_num <- sapply(covar, make_numeric) + + tests <- apply(covar_num, 2, function(x) t.test(x[regime], x[!regime], paired = paired, var.equal = var.equal)) + tests_vals <- sapply(tests, function(x) c(x[["estimate"]], diff(x[["estimate"]]), x[c("statistic", "p.value")])) + + ## Adjust p values if required: + if (p.adjust != "none") { + tests_vals["p.value", ] <- p.adjust(tests_vals["p.value", ], method = p.adjust) + } + + ## Print results + res <- t(tests_vals) + colnames(res)[3] <- "Difference" + res + + +} + + + + +#' Testing for balanced covariates: equality of distribution +#' +#' Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold +#' +#' @param object object of class rdd_data +#' @param bw a bandwidth +#' @param exact Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed. +#' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function +#' @return A data frame with, for each covariate, the K-S statistic and its p-value. +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @seealso \code{\link{covarTest_mean}} for the t-test of equality of means +#' @examples +#' data(house) +#' +#' ## Add randomly generated covariates +#' set.seed(123) +#' n_Lee <- nrow(house) +#' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), +#' z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)), +#' z3 = sample(letters, size = n_Lee, replace = TRUE)) +#' house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0) +#' +#' ## Kolmogorov-Smirnov test of equality in distribution: +#' covarTest_dis(house_rdd_Z, bw=0.3) +#' +#' ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff: +#' covarTest_mean(house_rdd_Z, bw=0.3) +#' ## covarTest_dis works also on regression outputs (bw will be taken from the model) +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z) +#' covarTest_dis(reg_nonpara) + +#' @export +covarTest_dis <- function(object, bw, exact = NULL, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")) UseMethod("covarTest_dis") + +#' @rdname covarTest_dis +#' @export +covarTest_dis.rdd_data <- function(object, bw = NULL, exact = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")) { + + cutpoint <- getCutpoint(object) + covar <- getCovar(object) + cutvar <- object$x + + covarTest_dis_low(covar = covar, cutvar = cutvar, cutpoint = cutpoint, bw = bw, + exact = exact, p.adjust = p.adjust) + +} + +#' @rdname covarTest_dis +#' @export +covarTest_dis.rdd_reg <- function(object, bw = NULL, exact = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")) { + + cutpoint <- getCutpoint(object) + dat <- object$RDDslot$rdd_data + covar <- getCovar(dat) + cutvar <- dat$x + if (is.null(bw)) + bw <- getBW(object) + + covarTest_dis_low(covar = covar, cutvar = cutvar, cutpoint = cutpoint, bw = bw, + exact = exact, p.adjust = p.adjust) + +} + +covarTest_dis_low <- function(covar, cutvar, cutpoint, bw = NULL, exact = NULL, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni")) { + + p.adjust <- match.arg(p.adjust) + + ## subset + if (!is.null(bw)) { + isInH <- cutvar >= cutpoint - bw & cutvar <= cutpoint + bw + covar <- covar[isInH, ] + cutvar <- cutvar[isInH] + } + regime <- cutvar < cutpoint + + + + ## Split data + covar_num <- sapply(covar, make_numeric) + + tests <- apply(covar_num, 2, function(x) ks.test(x[regime], x[!regime], exact = exact)) + tests_vals <- sapply(tests, function(x) x[c("statistic", "p.value")]) + + ## Adjust p values if required: + if (p.adjust != "none") + tests_vals["p.value", ] <- p.adjust(tests_vals["p.value", ], method = p.adjust) + + ## Print results + res <- t(tests_vals) + res + + +} + +## small utility function +make_numeric <- function(x){ + if(is.character(x)) x <- as.factor(x) + as.numeric(x) +} \ No newline at end of file diff --git a/R/dens_test.R b/R/dens_test.R new file mode 100644 index 0000000..79c247d --- /dev/null +++ b/R/dens_test.R @@ -0,0 +1,49 @@ +#' Run the McCracy test for manipulation of the forcing variable +#' +#' Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{rdd_object}. +#' +#' @param rdd_object object of class rdd_data +#' @param bin Argument of the \code{\link{DCdensity}} function, the binwidth +#' @param bw Argument of the \code{\link{DCdensity}} function, the bandwidth +#' @param plot Whether to return a plot. Logical, default ot TRUE. +#' @param \ldots Further arguments passed to \code{\link[rdd]{DCdensity}}. +#' @export +#' @import rdd +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' dens_test(house_rdd) + + + +dens_test <- function(rdd_object, bin = NULL, bw = NULL, plot = TRUE, ...) { + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + x <- getOriginalX(rdd_object) + test <- try(DCdensity(runvar = x, cutpoint = cutpoint, bin = bin, bw = bw, plot = plot, ext.out = TRUE, ...), silent = TRUE) + if (inherits(test, "try-error")) { + warning("Error in computing the density, returning a simple histogram", if (is.null(bin)) + " with arbitrary bin" else NULL) + if (is.null(bin)) { + test <- try(DCdensity(rdd_object$x, cutpoint, bin = bin, bw = 0.2, ext.out = TRUE, plot = FALSE), silent = TRUE) + bin <- test$binsize + } + max_x <- max(rdd_object$x, na.rm = TRUE) + seq_breaks <- seq(from = min(rdd_object$x, na.rm = TRUE), to = max_x, by = bin) + if (max_x > max(seq_breaks)) + seq_breaks <- c(seq_breaks, max_x + 0.001) + hist(rdd_object$x, breaks = seq_breaks) + abline(v = cutpoint, col = 2, lty = 2) + } + + test.htest <- list() + test.htest$statistic <- c(`z-val` = test$z) + test.htest$p.value <- test$p + test.htest$data.name <- deparse(substitute(rdd_object)) + test.htest$method <- "McCrary Test for no discontinuity of density around cutpoint" + test.htest$alternative <- "Density is discontinuous around cutpoint" + test.htest$estimate <- c(Discontinuity = test$theta) + test.htest$test.output <- test + class(test.htest) <- "htest" + return(test.htest) +} diff --git a/R/gen_mc_ik.R b/R/gen_mc_ik.R new file mode 100644 index 0000000..3cf1461 --- /dev/null +++ b/R/gen_mc_ik.R @@ -0,0 +1,141 @@ +#' Generate Monte Carlo simulations of Imbens and Kalyanaraman +#' +#' Generate the simulations reported in Imbens and Kalyanaraman (2012) +#' @param n The size of sampel to generate +#' @param version The MC version of Imbens and Kalnayaraman (between 1 and 4). +#' @param sd The standard deviation of the error term. +#' @param output Whether to return a data-frame, or already a rdd_data +#' @param size The size of the effect, this depends on the specific version, defaults are as in ik: 0.04, NULL, 0.1, 0.1 +#' @return An data frame with x and y variables. +#' @export +#' @examples +#' mc1_dat <- gen_mc_ik() +#' MC1_rdd <- rdd_data(y=mc1_dat$y, x=mc1_dat$x, cutpoint=0) +#' +#' ## Use np regression: +#' reg_nonpara <- rdd_reg_np(rdd_object=MC1_rdd) +#' reg_nonpara +#' +#' # Represent the curves: +#' plotCu <- function(version=1, xlim=c(-0.1,0.1)){ +#' res <- gen_mc_ik(sd=0.0000001, n=1000, version=version) +#' res <- res[order(res$x),] +#' ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), 'y')) +#' plot(res, type='l', xlim=xlim, ylim=ylim, main=paste('DGP', version)) +#' abline(v=0) +#' xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),] +#' points(xCut, col=2) +#' } +#' layout(matrix(1:4,2, byrow=TRUE)) +#' plotCu(version=1) +#' plotCu(version=2) +#' plotCu(version=3) +#' plotCu(version=4) +#' layout(matrix(1)) + +gen_mc_ik <- function(n = 200, version = 1, sd = 0.1295, output = c("data.frame", "rdd_data"), size) { + + output <- match.arg(output) + if (!version %in% c(1:4) | length(version) != 1) + stop("arg 'version' should be between 1 and 4") + + foo <- switch(version, `1` = gen_mc_ik_1, `2` = gen_mc_ik_2, `3` = gen_mc_ik_3, `4` = gen_mc_ik_4) + if (missing(size)) { + size <- switch(version, `1` = 0.04, `2` = 0, `3` = 0.1, `4` = 0.1) + } + res <- foo(n = n, sd = sd, size = size) + if (output == "rdd_data") { + res <- rdd_data(x = res$x, y = res$y, cutpoint = 0) + } + res +} + + +#################################### MC 1 + +gen_mc_ik_1 <- function(n = 200, sd = 0.1295, size = 0.04) { + + ## Regressor: + Z <- rbeta(n, shape1 = 2, shape2 = 4, ncp = 0) + X <- 2 * Z - 1 + error <- rnorm(n, sd = sd) + + ## Prepare variables: + Y <- vector("numeric", length = n) + ind_below <- X < 0 + X_low <- X[ind_below] + X_up <- X[!ind_below] + + ## Compute Y variables: + Y[ind_below] <- 0.48 + 1.27 * X_low + 7.18 * X_low^2 + 20.21 * X_low^3 + 21.54 * X_low^4 + 7.33 * X_low^5 + error[ind_below] + Y[!ind_below] <- 0.48 + size + 0.84 * X_up - 3 * X_up^2 + 7.99 * X_up^3 - 9.01 * X_up^4 + 3.56 * X_up^5 + error[!ind_below] + + ## Result: + res <- data.frame(x = X, y = Y) + return(res) +} + +#################################### MC 2 + +gen_mc_ik_2 <- function(n = 200, sd = 0.1295, size = 0) { + + # if(!missing(size) && !is.null(size)) warning('Argument 'size' ignored for gen_mc_ik_2') Regressor: + Z <- rbeta(n, shape1 = 2, shape2 = 4, ncp = 0) + X <- 2 * Z - 1 + error <- rnorm(n, sd = sd) + + ## Compute Y variables: + Y <- ifelse(X < 0, 3 * X^2, 4 * X^2 + size) + error + + ## Result: + res <- data.frame(x = X, y = Y) + return(res) +} + + +#################################### MC 3 + +gen_mc_ik_3 <- function(n = 200, sd = 0.1295, size = 0.1) { + + ## Regressor: + Z <- rbeta(n, shape1 = 2, shape2 = 4, ncp = 0) + X <- 2 * Z - 1 + error <- rnorm(n, sd = sd) + + ## Compute Y variables: + Y <- 0.42 + ifelse(X < 0, 0, size) + 0.84 * X - 3 * X^2 + 7.99 * X^3 - 9.01 * X^4 + 3.56 * X^5 + error + + ## Result: + res <- data.frame(x = X, y = Y) + return(res) +} + +#################################### MC 4 + +gen_mc_ik_4 <- function(n = 200, sd = 0.1295, size = 0.1) { + + ## Regressor: + Z <- rbeta(n, shape1 = 2, shape2 = 4, ncp = 0) + X <- 2 * Z - 1 + error <- rnorm(n, sd = sd) + + ## Compute Y variables: + Y <- 0.42 + ifelse(X < 0, 0, size) + 0.84 * X + 7.99 * X^3 - 9.01 * X^4 + 3.56 * X^5 + error + + ## Result: + res <- data.frame(x = X, y = Y) + return(res) +} + + +#################################### MC simple + +gen_MC_simple <- function(n = 200, LATE = 0.3) { + + ## Regressor: + x <- rnorm(n) + D <- x >= 0 + y <- 0.8 + LATE * D + 0.3 * x + 0.1 * x * D + rnorm(n) + rdd_data(x = x, y = y, cutpoint = 0) + +} diff --git a/R/get_methods.R b/R/get_methods.R new file mode 100644 index 0000000..e8f06e8 --- /dev/null +++ b/R/get_methods.R @@ -0,0 +1,150 @@ + + +# checkIsRDD <- function(object) if(!inherits(object, 'rdd_data')) stop('Only works for rdd_data objects') checkIsAnyRDD <- +# function(object) if(!inherits(object, c('rdd_data', 'rdd_reg_np'))) stop('Only works for rdd_data objects') + +# function(object) if(!inherits(object, 'rdd_data')) stop('Only works for rdd_data objects') +checkIsAnyRDD <- checkIsRDD <- function(object) { + classesOk <- c("rdd_data", "rdd_reg_np", "rdd_reg_lm") + if (!inherits(object, classesOk)) + stop("Only works for rdd_data objects") +} + +getType <- function(object) { + checkIsRDD(object) + attr(object, "type") +} + +isFuzzy <- function(object) { + checkIsRDD(object) + attr(object, "type") == "Fuzzy" +} + +getCutpoint <- function(object) { + + checkIsRDD(object) + attr(object, "cutpoint") +} + +getOrder <- function(object) { + + checkIsRDD(object) + attr(object, "PolyOrder") +} + +getSlope <- function(object) { + + checkIsRDD(object) + attr(object, "slope") +} + +getBW <- function(object, force.na = FALSE) { + + checkIsAnyRDD(object) + res <- attr(object, "bw") + if (force.na) + if (is.null(res)) + res <- NA + res +} + + + +## return the type of inference used by rdd_reg_np +infType <- function(x) { + if (is.null(getCall(x)$inference)) + "se" else getCall(x)$inference +} + + +hasCovar <- function(object) UseMethod("hasCovar") + +hasCovar.rdd_data <- function(object) attr(object, "hasCovar") + +hasCovar.rdd_reg <- function(object) { + call <- getCall(object) + !is.null(call$covariates) +} + +getCovar <- function(object) { + if (!inherits(object, "rdd_data")) + stop("Only works for rdd_data objects") + if (!hasCovar(object)) + stop("object has no covariates") + + rem <- if (isFuzzy(object)) + 1:3 else 1:2 + res <- object[, -rem, drop = FALSE] + as.data.frame(res) +} + +getCovarNames <- function(object) { + if (!inherits(object, "rdd_data")) + stop("Only works for rdd_data objects") + if (!hasCovar(object)) + stop("object has no covariates") + + rem <- if (isFuzzy(object)) + 1:3 else 1:2 + colnames(object)[-rem] +} + +getOriginalX <- function(object) { + + cutpoint <- getCutpoint(object) + x <- object$model[, "x"] + if (cutpoint != 0) + x <- x + cutpoint + x +} + +getOriginalX <- function(object) UseMethod("getOriginalX") + + +getOriginalX.rdd_reg <- function(object) { + object$RDDslot$rdd_data[, "x"] +} + +getOriginalX.rdd_data <- function(object) { + object[, "x"] +} + +# getOriginalX.rdd_reg_np <- function(object){ cutpoint <- getCutpoint(object) Xnam <- getXname(object) x <- +# object$model[,Xnam] if(cutpoint!=0) x <- x+cutpoint x } + + +getOriginalData <- function(object, na.rm = TRUE, classRDD = TRUE) UseMethod("getOriginalData") + +# getOriginalData.rdd_reg_np <- function(object, na.rm=TRUE){ cutpoint <- getCutpoint(object) Xnam <- getXname(object) dat <- +# object$model[,c('y',Xnam)] if(cutpoint!=0) dat[,Xnam] <- dat[,Xnam] +cutpoint if(na.rm) dat <- dat[apply(dat, 1, +# function(x) all(!is.na(x))),] # remove na rows dat } + + + +getOriginalData.rdd_reg <- function(object, na.rm = TRUE, classRDD = TRUE) { + res <- object$RDDslot$rdd_data + if (na.rm) + res <- res[apply(res, 1, function(x) all(!is.na(x))), ] # remove na rows + if (!classRDD) + res <- as.data.frame(res) + res +} + +getOriginalData.rdd_data <- function(object, na.rm = TRUE, classRDD = TRUE) { + res <- object + if (na.rm) + res <- res[apply(res, 1, function(x) all(!is.na(x))), ] # remove na rows + if (!classRDD) + res <- as.data.frame(res) + res +} + + + +#' @importFrom stats getCall +#' @export +getCall.rdd_reg <- function(x, ...) attr(x, "RDDcall") + + +# format(Sys.Date(), '%A %Y-%m-%d') + diff --git a/R/model.matrix.rdd.R b/R/model.matrix.rdd.R new file mode 100644 index 0000000..c055881 --- /dev/null +++ b/R/model.matrix.rdd.R @@ -0,0 +1,88 @@ +#' @export + +model.matrix.rdd_data <- function(object, + covariates = NULL, + order = 1, + bw = NULL, + slope = c("separate", "same"), + covar.opt = list(strategy = c("include", "residual"), + slope = c("same", "separate"), + bw = NULL), + covar.strat = c("include", "residual"), ...) { + + checkIsRDD(object) + rdd_object <- object + type <- getType(object) + + if (!missing(covar.strat)) + stop("covar.strat is deprecated, use covar.opt = list(strategy=...) instead") + + slope <- match.arg(slope) + if(!is.list(covar.opt)) stop("Argument 'covar.opt' should be a list") + covar.strat <- match.arg(covar.opt$strategy, choices = c("include", "residual")) + covar.slope <- match.arg(covar.opt$slope, choices = c("same", "separate")) + + cutpoint <- getCutpoint(rdd_object) + if (!is.null(covariates) & !hasCovar(rdd_object)) + stop("Arg 'covariates' was specified, but no covariates found in 'rdd_object'.") + + ## Construct data + dat <- as.data.frame(rdd_object) + + dat_step1 <- dat[, c("y", "x")] + dat_step1$x <- dat_step1$x - cutpoint + + L <- ifelse(dat_step1$x >= 0, 1, 0) + dat_step1$D <- if (type == "Sharp") + L else object$z + + if (order > 0) { + polys <- poly(dat_step1$x, degree = order, raw = TRUE) + colnames(polys) <- paste("x", 1:order, sep = "^") + dat_step1 <- cbind(dat_step1[, c("y", "D")], polys) + if (slope == "separate") { + polys2 <- polys * L + colnames(polys2) <- paste(colnames(polys), "right", sep = "_") + dat_step1 <- cbind(dat_step1, polys2) + } + } else { + dat_step1$x <- NULL + } + + ## Covariates + if (!is.null(covariates)) { + covar <- getCovar(rdd_object) + formu.cova <- covariates + + if (grepl("\\.", formu.cova)) + formu.cova <- paste(colnames(covar), collapse = " + ") + if (covar.slope == "separate") { + formu.cova <- paste(formu.cova, "+", paste("D*(", formu.cova, ")", sep = ""), sep = " ") + covar$D <- dat_step1$D + } + + formula.cova <- as.formula(paste("~", formu.cova)) + mf <- model.frame(formula.cova, covar, na.action = na.pass) + M_covar <- model.matrix(formula.cova, data = mf) + + if (covar.strat == "residual") { + M_covar <- data.frame(y = dat_step1$y, M_covar) + first_stage <- lm(y ~ ., data = M_covar) ## regress y on covariates only + dat_step1$y <- residuals(first_stage) ## change in original data + } else { + rem <- switch(covar.slope, separate = "^D$|(Intercept)", same = "(Intercept)") + M_covar <- M_covar[, -grep(rem, colnames(M_covar)), drop = FALSE] + dat_step1 <- cbind(dat_step1, M_covar) ## add covar as regressors + } + } + + ## Colnames cleaning + colnames(dat_step1) <- gsub("x\\^1", "x", colnames(dat_step1)) + + ## + if (type == "Fuzzy") + dat_step1$ins <- L + + ## return results: + dat_step1 +} diff --git a/R/placebo.R b/R/placebo.R new file mode 100644 index 0000000..a393fca --- /dev/null +++ b/R/placebo.R @@ -0,0 +1,287 @@ +#' Draw a (density) plot of placebo tests +#' +#' Draw a plot of placebo tests, estimating the impact on fake cutpoints +#' @param object the output of an RDD regression +#' @param device Whether to draw a base or a ggplot graph. +#' @param \ldots Further arguments passed to specific methods. +#' @param vcov. Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}. +#' @param plot Whether to actually plot the data. +#' @param output Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object +#' @return A data frame containing the cutpoints, their corresponding estimates and confidence intervals. +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd) +#' plotPlacebo(reg_nonpara) +#' +#' # Use with another vcov function; cluster case +#' reg_nonpara_lminf <- rdd_reg_np(rdd_object=house_rdd, inference='lm') +#' # need to be a function applied to updated object! +#' vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x) +#' plotPlacebo(reg_nonpara_lminf, vcov. = vc) + + +#' @export +plotPlacebo <- function(object, device = c("ggplot", "base"), ...) UseMethod("plotPlacebo") + +#' @rdname plotPlacebo +#' @export +#' @param from Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint +#' @param to Ending point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint +#' @param by Increments of the from-to sequence +#' @param level Level of the confidence interval shown +#' @param same_bw Whether to re-estimate the bandwidth at each point +plotPlacebo.rdd_reg <- function(object, device = c("ggplot", "base"), from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, + vcov. = NULL, plot = TRUE, output = c("data", "ggplot"), ...) { + + device <- match.arg(device) + output <- match.arg(output) + + # compute Placebos: + seq_vals <- computePlacebo(object = object, from = from, to = to, by = by, level = level, same_bw = same_bw, vcov. = vcov.) + + ## Use low-level to plot: + plotPlacebo_low(seq_vals, device = device, plot = plot, output = output, ...) + + invisible(seq_vals) +} + + + +#' @export +plotPlacebo.PlaceboVals <- function(object, device = c("ggplot", "base"), plot = TRUE, output = c("data", "ggplot"), ...) { + + device <- match.arg(device) + output <- match.arg(output) + plotPlacebo_low(object, device = device, plot = plot, output = output, ...) + + invisible(object) +} + + +plotPlacebo_low <- function(seq_vals, device = c("ggplot", "base"), output = c("data", "ggplot"), plot = TRUE) { + + device <- match.arg(device) + output <- match.arg(output) + + if (device == "base") { + if (plot) { + ylims <- range(seq_vals[, c("CI_low", "CI_high")], na.rm = TRUE) + xlims <- range(seq_vals$cutpoint) + + dat_left <- subset(seq_vals, position == "left") + dat_right <- subset(seq_vals, position == "right") + dat_true <- subset(seq_vals, position == "True") + + plot(dat_left$cutpoint, dat_left$LATE, type = "l", ylab = "LATE", xlab = "Cutpoints", ylim = ylims, xlim = xlims) + title("Placebo test") + abline(h = 0) + + # left CI + lines(dat_left$cutpoint, dat_left$CI_low, lty = 2) + lines(dat_left$cutpoint, dat_left$CI_high, lty = 2) + + # right values: + lines(dat_right$cutpoint, dat_right$LATE, lty = 1) + lines(dat_right$cutpoint, dat_right$CI_low, lty = 2) + lines(dat_right$cutpoint, dat_right$CI_high, lty = 2) + + # add estimate at true cutoff + points(dat_true$cutpoint, dat_true$LATE, col = 2) + segments(dat_true$cutpoint, ylims[1] - 1, dat_true$cutpoint, dat_true$LATE, col = "red", lty = 2) ## vertical line + segments(xlims[1] - 1, dat_true$LATE, dat_true$cutpoint, dat_true$LATE, col = "red", lty = 2) + } + if (output != "data") + warning("output='ggplot' only makes sense with device='ggplot'") + } else { + seq_vals_placeb <- subset(seq_vals, position != "True") + seq_vals_true <- subset(seq_vals, position == "True") + + # hack for decent width of error bar: + last_left <- nrow(subset(seq_vals_placeb, position == "left")) + W <- diff(seq_vals_placeb[c(last_left, last_left + 1), "cutpoint"])/5 + + pl <- qplot(x = cutpoint, y = LATE, data = seq_vals_placeb, geom = "line", colour = position) + geom_smooth(aes(ymin = CI_low, + ymax = CI_high), data = seq_vals_placeb, stat = "identity") + theme(legend.position = "none") + geom_hline(yintercept = 0) + + geom_point(aes(x = cutpoint, y = LATE), data = seq_vals_true) + geom_errorbar(aes(ymin = CI_low, ymax = CI_high), + data = seq_vals_true, width = W) + if (plot) + print(pl) + } + + ## export (silently) results: + out <- switch(output, data = seq_vals, ggplot = pl) + invisible(out) +} + + +#' @rdname plotPlacebo +#' @export +plotPlaceboDens <- function(object, device = c("ggplot", "base"), ...) UseMethod("plotPlaceboDens") + +#' @rdname plotPlacebo +#' @export +plotPlaceboDens.rdd_reg <- function(object, device = c("ggplot", "base"), from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, + vcov. = NULL, ...) { + + device <- match.arg(device) + + # compute Placebos: + seq_vals <- computePlacebo(object = object, from = from, to = to, by = by, level = level, same_bw = same_bw, vcov. = vcov.) + + ## Use low-level to plot: + plotPlaceboDens_low(seq_vals, device = device) + + invisible(seq_vals) +} + + +#' @export +plotPlaceboDens.PlaceboVals <- function(object, device = c("ggplot", "base"), ...) { + + device <- match.arg(device) + plotPlaceboDens_low(object, device = device, ...) + + invisible(object) +} + + +plotPlaceboDens_low <- function(seq_vals, device = c("ggplot", "base")) { + + device <- match.arg(device) + seq_vals_placeb <- subset(seq_vals, position != "True") + perc_rejected <- 100 * mean(seq_vals_placeb$p_value < 0.05) + + + if (device == "base") { + stop("not implemented") + } else { + seq_vals_true <- subset(seq_vals, position == "True") + + dens_max <- max(density(seq_vals_placeb$LATE)$y) # not efficient.... + text_rej <- paste("Perc rejected:", perc_rejected, "%") + + + pl <- qplot(x = LATE, data = seq_vals_placeb, geom = "density") + geom_vline(xintercept = 0, lty = 2) + geom_vline(xintercept = seq_vals_true$LATE, + colour = "red") + annotate("text", x = seq_vals_true$LATE, y = dens_max, label = "LATE at true \ncutpoint ", colour = "red", + hjust = 1) + annotate("text", x = seq_vals_true$LATE, y = 0, label = text_rej, hjust = 1, vjust = 1) + print(pl) + } + + ## export (silently) results: + invisible(seq_vals) +} + + +#' @rdname plotPlacebo +#' @export + +computePlacebo <- function(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, vcov. = NULL) { + + bw <- getBW(object) + hasBw <- !is.null(bw) + if (!hasBw) + bw <- NA + + if (!is.null(vcov.) && !is.function(vcov.)) + stop("'arg' vcov. should be a function (so can be updated at each step, not a matrix") + cutpoint <- getCutpoint(object) + forc_var <- getOriginalX(object) + + ## set grid: + quants_left <- quantile(forc_var[forc_var < cutpoint], probs = c(from, to)) + quants_right <- quantile(forc_var[forc_var >= cutpoint], probs = c(from, to)) + + seqi_left <- seq(from = quants_left[1], to = quants_left[2], by = by) + seqi_right <- seq(from = quants_right[1], to = quants_right[2], by = by) + seqi <- c(seqi_left, seqi_right) + + n_seqi_left <- length(seqi_left) + n_seqi_right <- length(seqi_right) + n_seqi <- length(seqi) + + ## set matrix for results: + seq_vals <- matrix(NA, nrow = n_seqi, ncol = 8) + colnames(seq_vals) <- c("cutpoint", "position", "LATE", "se", "p_value", "CI_low", "CI_high", "bw") + seq_vals[, "cutpoint"] <- seqi + + ## get original call: + object_call <- getCall(object) + + ## original dataset: + dat_orig <- eval(object_call$rdd_object) + hasCov <- hasCovar(dat_orig) + + ## run each time: + for (i in seq_along(seqi)) { + + ## select sample + if (seqi[i] < cutpoint) { + dat_sides <- subset(dat_orig, x < cutpoint) + } else { + dat_sides <- subset(dat_orig, x > cutpoint) ## exclude x>cutpoint + } + + + ## change the cutpoint, reattribute new data: + attr(dat_sides, "cutpoint") <- seqi[i] + object_call$rdd_object <- dat_sides + + ## Change bw if(same_bw=FALSE) + if (hasBw) + object_call$bw <- if (!same_bw) + rdd_bw_ik(dat_sides) else bw + + ## Re-estimate model with new cutpoint/bw + object_new <- eval(object_call) # rdd_reg_np(dat_sides, bw=bw_reg) + + ## assign results (LATE and se) + if (!inherits(object_new, "try-error")) { + + # check if lmtest is installed + if (!requireNamespace("lmtest", quietly = TRUE)) { + stop("The package 'lmtest' is needed for this function to work. Please install it.", call. = FALSE) + } + + # load the lmtest package require('lmtest') + + seq_vals[i, "LATE"] <- rdd_coef(object_new) + if (!is.null(vcov.)) { + co <- lmtest::coeftest(object_new, vcov. = vcov.)["D", , drop = FALSE] + } else { + co <- rdd_coef(object_new, allInfo = TRUE) + } + seq_vals[i, "se"] <- co[, "Std. Error"] + seq_vals[i, "p_value"] <- co[, 4] + seq_vals[i, "bw"] <- getBW(object_new, force.na = TRUE) + seq_vals[i, c("CI_low", "CI_high")] <- waldci(object_new, level = level, vcov. = vcov.)["D", ] ## confint version working with vcov. + } + } + + + ## Add midpoint: + if (!is.null(vcov.)) { + true_co <- coeftest(object, vcov. = vcov.)["D", , drop = FALSE] + } else { + true_co <- rdd_coef(object, allInfo = TRUE) + } + true_confint <- as.numeric(waldci(object, level = level, vcov. = vcov.)["D", ]) + true <- data.frame(cutpoint = cutpoint, position = "True", LATE = rdd_coef(object), se = true_co["D", "Std. Error"], p_value = true_co["D", + 4], CI_low = true_confint[1], CI_high = true_confint[2], bw = bw) + + + ## output + seq_vals <- as.data.frame(seq_vals) + seq_vals$position <- ifelse(seq_vals$cutpoint < cutpoint, "left", "right") + + seq_vals <- rbind(seq_vals, true) + seq_vals <- seq_vals[order(seq_vals$cutpoint), ] + rownames(seq_vals) <- seq_len(nrow(seq_vals)) + + + # seq_vals$position <- if(seq_vals$cutpoint == cutpoint) 'True' + + class(seq_vals) <- c("PlaceboVals", "data.frame") + return(seq_vals) +} diff --git a/R/plotBin.R b/R/plotBin.R new file mode 100644 index 0000000..38de8e4 --- /dev/null +++ b/R/plotBin.R @@ -0,0 +1,124 @@ +#' Bin plotting +#' +#' Do a 'scatterplot bin smoothing' +#' +#' @param x Forcing variable +#' @param y Output +#' @param h the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)}) +#' @param nbins number of Bins +#' @param cutpoint Cutpoint +#' @param plot Logical. Whether to plot or only returned silently +#' @param type Whether returns the y averages, or the x frequencies +#' @param xlim,cex,main,xlab,ylab Usual parameters passed to plot(), see \code{\link{par}} +#' @param \ldots further arguments passed to plot. +#' @return Returns silently values +#' @references McCrary, Justin. +#' @importFrom utils head + + +plotBin <- function(x, y, h = NULL, nbins = NULL, cutpoint = 0, plot = TRUE, type = c("value", "number"), xlim = range(x, na.rm = TRUE), + cex = 0.9, main = NULL, xlab, ylab, ...) { + + if(sum(c(is.null(h), is.null(nbins)))!=1) stop("Should provide only one of `h`` or `nbins`") + + type <- match.arg(type) + x_name <- if (missing(xlab)) + deparse(substitute(x)) else xlab + y_name <- if (missing(ylab)) + deparse(substitute(y)) else ylab + + + ## Set intervals and midpoints + min_x <- min(xlim) + max_x <- max(xlim) + + ## set h given nBins + if (!is.null(nbins)) { + if(length(nbins)==1){ + h_both <- diff(xlim)/nbins + + ## compute actual number of bins + K0 <- (cutpoint - min_x)/h_both + K1 <- (max_x -cutpoint )/h_both + + ## round number of bins + nbins <- roundEqual(c(K0, K1)) + } + + ## compute corresponding h_L + K0 <- nbins[1] + K1 <- nbins[2] + h_L <- c(cutpoint - min_x)/K0 + h_R <- c(max_x -cutpoint)/K1 + + } else if(!is.null(h)) { + if(length(h)==1){ + h_L <- h_R <- h + } else { + h_L <- h[1] + h_R <- h[2] + } + K0 <- ceiling((cutpoint - min_x)/h_L) # Number of bins on left + K1 <- ceiling((cutpoint + max_x)/h_R) # Number of bins on right + } + + ## + K <- K0 + K1 + + ## get bins midpoints, breaks, inspired by # Lee and Lemieux (2010) p. 308 + breaks_L <- cutpoint - (K0 - c(1:K0) + 1) * h_L + breaks_H <- cutpoint + c(0:K1) * h_R + breaks <- c(breaks_L, breaks_H) + + # mid_points + mid_points_bk <- head(breaks, -1)+diff(breaks)/2 + + ## compute output (mean of count) + intervs <- cut(x, breaks = breaks, include.lowest = TRUE) + if(any(is.na(intervs))) warning("NA intervs...") + + ## + table_intervs <- table(intervs) + n_non0_intervs <- sum(table_intervs != 0) + + y2 <- switch(type, value = tapply(y, intervs, mean, na.rm = TRUE), + number = table_intervs) + + + ## plot + if (plot) { + sub <- paste("h=", paste(round(c(h_L, h_R), 4),collapse="/"), ",\t\tn bins=", K, " (", K0, "/", K1,")", sep = "") + plot(mid_points_bk, as.numeric(y2), pch = 19, cex = cex, xlab = x_name, ylab = y_name, xlim = xlim, ...) + title(main = main, sub = sub) + abline(v = cutpoint, lty = 2) + } + + ## return invisible result + res <- data.frame(x = mid_points_bk, y = y2) + invisible(res) +} + + + +## Small utility funciton +roundEqual <- function(x){ + if(isTRUE(all.equal(x[1], x[2]))) { + r <- c(floor(x[1]), ceiling(x[2])) + } else { + r <- round(x) + } + r +} + + +if(FALSE){ + xt <- rnorm(100) + yt <- 1.2*x+rnorm(100) + plotBin(x=xt, y=yt) + plotBin(x=xt, y=yt, h=.05) + plotBin(x=xt, y=yt, h=c(0.05, 0.06)) + + pl_nb1 <- plotBin(x=xt, y=yt, nbins=25) + pl_nb2 <- plotBin(x=xt, y=yt, nbins=c(12, 13)) + pl_nb2 +} diff --git a/R/plotSensi.R b/R/plotSensi.R new file mode 100644 index 0000000..093a68e --- /dev/null +++ b/R/plotSensi.R @@ -0,0 +1,232 @@ +#' Plot the sensitivity to the bandwidth +#' +#' Draw a plot showing the LATE estimates depending on multiple bandwidths +#' +#' @param rdd_regobject object of a RDD regression, from either \code{\link{rdd_reg_lm}} or \code{\link{rdd_reg_np}} +#' @param from First bandwidth point. Default value is max(1e-3, bw-0.1) +#' @param to Last bandwidth point. Default value is bw+0.1 +#' @param by Increments in the \code{from} \code{to} sequence +#' @param level Level of the confidence interval +#' @param order For parametric models (from \code{\link{rdd_reg_lm}}), the order of the polynomial. +#' @param type For parametric models (from \code{\link{rdd_reg_lm}}) whether different orders are represented as different colour or as different facets. +#' @param device Whether to draw a base or a ggplot graph. +#' @param output Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object +#' @param plot Whether to actually plot the data. +#' @param \ldots Further arguments passed to specific methods +#' @return A data frame containing the bandwidths and corresponding estimates and confidence intervals. +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @import methods +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' +#' #Non-parametric estimate +#' bw_ik <- rdd_bw_ik(house_rdd) +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik) +#' plotSensi(reg_nonpara) +#' plotSensi(reg_nonpara, device='base') +#' +#' #Parametric estimate: +#' reg_para_ik <- rdd_reg_lm(rdd_object=house_rdd, order=4, bw=bw_ik) +#' plotSensi(reg_para_ik) +#' plotSensi(reg_para_ik, type='facet') + + + +################################### plotSensi: function to plot sensitivity to bandwidth + +#' @export +plotSensi <- function(rdd_regobject, from, to, by = 0.01, level = 0.95, output = c("data", "ggplot"), plot = TRUE, ...) UseMethod("plotSensi") + +#' @rdname plotSensi +#' @export +#' @param vcov. Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}} +plotSensi.rdd_reg_np <- function(rdd_regobject, from, to, by = 0.05, level = 0.95, output = c("data", "ggplot"), plot = TRUE, + device = c("ggplot", "base"), vcov. = NULL, ...) { + + device <- match.arg(device) + output <- match.arg(output) + if (!is.null(vcov.) && !is.function(vcov.)) + stop("'arg' vcov. should be a function (so can be updated at each step, not a matrix") + if (device == "base" && output == "ggplot") + stop("Arg 'output=ggplot' only relevant for 'device=ggplot'") + + object <- rdd_regobject + bw <- getBW(object) + est <- rdd_coef(object) + + ## set grid: + if (missing(from)) + from <- max(0.001, bw - 0.1) + if (missing(to)) + to <- bw + 0.1 + + seq_bw <- unique(sort(c(bw, seq(from = from, to = to, by = by)))) + n_seq_bw <- length(seq_bw) + + ## set matrix for results: + seq_vals <- matrix(NA, nrow = n_seq_bw, ncol = 6) + colnames(seq_vals) <- c("bw", "LATE", "se", "p_value", "CI_low", "CI_high") + seq_vals[, "bw"] <- seq_bw + + ## get call: + object_call <- getCall(object) + + ## run each time: + for (i in seq_along(seq_bw)) { + object_call$bw <- seq_bw[i] + object_new <- try(eval(object_call), silent = TRUE) + if (!inherits(object_new, "try-error")) { + seq_vals[i, "LATE"] <- rdd_coef(object_new) + if (!is.null(vcov.)) { + co <- coeftest(object_new, vcov. = vcov.)["D", , drop = FALSE] + } else { + co <- rdd_coef(object_new, allInfo = TRUE) + } + seq_vals[i, "se"] <- co[, "Std. Error"] + seq_vals[i, "p_value"] <- co[, 4] + seq_vals[i, c("CI_low", "CI_high")] <- waldci(object_new, level = level, vcov. = vcov.)["D", ] ## confint version working with vcov. + } + } + + + ## plot results: + seq_vals <- as.data.frame(seq_vals) + if (device == "base" && plot) { + ra <- range(seq_vals[, c("CI_low", "CI_high")], na.rm = TRUE) + plot(seq_vals[, "bw"], seq_vals[, "LATE"], type = "l", ylab = "LATE", xlab = "bandwidth", ylim = ra) + title("Sensitivity to bandwidth choice") + lines(seq_bw, seq_vals[, "CI_low"], lty = 2) + lines(seq_bw, seq_vals[, "CI_high"], lty = 2) # + + + ## add optim in case: + points(bw, est, col = "red") + segments(bw, 0, bw, est, col = "red", lty = 2) + segments(0, est, bw, est, col = "red", lty = 2) + } else { + sensPlot <- qplot(x = bw, y = LATE, data = seq_vals, geom = "line") + sensPlot <- sensPlot + geom_smooth(aes(ymax = CI_high, ymin = CI_low), data = seq_vals, stat = "identity") # add the conf int + point.df <- data.frame(bw = bw, LATE = est) + sensPlot <- sensPlot + geom_point(data = point.df) # add the conf int + sensPlot <- sensPlot + geom_vline(xintercept = 0, lty = 2) + if (plot) + print(sensPlot) + } + + ## export (silently) results: + out <- switch(output, data = seq_vals, ggplot = sensPlot) + invisible(out) +} + + +#' @rdname plotSensi +#' @export +plotSensi.rdd_reg_lm <- function(rdd_regobject, from, to, by = 0.05, level = 0.95, output = c("data", "ggplot"), plot = TRUE, + order, type = c("colour", "facet"), ...) { + + type <- match.arg(type) + output <- match.arg(output) + object <- rdd_regobject + est <- rdd_coef(object) + bw <- getBW(object) + origOrder <- getOrder(object) + hasBw <- !is.null(bw) + if (!hasBw & type == "facet") + stop("Arg 'type=facet' works only when the parametric regression was estimated with a bandwidth") + + ## set grid: + if (hasBw) { + if (missing(from)) + from <- max(0.001, bw - 0.1) + if (missing(to)) + to <- bw + 0.1 + + seq_bw <- unique(sort(c(bw, seq(from = from, to = to, by = by)))) + n_seq_bw <- length(seq_bw) + } else { + if (!all(c(missing(from), missing(to)))) + warning("Args 'from' and 'to' not considered since original input has no bw") + n_seq_bw <- 1 + seq_bw <- NULL + } + + if (missing(order)) + order <- 0:(getOrder(rdd_regobject) + 2) + seq_ord <- order + n_seq_ord <- length(seq_ord) + + ## set matrix for results: + seq_vals <- matrix(NA, nrow = n_seq_bw * n_seq_ord, ncol = 6) + colnames(seq_vals) <- c("bw", "order", "LATE", "se", "CI_low", "CI_high") + + ## get call: + object_call <- attr(object, "RDDcall") + + ## guess if obtained with ikbandwidth? (trick: call$bw would be empty) is_ikband <- is.null(object_call$bw) + + ## run each time: + for (j in 1:length(seq_ord)) { + for (i in 1:n_seq_bw) { + # assign new order/bw, and estimate: + object_call$bw <- seq_bw[i] + object_call$order <- seq_ord[j] + object_new <- try(eval(object_call), silent = TRUE) + + # put parameters bw/order into matrix: + seq_vals[i + (j - 1) * n_seq_bw, "bw"] <- if (is.null(seq_bw[i])) + NA else seq_bw[i] + seq_vals[i + (j - 1) * n_seq_bw, "order"] <- seq_ord[j] + + # put output estim/se into matrix: + if (!inherits(object_new, "try-error")) { + co <- rdd_coef(object_new, allInfo = TRUE) + seq_vals[i + (j - 1) * n_seq_bw, "LATE"] <- co[, 1] + seq_vals[i + (j - 1) * n_seq_bw, "se"] <- co[, 2] + } else { + warning("Problem evaluating model with new bw=", object_call$bw, " and new order=", object_call$order, ".") + } + } + } + + + + ## compute intervals: + probs <- (1 - level)/2 + probs <- c(probs, 1 - probs) + quants <- qnorm(probs) + seq_vals[, "CI_low"] <- seq_vals[, "LATE"] + quants[1] * seq_vals[, "se"] + seq_vals[, "CI_high"] <- seq_vals[, "LATE"] + quants[2] * seq_vals[, "se"] + + + ## plot results: + seq_vals_df <- as.data.frame(seq_vals) + rownames(seq_vals_df) <- 1:nrow(seq_vals_df) + if (hasBw) + seq_vals_df$order <- as.factor(seq_vals_df$order) + + + if (type == "colour") { + if (hasBw) { + est_point <- data.frame(bw = bw, LATE = est, order = as.factor(origOrder)) + sensPlot <- qplot(x = bw, y = LATE, data = seq_vals_df, colour = order, geom = "line") + geom_point(data = est_point) + + geom_smooth(aes(ymin = CI_low, ymax = CI_high), data = seq_vals_df, stat = "identity") + } else { + est_point <- data.frame(LATE = est, order = origOrder) + sensPlot <- qplot(x = order, y = LATE, data = seq_vals_df, geom = "line") + geom_point(data = est_point) + geom_smooth(aes(ymin = CI_low, + ymax = CI_high), data = seq_vals_df, stat = "identity") + } + } else { + sensPlot <- qplot(x = bw, y = LATE, data = seq_vals_df, geom = "line") + facet_grid(order ~ .) + geom_smooth(aes(ymin = CI_low, + ymax = CI_high), data = seq_vals_df, stat = "identity") + } + + if (plot) + print(sensPlot) + + + + ## export (silently) results: + out <- switch(output, data = seq_vals_df, ggplot = sensPlot) + invisible(out) +} diff --git a/R/qplot_experim.R b/R/qplot_experim.R new file mode 100644 index 0000000..58dad24 --- /dev/null +++ b/R/qplot_experim.R @@ -0,0 +1,63 @@ + + +gplot <- function(x, h, xlim = range(object$x, na.rm = TRUE), cex = 0.7, nplot = 3, type = c("base", "ggplot"), ...) { + object <- x + cutpoint <- getCutpoint(object) + + ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) + if (missing(h)) { + if (!all(xlim == range(object$x, na.rm = TRUE))) { + object <- subset(object, object$x > min(xlim) & object$x < max(xlim)) + } + h <- rdd_bw_rsw(object) + if (is_even(nplot)) { + se <- seq(from = 1 - (sum(1:nplot < (nplot/2))) * 0.2, to = 1 + (sum(1:nplot > (nplot/2))) * 0.2, by = 0.2) + } else { + se <- seq(from = 1 - floor(nplot/2) * 0.2, to = 1 + floor(nplot/2) * 0.2, by = 0.2) + } + hs <- if (nplot == 1) + h else se * h + } else { + if (length(h) == 1) { + if (is_even(nplot)) { + se <- seq(from = 1 - (sum(1:nplot < (nplot/2))) * 0.2, to = 1 + (sum(1:nplot > (nplot/2))) * 0.2, by = 0.2) + } else { + se <- seq(from = 1 - floor(nplot/2) * 0.2, to = 1 + floor(nplot/2) * 0.2, by = 0.2) + } + hs <- if (nplot == 1) + h else se * h + } else { + if (length(h == nplot)) { + hs <- h + } else { + stop("Length of h should be either one or equal to nplot (", nplot, ")") + } + } + } + + + + + ## plot + if (type == "base") { + par_orig <- par() + par(mfrow = c(nplot, 1)) + for (i in 1:nplot) { + plotBin(x = object$x, y = object$y, cutpoint = cutpoint, h = hs[i], xlim = xlim, cex = cex) + } + } else { + + plotBin_out <- plotBin(x = object$x, y = object$y, cutpoint = cutpoint, h = hs[1], xlim = xlim, cex = cex, plot = FALSE) + plotBin_out$h <- rep(hs[1], nrow(plotBin_out)) + for (i in 2:nplot) { + new <- plotBin(x = object$x, y = object$y, cutpoint = cutpoint, h = hs[i], xlim = xlim, cex = cex) + new$h <- rep(hs[i], nrow(new)) + plotBin_out <- rbind(plotBin_out, new) + } + + plotBin_out$h <- round(plotBin_out$h, 4) + qplot(x = x, y = y, data = plotBin_out) + facet_grid(h ~ .) + + } + +} diff --git a/R/rdd_coef.R b/R/rdd_coef.R new file mode 100644 index 0000000..a8fb39c --- /dev/null +++ b/R/rdd_coef.R @@ -0,0 +1,36 @@ +#' RDD coefficient +#' +#' Function to access the RDD coefficient in the various regressions +#' @param object A RDD regression object +#' @param allInfo whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval. +#' @param allCo Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients +#' @param \ldots Further arguments passed to/from specific methods +#' @return Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, +#' its standard value, t test and p-value and +#' @export + + +rdd_coef <- function(object, allInfo = FALSE, allCo = FALSE, ...) UseMethod("rdd_coef") + +#' @rdname rdd_coef +#' @export +rdd_coef.default <- function(object, allInfo = FALSE, allCo = FALSE, ...) { + res <- coef(summary(object)) + if (!allCo) + res <- res["D", , drop = FALSE] + if (!allInfo) + res <- res[, "Estimate"] + res +} + +#' @rdname rdd_coef +#' @export +rdd_coef.rdd_reg_np <- function(object, allInfo = FALSE, allCo = FALSE, ...) { + res <- object$coefMat + if (!allCo) + res <- res["D", , drop = FALSE] + if (!allInfo) + res <- res[, "Estimate"] + res +} + diff --git a/R/rdd_data.R b/R/rdd_data.R new file mode 100644 index 0000000..6741c4d --- /dev/null +++ b/R/rdd_data.R @@ -0,0 +1,197 @@ +#' Construct rdd_data +#' +#' Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. +#' +#' @param x Forcing variable +#' @param y Output +#' @param covar Exogeneous variables +#' @param cutpoint Cutpoint +#' @param labels Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently. +#' @param data A data-frame for the \code{x} and \code{y} variables. If this is provided, +#' the column names can be entered directly for argument \code{x}, \code{y} and \code{covar}. +#' For \code{covar}, should be a character vector. +#' @param z Assignment variable for the fuzzy case. Should be 0/1 or TRUE/FALSE variable. +#' @details Arguments \code{x}, \code{y} (and eventually \code{covar}) can be either given as: +#' * vectors (eventually data-frame for \code{covar}) +#' * quote/character when \code{data} is also provided. For multiple \code{covar}, use a vector of characters +#' @return Object of class \code{rdd_data}, inheriting from \code{data.frame} +#' @author Matthieu Stigler \email{Matthieu.Stigler@@gmail.com} +#' @export +#' @examples +#' data(house) +#' rd <- rdd_data(x=house$x, y=house$y, cutpoint=0) +#' rd2 <- rdd_data(x=x, y=y, data=house, cutpoint=0) +#' +#' # The print() function is the same as the print.data.frame: +#' rd +#' +#' # The summary() and plot() function are specific to rdd_data +#' summary(rd) +#' plot(rd) +#' +#' # for the fuzzy case, you need to specify the assignment variable z: +#' rd_dat_fakefuzzy <- rdd_data(x=house$x, y=house$y, +#' z=ifelse(house$x>0+rnorm(nrow(house), sd=0.05),1,0), +#' cutpoint=0) +#' summary(rd_dat_fakefuzzy) +#' @md + +rdd_data <- function(y, x, covar, cutpoint, z, labels, data) { + + + ## check args + type <- ifelse(missing(z), "Sharp", "Fuzzy") + hasCovar <- !missing(covar) + if (missing(cutpoint)) + stop("Please provide cutpoint") + covar_nam <- deparse(substitute(covar)) + + ## Use data in case: + if (!missing(data)) { + pf <- parent.frame() + x <- eval(substitute(x), data, enclos = pf) # copy from with.default + y <- eval(substitute(y), data, enclos = pf) # copy from with.default + if (hasCovar) { + ## make sure it's not already a df!? + class_robust <- try(class(eval(substitute(covar))), silent=TRUE) + if(inherits(class_robust, "try-error")) class_robust <- "quote" + + if(any(c("data.frame", "numeric") %in% class_robust)) { + covar_df <- covar + } else { + ## copy code from subset.data.frame + nl <- as.list(seq_along(data)) + names(nl) <- names(data) + covar_index <- eval(substitute(covar), nl, parent.frame()) + + covar_df <- data[,covar_index, drop=FALSE] + } + } + } + + if (missing(data) & hasCovar) covar_df <- covar + + + ### Check y, x univariate + k_y <- NCOL(y) + k_x <- NCOL(x) + + if (any(!c(k_y, k_x) == 1)) + stop("y or x should be univariate") + + ### Check y, x, z same size + n_y <- NROW(y) + n_x <- NROW(x) + n_covar <- if (hasCovar) + NROW(x) else NULL + + if (any(c(n_y, n_x) != n_covar)) + stop("y or x should be univariate") + + ### Check cutpoint + range_x <- range(x, na.rm = TRUE) + if (cutpoint < range_x[1] | cutpoint > range_x[2]) + stop("Cutpoint outside range of x") + + ## Check labels + if (!missing(labels)) { + if (!is.list(labels)) + stop("labels should be a list.") + if (is.null(names(labels)) || !all(names(labels) %in% c("x", "y", "covar"))) + stop("labels should be a list with components x, and/or y, and/or covar") + if (hasCovar) { + if ("covar" %in% names(labels) && length(labels$covar) != NCOL(covar_df)) + stop("There should be ", NCOL(covar_df), " values (dim of covar) for component 'covar' in labels") + } + } else { + labels <- list() + } + + # if(is.null(labels$x)) labels$x <- deparse(substitute(x)) if(is.null(labels$y)) labels$y <- deparse(substitute(y)) + # if(hasCova && is.null(labels$covar)) labels$covar <- if(NCOL(covar)==1) names(deparse(substitute(y)) + + ## Assemble data + rdd_dat <- data.frame(x = x, y = y) + if (hasCovar) { + rdd_dat <- cbind(rdd_dat, covar_df) + if (NCOL(covar_df) == 1 && is.null(colnames(covar_df))) + colnames(rdd_dat)[3] <- covar_nam + } + + if (type == "Fuzzy") { + rdd_dat <- cbind(rdd_dat, z) + } + + ## return + class(rdd_dat) <- c("rdd_data", "data.frame") + attr(rdd_dat, "hasCovar") <- hasCovar + attr(rdd_dat, "labels") <- labels + attr(rdd_dat, "cutpoint") <- cutpoint + attr(rdd_dat, "type") <- type + + rdd_dat +} + + +### Specific subsetting methods + +# as.data.frame.rdd_data <- function(x) { subset(x, y> }as.data.frame.default(x) + +#' @export +"[.rdd_data" <- function(x, i, ...) { + attr_x <- attributes(x) + r <- NextMethod("[", object = as.data.frame(x)) + + ## keep attributes only if remains a data frame! + if (inherits(r, "data.frame")) { + attr_x$row.names <- attr(r, "row.names") + attr_x$names <- attr(r, "names") + mostattributes(r) <- attr_x + attributes(r) <- attributes(r)[match(names(attr_x), names(attributes(r)))] + } + # newCla <- class(r) if(any(grepl('rdd_data', newCla))) newCla <- newCla[-grepl('rdd_data', newCla)] + # print(names(attributes(newCla))) if(!inherits(newCla, 'data.frame')) attr(r, 'class')[which(attr(r, + # 'class')=='data.frame')] <- newCla + r +} + +#' @export +subset.rdd_data <- function(x, subset, select, drop = FALSE, ...) { + attr_x <- attributes(x) + + ### subset code: start + if (missing(subset)) + r <- TRUE else { + e <- substitute(subset) + r <- eval(e, x, parent.frame()) + if (!is.logical(r)) + stop("'subset' must evaluate to logical") + r <- r & !is.na(r) + } + if (missing(select)) + vars <- TRUE else { + nl <- as.list(seq_along(x)) + names(nl) <- names(x) + vars <- eval(substitute(select), nl, parent.frame()) + } + res <- x[r, vars, drop = drop] + ### subset code: end r <- subset.data.frame(x,...) r <- NextMethod('subset') + + ## keep attributes only if remains a data frame! + if (inherits(r, "data.frame")) { + attr_x$row.names <- attr(res, "row.names") + attr_x$names <- attr(res, "names") + mostattributes(res) <- attr_x + attributes(res) <- attributes(res)[match(names(attr_x), names(attributes(res)))] + } + res +} + +#' @export +as.data.frame.rdd_data <- function(x, ...) { + class(x) <- "data.frame" + attr(x, "hasCovar") <- NULL + attr(x, "labels") <- NULL + attr(x, "cutpoint") <- NULL + x +} diff --git a/R/rdd_data_methods.R b/R/rdd_data_methods.R new file mode 100644 index 0000000..1a40c83 --- /dev/null +++ b/R/rdd_data_methods.R @@ -0,0 +1,150 @@ + + +### SUMMARY method +#' @export +summary.rdd_data <- function(object, ...) { + + cutpoint <- getCutpoint(object) + hasCovar_eng <- ifelse(hasCovar(object), "yes", "no") + cat("### rdd_data object ###\n") + cat("\nCutpoint:", cutpoint) + cat("\nType:", getType(object), "\n") + if(isFuzzy(object)) { + n_treat <- sum(object$z) + untr <- paste(", untreated:", nrow(object)-n_treat) + tr <- paste(", treated:", n_treat) + } + cat("Sample size:", "\n\t-Full :", nrow(object), + "\n\t-Left :", sum(object$x < cutpoint), if(isFuzzy(object)) untr else NULL, + "\n\t-Right:", sum(object$x >= cutpoint), if(isFuzzy(object)) tr else NULL) + cat("\nCovariates:", hasCovar_eng, "\n") +} + +#' Plot rdd_data +#' +#' Binned plot of the forcing and outcome variable +#' +#' @param x Object of class rdd_data +#' @param h The binwidth parameter (note this differs from the bandwidth parameter!) +#' @param nbins Alternative to h, the total number of bins in the plot. +#' @param xlim The range of the x data +#' @param cex Size of the points, see \code{\link{par}} +#' @param nplot Number of plot to draw +#' @param device Type of device used. Currently not used. +#' @param \ldots Further arguments passed to the \code{\link{plot}} function. +#' @return A plot +#' @details Produces a simple binned plot averaging values within each interval. The length of the intervals +#' is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth +#' argument, that gives half of the length of the kernel window. +#' When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{rdd_bw_rsw}}. +#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> +#' @export +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' plot(house_rdd) +#' +#' ## Specify manually the bandwidth: +#' plot(house_rdd, h=0.2) +#' +#' ## Show three plots with different bandwidth: +#' plot(house_rdd, h=c(0.2,0.3,0.4), nplot=3) +#' +#' ## Specify instead of the bandwidth, the final number of bins: +#' plot(house_rdd, nbins=22) +#' +#' ## If the specified number of bins is odd, the larger number is given to side with largest range +#' plot(house_rdd, nbins=21) + + +### PLOT method +plot.rdd_data <- function(x, h=NULL, nbins = NULL, xlim = range(object$x, na.rm = TRUE), cex = 0.7, nplot = 1, device = c("base", + "ggplot"), ...) { + + object <- x + cutpoint <- getCutpoint(object) + device <- match.arg(device) + + ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) + if (is.null(h) & is.null(nbins)) { + if (!all(xlim == range(object$x, na.rm = TRUE))) { + object <- subset(object, x > min(xlim) & x < max(xlim)) + } + h <- rdd_bw_rsw(object) + if (is_even(nplot)) { + se <- seq(from = 1 - (sum(1:nplot < (nplot/2))) * 0.2, to = 1 + (sum(1:nplot > (nplot/2))) * 0.2, by = 0.2) + } else { + se <- seq(from = 1 - floor(nplot/2) * 0.2, to = 1 + floor(nplot/2) * 0.2, by = 0.2) + } + hs <- ifelse(nplot == 1, h, se * h) + } else if (!is.null(h) & is.null(nbins)) { + if (length(h) == 1) { + if (is_even(nplot)) { + se <- seq(from = 1 - (sum(1:nplot < (nplot/2))) * 0.2, to = 1 + (sum(1:nplot > (nplot/2))) * 0.2, by = 0.2) + } else { + se <- seq(from = 1 - floor(nplot/2) * 0.2, to = 1 + floor(nplot/2) * 0.2, by = 0.2) + } + hs <- ifelse(nplot == 1, h, se * h) + } else { + if (length(h == nplot)) { + hs <- h + } else { + stop("Length of h should be either one or equal to nplot (", nplot, ")") + } + } + } else if (!is.null(nbins)) { + hs <- NULL + if (length(nbins) != nplot) { + stop("Length of nbins should be equal to nplot (", nplot, ")") + } + } + + + + + ## plot + + par_orig <- par() + par(mfrow = c(nplot, 1)) + for (i in 1:nplot) { + plotBin(x = object$x, y = object$y, cutpoint = cutpoint, h = hs[i], nbins = nbins[i], xlim = xlim, cex = cex, ...) + } + par(mfrow = c(1, 1)) + + + + ## invisible return: + invisible(object) +} + + + +#' Convert a rdd object to lm +#' @param x An object to convert to lm +#' @return An object of class \code{lm} +#' @seealso \code{\link{as.npreg}} which converts \code{rdd_reg} objects into \code{npreg} from package \code{np}. +#' @examples +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' reg_para <- rdd_reg_lm(rdd_object=house_rdd) +#' reg_para_lm <- as.lm(reg_para) +#' reg_para_lm +#' plot(reg_para_lm, which=4) +#' @export +as.lm <- function(x) UseMethod("as.lm") + + +as.lm_RDD <- function(x) { + + at_x <- attributes(x) + at_x[names(at_x) != "names"] <- NULL + class(x) <- "lm" + + x +} + +#' @export +as.lm.rdd_reg_np <- function(x) as.lm_RDD(x) + +#' @export +as.lm.rdd_reg <- function(x) as.lm_RDD(x) diff --git a/R/rdd_pred.R b/R/rdd_pred.R new file mode 100644 index 0000000..373c4b5 --- /dev/null +++ b/R/rdd_pred.R @@ -0,0 +1,179 @@ +#' RDD coefficient prediction +#' +#' Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{rdd_coef}}) +#' @param object A RDD regression object +#' @param covdata New data.frame specifying the values of the covariates, can have multiple rows. +#' @param se.fit A switch indicating if standard errors are required. +#' @param vcov. Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}} +#' @param newdata Another data on which to evaluate the x/D variables. Useful in very few cases. +#' @param stat The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them +#' @param weights Eventual weights for the averaging of the predicted values. +#' @details The function \code{rdd_pred} does a simple prediction of the RDD effect +#' \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)} +#' When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient, +#' shown in the outputs, or obtained with \code{\link{rdd_coef}}. If there were covariates, and if these covariates were estimated using the +#' \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e. +#' had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s). +#' \code{rdd_pred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with +#' the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}. +#' +#' In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat='mean'}. +#' Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases, +#' this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also! +#' +#' Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula: +#' \deqn{x_i \Omega x_i^{'}} +#' where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and +#' \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained +#' as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account. +#' @return Returns the predicted value(s), and, if se.fit=TRUE, their standard errors. +#' @export +#' @references Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024 +#' @examples +#' # Load data, add (artificial) covariates: +#' data(house) +#' n_Lee <- nrow(house) +#' z1 <- runif(n_Lee) +#' house_rdd <- rdd_data(y=y, x=x, data=house, covar=z1, cutpoint=0) +#' +#' # estimation without covariates: rdd_pred is the same than rdd_coef: +#' reg_para <- rdd_reg_lm(rdd_object=house_rdd) +#' +#' rdd_pred(reg_para) +#' rdd_coef(reg_para, allInfo=TRUE) +#' +#' # estimation with covariates: +#' reg_para_cov <- rdd_reg_lm(rdd_object=house_rdd, +#' covariates='z1', +#' covar.opt=list(slope='separate') ) +#' +#' # should obtain same result as with RDestimate +#' rdd_pred(reg_para_cov, covdata=data.frame(z1=0)) +#' +#' # evaluate at mean of z1 (as comes from uniform) +#' rdd_pred(reg_para_cov, covdata=data.frame(z1=0.5)) + +rdd_pred <- function(object, covdata, se.fit = TRUE, vcov. = NULL, newdata, stat = c("identity", "sum", "mean"), weights) { + + stat <- match.arg(stat) + + if (!missing(weights)) { + if (missing(covdata)) + stop("Arg 'weights' only useful with arg 'covdata'") + if (stat == "identity") + stop("Argument 'weights' not useful when arg: stat='identity'") + if (stat == "sum") { + warning("Providing weights for a sum makes little sense?!") + } + if (length(weights) != NROW(covdata)) + stop("Weights should be of the same length than covdata") + } + + x_call <- getCall(object) + hasCo <- hasCovar(object) + + if (is.null(x_call$covar.opt)) { + covar.slope <- "same" + covar.strat <- "include" + } else { + covar.slope <- ifelse(is.null(x_call$covar.opt$slope), "same", x_call$covar.opt$slope) + covar.strat <- ifelse(is.null(x_call$covar.opt$strategy), "include", x_call$covar.opt$strategy) + } + + + ## get original data structure: + mf <- model.frame(object)[1:2, -1] + if (any(grepl("\\(weights\\)", colnames(mf)))) + mf <- mf[, -grep("\\(weights\\)", colnames(mf))] + + ## Fill orig struc with 0/1 + if (missing(newdata)) { + which.D <- grep("^D$", colnames(mf)) + mf[, which.D] <- c(0, 1) ## set coeff of interest + mf[, -which.D] <- 0 ## remove others (not absolutely necessary actually) + newdata <- mf + } + + ## Merge covdata with newdata: + + if (!missing(covdata)) { + if (covar.strat == "residual") + stop("Do not provide 'covdata' if covariates were use with 'residual' strategy") + if (covar.slope == "separate") { + Nrow_cov <- nrow(covdata) + if (Nrow_cov > 1) + newdata <- newdata[c(1, rep(2, Nrow_cov)), ] + if (!is.null(rownames(covdata))) { + if ("1" %in% rownames(covdata)) + rownames(newdata)[1] <- "0" + rownames(newdata)[-1] <- rownames(covdata) + } else { + rownames(newdata) <- c(0, seq_len(Nrow_cov)) + } + colnames_cov <- colnames(covdata) + ind <- seq(from = 2, by = 2, length.out = Nrow_cov) + if (!all(colnames_cov %in% colnames(newdata))) + stop("Arg 'covdata' contains colnames not in the data") + newdata[2:nrow(newdata), paste(colnames(covdata), "D", sep = ":")] <- covdata + } + } + + multiN <- nrow(newdata) > 2 + + ## Merge and check no NAs + X_i <- as.matrix(cbind(1, newdata)) + if (any(is.na(X_i))) { + warning("data contains NA. Were removed") + X_i <- X_i[-apply(X_i, 1, function(x) any(is.na(x))), ] + } + + ## Set up variance matrix: X_i (X'X)^{-1} X_i' + if (is.null(vcov.)) + vcov. <- vcov(object) + X_inv <- vcov. + mat <- X_i %*% X_inv %*% t(X_i) + + ## preds: + + if (!multiN) { + pred_point <- drop(diff(X_i %*% rdd_coef(object, allCo = TRUE))) + if (se.fit) + pred_se <- sqrt(sum(c(diag(mat), -2 * mat[1, 2]))) + } else { + d <- X_i %*% coef(object) + + + Mat_SUM <- cbind(1, diag(nrow(d) - 1)) + Mat_DIAG <- matrix(diag(mat), ncol = 1) + if (missing(weights)) { + MAT_SmallSum <- matrix(c(-(nrow(d) - 1), rep(1, nrow(d) - 1)), nrow = 1) ## create vector: [- n-1, 1, 1, 1....] + } else { + MAT_SmallSum <- matrix(c(-1, weights), nrow = 1) ## create vector: [- 1, w_1, w_2, w_n] + } + + if (stat == "identity") { + Mat_DIFF <- Mat_SUM + Mat_DIFF[, 1] <- -1 + pred_point <- drop(Mat_DIFF %*% d) + if (se.fit) + pred_se <- drop(sqrt(Mat_SUM %*% Mat_DIAG - 2 * mat[1, 2:ncol(mat)])) + } else { + if (stat == "mean" & missing(weights)) + MAT_SmallSum <- MAT_SmallSum/Nrow_cov + pred_point <- drop(MAT_SmallSum %*% d) + if (se.fit) + pred_se <- drop(sqrt(MAT_SmallSum %*% mat %*% t(MAT_SmallSum))) + } + } + + + ## result: + if (se.fit) { + res <- list() + res$fit <- pred_point + res$se.fit <- pred_se + } else { + res <- pred_point + } + res +} diff --git a/R/rddtools.R b/R/rddtools.R new file mode 100644 index 0000000..495fa04 --- /dev/null +++ b/R/rddtools.R @@ -0,0 +1,86 @@ +#' @name rddtools +#' @docType package +#' @title Regression Discontinuity Design +#' @import np ggplot2 KernSmooth rdrobust +#' @description Set of functions for Regression Discontinuity Design ('RDD'), for data visualisation, estimation and testing. + +utils::globalVariables(c("x", "y", "position", "cutpoint", "LATE", "CI_low", "CI_high", "sd", "quantile", "ks.test", "t.test", "coef", "density")) +utils::globalVariables(c("abline", "as.formula", "coef density", "df.residual", "fitted", "glm", "hist", "ksmooth", +"lines", "lm", "model.frame", "model.matrix", "na.pass", "par", "pnorm", "points", "poly", +"predict", "printCoefmat", "qnorm", "qt", "rbeta", "residuals", "rnorm", "segments", "title", "var", "vcov")) + +#' @name indh +#' @docType data +#' @title INDH data set +#' @description Data from the Initiative Nationale du Development Humaine, collected as the part of the SNSF project "Development Aid and Social Dynamics" +#' @format A data frame with two variables with 720 observations each +#' @references Arcand, Rieger, and Nguyen (2015) 'Development Aid and Social Dyanmics Data Set' +#' @examples +#' # load the data +#' data(indh) +#' +#' # construct rdd_data frame +#' rdd_dat_indh <- rdd_data(y=choice_pg, x=poverty, data=indh, cutpoint=30) +#' +#' # inspect data frame +#' summary(rdd_dat_indh) +#' +#' # perform non-parametric regression +#' ( reg_np_indh <- rdd_reg_np(rdd_dat_indh) ) +NULL +#' @name house +#' @docType data +#' @title Dataset used in Lee (2008) +#' @description Randomized experiments from non-random selection in U.S. House elections +#' @description Dataset described used in Imbens and Kalyamaran (2012), and probably the same dataset used in Lee (2008), +#' @format A data frame with 6558 observations and two variables: +#' \describe{ +#' \item{x}{Vote at election t-1} +#' \item{y}{Vote at election t} +#' } +#' @source Guido Imbens webpage: \url{https://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} +#' @references Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,' +#' Review of Economic Studies (2012) 79, 933-959 +#' @references Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, +#' \emph{Journal of Econometrics}, 142, 675-697 +#' @examples +#' data(house) +#' rdd_house <- rdd_data(x=x, y=y, data=house, cutpoint=0) +#' summary(rdd_house) +#' plot(rdd_house) +NULL + + +#' @name STAR_MHE +#' @docType data +#' @title Transformation of the STAR dataset as used in Angrist and Pischke (2008) +#' @description Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) +#' @seealso \code{\link[AER]{STAR}} for the original dataset. +#' @format A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, +#' all other are created by Angrist and Pischke STAT code. +#' \describe{ +#' \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})} +#' \item{pscore}{The propensity score (computed by A & P)} +#' \item{classid}{The id of the class (computed by A & P)} +#' \item{cs}{Class size (computed by A & P)} +#' \item{female, nwhite}{Various covariates (computed by A & P)} +#' } +#' @details ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. +#' The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. +#' The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website, on the webstar.dta. +#' @references Krueger, A. (1999) 'Experimental Estimates Of Education Production Functions,' +#' \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May. +#' @references Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, +#' Princeton University press +#' @source Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website +#' @examples +#' data(STAR_MHE) +#' +#' # Compute the group means: +#' STAR_MHE_means <- aggregate(STAR_MHE[, c('classid', 'pscore', 'cs')], +#' by=list(STAR_MHE$classid), mean) +#' +#' # Regression of means, with weighted average: +#' reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs) +#' coef(summary(reg_krug_gls))[2,2] +NULL \ No newline at end of file diff --git a/R/reg_gen.R b/R/reg_gen.R new file mode 100644 index 0000000..d72acbb --- /dev/null +++ b/R/reg_gen.R @@ -0,0 +1,113 @@ +#' General polynomial estimator of the regression discontinuity +#' +#' Compute RDD estimate allowing a locally kernel weighted version of any estimation function +#' possibly on the range specified by bandwidth +#' @param rdd_object Object of class rdd_data created by \code{\link{rdd_data}} +#' @param covariates Formula to include covariates +#' @param order Order of the polynomial regression. +#' @param bw A bandwidth to specify the subset on which the kernel weighted regression is estimated +#' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} +#' @param slope Whether slopes should be different on left or right (separate), or the same. +#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). +#' @param fun The function to estimate the parameters +#' @param \ldots Further arguments passed to fun. See the example. +#' @details This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. +#' It is assumed that the custom funciton has following behaviour: +#' \enumerate{ +#' \item A formula interface, together with a \code{data} argument +#' \item A \code{weight} argument +#' \item A coef(summary(x)) returning a data-frame containing a column Estimate +#' } +#' Note that for the last requirement, this can be accomodated by writing a specific \code{\link{rdd_coef}} +#' function for the class of the object returned by \code{fun}. +#' @return An object of class rdd_reg_lm and class lm, with specific print and plot methods +#' @references TODO +#' @export rdd_gen_reg +#' @examples +#' ## Step 0: prepare data +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' +#' ## Estimate a local probit: +#' house_rdd$y <- with(house_rdd, ifelse(y= cutpoint - bw & dat$x <= cutpoint + bw, 1, 0) + } else if (!missing(weights)) { + weights <- weights + } else { + weights <- NULL + } + + ## Construct data + if (missing(weights)) + weights <- NULL + dat_step1 <- model.matrix(rdd_object, covariates = covariates, order = order, bw = bw, slope = slope, covar.opt = covar.opt) + + ## Regression + reg <- fun(y ~ ., data = dat_step1, weights = weights, ...) + + ## Return + RDDslot <- list() + RDDslot$rdd_data <- rdd_object + reg$RDDslot <- RDDslot + class(reg) <- c("rdd_reg_lm", "rdd_reg", class(reg)) + attr(reg, "PolyOrder") <- order + attr(reg, "cutpoint") <- cutpoint + attr(reg, "slope") <- slope + attr(reg, "RDDcall") <- match.call() + attr(reg, "bw") <- bw + reg +} + +rdd_gen_reg_old <- function(rdd_object, covariates = ".", bw = rdd_bw_ik(rdd_object), slope = c("separate", "same"), fun = glm, + ...) { + + slope <- match.arg(slope) + checkIsRDD(rdd_object) + if (!is.function(fun)) + stop("Arg 'fun' should be a function") + cutpoint <- getCutpoint(rdd_object) + + ## Construct data + dat <- as.data.frame(rdd_object) + + dat_step1 <- dat[, c("y", "x")] + dat_step1$x <- dat_step1$x - cutpoint + dat_step1$D <- ifelse(dat_step1$x >= 0, 1, 0) + if (slope == "separate") { + dat_step1$x_right <- dat_step1$x * dat_step1$D + } + + ### Weights + kernel_w <- Kernel_tri(dat_step1[, "x"], center = 0, bw = bw) + + ## Regression + reg <- fun(y ~ ., data = dat_step1, weights = kernel_w, ...) + + ## Return + class(reg) <- c("rdd_reg_gen", "rdd_reg", class(reg)) + attr(reg, "RDDcall") <- match.call() + attr(reg, "cutpoint") <- cutpoint + attr(reg, "bw") <- bw + reg +} diff --git a/R/reg_lm.R b/R/reg_lm.R new file mode 100644 index 0000000..4a6020a --- /dev/null +++ b/R/reg_lm.R @@ -0,0 +1,162 @@ +#' Parametric polynomial estimator of the regression discontinuity +#' +#' Compute a parametric polynomial regression of the ATE, +#' possibly on the range specified by bandwidth +#' @param rdd_object Object of class rdd_data created by \code{\link{rdd_data}} +#' @param covariates Formula to include covariates +#' @param order Order of the polynomial regression. +#' @param bw A bandwidth to specify the subset on which the parametric regression is estimated +#' @param covar.strat DEPRECATED, use covar.opt instead. +#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). +#' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} +#' @param slope Whether slopes should be different on left or right (separate), or the same. +#' @return An object of class rdd_reg_lm and class lm, with specific print and plot methods +#' @details This function estimates the standard \emph{discontinuity regression}: +#' \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon} +#' with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same, +#' i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}. +#' Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random. +#' Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}: +#' \describe{ +#' \item{include}{Covariates are simply added as supplementary regressors in the RD equation} +#' \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}} +#' The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble +#' the non-parametric local kernel \code{\link{rdd_reg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}). +#' +#' The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R, +#' heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations +#' a specific function \code{\link{clusterInf}} is provided. +#' @import Formula +#' @importFrom AER ivreg +#' @export +#' @examples +#' ## Step 0: prepare data +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' ## Step 2: regression +#' # Simple polynomial of order 1: +#' reg_para <- rdd_reg_lm(rdd_object=house_rdd) +#' print(reg_para) +#' plot(reg_para) +#' +#' # Simple polynomial of order 4: +#' reg_para4 <- rdd_reg_lm(rdd_object=house_rdd, order=4) +#' reg_para4 +#' plot(reg_para4) +#' +#' # Restrict sample to bandwidth area: +#' bw_ik <- rdd_bw_ik(house_rdd) +#' reg_para_ik <- rdd_reg_lm(rdd_object=house_rdd, bw=bw_ik, order=4) +#' reg_para_ik +#' plot(reg_para_ik) + + +rdd_reg_lm <- function(rdd_object, covariates = NULL, order = 1, bw = NULL, + slope = c("separate", "same"), + covar.opt = list(strategy = c("include", "residual"), + slope = c("same", "separate"), + bw = NULL), + covar.strat = c("include", "residual"), weights) { + + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + type <- getType(rdd_object) + + slope <- match.arg(slope) + + if (!missing(covar.strat)) + stop("covar.strat is deprecated, use covar.opt = list(strategy=...) instead") + if (!missing(weights) & !is.null(bw)) + stop("Cannot give both 'bw' and 'weights'") + + ## Subsetting + dat <- as.data.frame(rdd_object) + + if (!is.null(bw)) { + weights <- ifelse(dat$x >= cutpoint - bw & dat$x <= cutpoint + bw, 1, 0) + } else if (!missing(weights)) { + weights <- weights + } else { + weights <- NULL + } + + ## Construct data + if (missing(weights)) + weights <- NULL + dat_step1 <- model.matrix(rdd_object, covariates = covariates, order = order, bw = bw, slope = slope, covar.opt = covar.opt) + + ## Regression + if (type == "Sharp") { + reg <- lm(y ~ ., data = dat_step1, weights = weights) + class_reg <- "lm" + } else { + if (!is.null(covariates)) + stop("Covariates currently not implemented for Fuzzy case") + reg <- ivreg(y ~ . - ins | . - D, data = dat_step1, weights = weights) + class_reg <- "ivreg" + } + + + ## Return + RDDslot <- list() + RDDslot$rdd_data <- rdd_object + reg$RDDslot <- RDDslot + class(reg) <- c("rdd_reg_lm", "rdd_reg", class_reg) + attr(reg, "PolyOrder") <- order + attr(reg, "cutpoint") <- cutpoint + attr(reg, "slope") <- slope + attr(reg, "RDDcall") <- match.call() + attr(reg, "bw") <- bw + reg +} + + +#' @export +print.rdd_reg_lm <- function(x, ...) { + + order <- getOrder(x) + cutpoint <- getCutpoint(x) + slope <- getSlope(x) + bw <- getBW(x) + hasBw <- !is.null(bw) + bw2 <- if (hasBw) + bw else Inf + + x_var <- getOriginalX(x) + n_left <- sum(x_var >= cutpoint - bw2 & x_var < cutpoint) + n_right <- sum(x_var >= cutpoint & x_var <= cutpoint + bw2) + + cat("### RDD regression: parametric ###\n") + cat("\tPolynomial order: ", order, "\n") + cat("\tSlopes: ", slope, "\n") + if (hasBw) + cat("\tBandwidth: ", bw, "\n") + cat("\tNumber of obs: ", sum(n_left + n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep = "") + + cat("\n\tCoefficient:\n") + + printCoefmat(coef(summary(x))[2, , drop = FALSE]) + +} + + +#' @export +plot.rdd_reg_lm <- function(x, binwidth=NULL, ...) { + + ## set default binwitdh + if(is.null(binwidth)) { + bw_plot <- rdd_bw_cct_plot(x) + # binwidth <- bw_plot$results["Bin Length",, drop=TRUE] old version + binwidth <- bw_plot$h[1] + } + + ## data + dat <- getOriginalData(x) + subw <- if (!is.null(x$weights)) + x$weights > 0 else rep(TRUE, nrow(dat)) + pred <- data.frame(x = dat$x, y = fitted(x))[subw, ] + + ## plot + plotBin(dat$x, dat$y, h=binwidth, cutpoint=getCutpoint(x), ...) + lines(pred[order(pred$x), ]) +} diff --git a/R/reg_np.R b/R/reg_np.R new file mode 100644 index 0000000..e590201 --- /dev/null +++ b/R/reg_np.R @@ -0,0 +1,226 @@ +#' Parametric polynomial estimator of the regression discontinuity +#' +#' Compute a parametric polynomial regression of the ATE, +#' possibly on the range specified by bandwidth +#' @param rdd_object Object of class rdd_data created by \code{\link{rdd_data}} +#' @param covariates TODO +#' @param bw A bandwidth to specify the subset on which the parametric regression is estimated +#' @param inference Type of inference to conduct: non-parametric one (\code{np}) or standard (\code{lm}). See details. +#' @param slope Whether slopes should be different on left or right (separate), or the same. +#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). +#' @return An object of class rdd_reg_np and class lm, with specific print and plot methods +#' @seealso \code{\link{rdd_bw_ik}} Bandwidth selection using the plug-in bandwidth of Imbens and Kalyanaraman (2012) +#' @references TODO +#' @export rdd_reg_np +#' @examples +#' ## Step 0: prepare data +#' data(house) +#' house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +#' ## Step 2: regression +#' # Simple polynomial of order 1: +#' reg_nonpara <- rdd_reg_np(rdd_object=house_rdd) +#' print(reg_nonpara) +#' plot(reg_nonpara) + + +rdd_reg_np <- function(rdd_object, covariates = NULL, bw = rdd_bw_ik(rdd_object), slope = c("separate", "same"), inference = c("np", + "lm"), covar.opt = list(slope = c("same", "separate"), bw = NULL)) { + + slope <- match.arg(slope) + inference <- match.arg(inference) + checkIsRDD(rdd_object) + cutpoint <- getCutpoint(rdd_object) + + if (!is.null(covariates)) + warning("covariates not fully implemented for non-para reg") + + ## Construct data + if ("strategy" %in% names(covar.opt)) + warning("Arg 'strategy' should not be used for ") + covar.opt$strategy <- "include" + dat <- as.data.frame(rdd_object) + dat_step1 <- model.matrix(rdd_object, covariates = covariates, order = 1, bw = bw, slope = slope, covar.opt = covar.opt) + + + ### Weights + kernel_w <- Kernel_tri(dat_step1[, "x"], center = 0, bw = bw) + + ## Regression + reg <- lm(y ~ ., data = dat_step1, weights = kernel_w) + coefD <- coef(reg)["D"] + + ## Non-para inference: + if (inference == "np") { + var <- var_estim(x = dat$x, y = dat$y, point = cutpoint, bw = bw, eachSide = TRUE) + dens <- dens_estim(x = dat$x, point = cutpoint, bw = bw, eachSide = TRUE) + + const <- 4.8/(nrow(dat) * bw) + all <- const * sum(var)/dens + se <- sqrt(all) + tval <- coefD/se + pval <- 2 * pnorm(abs(tval), lower.tail = FALSE) + coefmat <- matrix(c(coefD, se, tval, pval), nrow = 1, dimnames = list("D", c("Estimate", "Std. Error", "z value", "Pr(>|z|)"))) + } else { + coefmat <- coef(summary(reg)) #['D', , drop=FALSE] + } + + ## Return + res <- list() + RDDslot <- list() + RDDslot$rdd_data <- rdd_object + RDDslot$model <- reg + res$coefficients <- coef(reg)["D"] + res$coefMat <- coefmat + res$residuals <- residuals(reg) + res$fitted <- fitted(reg) + res$RDDslot <- RDDslot + + class(res) <- c("rdd_reg_np", "rdd_reg", "lm") + attr(res, "RDDcall") <- match.call() + attr(res, "cutpoint") <- cutpoint + attr(res, "bw") <- bw + res +} + + +#' @export +print.rdd_reg_np <- function(x, signif.stars = getOption("show.signif.stars"), ...) { + + RDDcall <- attr(x, "RDDcall") + bw <- getBW(x) + cutpoint <- getCutpoint(x) + x_var <- getOriginalX(x) + + n_left <- sum(x_var >= cutpoint - bw & x_var < cutpoint) + n_right <- sum(x_var >= cutpoint & x_var <= cutpoint + bw) + + cat("### RDD regression: nonparametric local linear###\n") + cat("\tBandwidth: ", bw, "\n") + cat("\tNumber of obs: ", sum(n_left + n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep = "") + + cat("\n\tCoefficient:\n") + + printCoefmat(rdd_coef(x, allInfo = TRUE), signif.stars = signif.stars) + +} + + +#' @export +summary.rdd_reg_np <- function(object, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), + ...) { + + x <- object + bw <- getBW(x) + cutpoint <- getCutpoint(x) + x_var <- getOriginalX(x) + + ## compute numbers left/right: + n_left <- sum(x_var >= cutpoint - bw & x_var < cutpoint) + n_right <- sum(x_var >= cutpoint & x_var <= cutpoint + bw) + + ## compute residual summary: + res_quant <- quantile(residuals(x)) + names(res_quant) <- c("Min", "1Q", "Median", "3Q", "Max") + + ## compute R^2 + r.squared <- summary(x$RDDslot$model)$r.squared + + ## Extend the rdd_reg_no output with new computaations: + + object$r.squared <- r.squared + object$res_quant <- res_quant + object$n_obs <- list(n_left = n_left, n_right = n_right, total = n_left + n_right) + + class(object) <- c("summary.rdd_reg_np", class(object)) + object +} + + +#' @export +print.summary.rdd_reg_np <- function(x, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), + ...) { + + bw <- getBW(x) + + cat("### RDD regression: nonparametric local linear###\n") + cat("\tBandwidth: ", bw, "\n") + cat("\tNumber of obs: ", x$n_obs$total, " (left: ", x$n_obs$n_left, ", right: ", x$n_obs$n_right, ")\n", sep = "") + + cat("\n\tWeighted Residuals:\n") + print(zapsmall(x$res_quant, digits + 1)) + + + cat("\n\tCoefficient:\n") + + printCoefmat(rdd_coef(x, allInfo = TRUE), signif.stars = signif.stars) + + cat("\n\tLocal R squared:", formatC(x$r.squared, digits = digits), "\n") + +} + + +#' @export +plot.rdd_reg_np <- function(x, binwidth=NULL, chart = c("locpoly", "np"), ...) { + + chart <- match.arg(chart) + cutpoint <- getCutpoint(x) + bw <- getBW(x) + + ## set default binwitdh + if(is.null(binwidth)) { + bw_plot <- rdd_bw_cct_plot(x) + # binwidth <- bw_plot$results["Bin Length",, drop=TRUE] old version + binwidth <- bw_plot$h[1] + } + + ## data + dat <- getOriginalData(x, classRDD = FALSE) + + ## Use locpoly: + dat_left <- subset(dat, x < cutpoint) + dat_right <- subset(dat, x >= cutpoint) + + if (chart == "locpoly") { + llp_left <- locpoly(x = dat_left$x, y = dat_left$y, bandwidth = bw) + llp_right <- locpoly(x = dat_right$x, y = dat_right$y, bandwidth = bw) + + ## Use np: + } else { + np_reg_left <- np::npreg(np::npregbw(y ~ x, data = dat_left, regtype = "ll", ckertype = "epanechnikov", bandwidth.compute = FALSE, + bws = bw)) + + np_reg_right <- np::npreg(np::npregbw(y ~ x, data = dat_right, regtype = "ll", ckertype = "epanechnikov", bandwidth.compute = FALSE, + bws = bw)) + newDat_left <- data.frame(x = seq(min(dat_left$x), cutpoint - 0.001, by = 0.01)) + newDat_right <- data.frame(x = seq(cutpoint, max(dat_right$x), by = 0.01)) + pred_left <- predict(np_reg_left, newdata = newDat_left, se.fit = TRUE) + pred_right <- predict(np_reg_right, newdata = newDat_right, se.fit = TRUE) + } + ## plot + plotBin(dat$x, dat$y, h = binwidth, cutpoint=cutpoint, ...) + if (chart == "locpoly") { + lines(llp_left$x, llp_left$y) + lines(llp_right$x, llp_right$y) + } else { + lines(newDat_left$x, pred_left$fit, col = 1) + lines(newDat_left$x, pred_left$fit + 2 * pred_left$se.fit, col = 2, lty = 2) + lines(newDat_left$x, pred_left$fit - 2 * pred_left$se.fit, col = 2, lty = 2) + + lines(newDat_right$x, pred_right$fit, col = 1) + lines(newDat_right$x, pred_right$fit + 2 * pred_right$se.fit, col = 2, lty = 2) + lines(newDat_right$x, pred_right$fit - 2 * pred_right$se.fit, col = 2, lty = 2) + } +} + +#' @export +vcov.rdd_reg_np <- function(object, ...) { + + infType <- infType(object) + if (infType == "np") { + warning("No vcov() available when rdd_reg_np() was called with infType='np'") + res <- NULL + } else { + res <- vcov(object$RDDslot$model) + } + res +} diff --git a/R/var_estim.R b/R/var_estim.R new file mode 100644 index 0000000..a2ee059 --- /dev/null +++ b/R/var_estim.R @@ -0,0 +1,177 @@ + + + +dens_estim <- function(x, point, bw, eachSide = TRUE) { + + N <- length(x) + + if (missing(bw)) + bw <- 1.84 * sd(x) * N^(-1/5) + + if (eachSide) { + isIn_bw_left <- x >= (point - bw) & x < point + isIn_bw_right <- x >= point & x <= (point + bw) + + NisIn_bw_left <- sum(isIn_bw_left, na.rm = TRUE) + NisIn_bw_right <- sum(isIn_bw_right, na.rm = TRUE) + + res <- (NisIn_bw_left + NisIn_bw_right)/(2 * N * bw) + } else { + isIn_bw_both <- x >= (point - bw) & x <= (point + bw) + NisIn_bw_both <- sum(isIn_bw_both, na.rm = TRUE) + res <- NisIn_bw_both/(2 * N * bw) + } + res +} + +dens_estim2 <- function(x, point, bw, kernel = "gaussian", ...) { + + + if (missing(bw)) + bw <- "SJ" + + d <- density(x, from = point, to = point, n = 1, na.rm = TRUE, kernel = kernel, bw = bw, ...) + d$y +} + + +var_estim <- function(x, y, point, bw, eachSide = TRUE) { + + + N <- length(x) + if (missing(bw)) + bw <- 1.84 * sd(x) * N^(-1/5) + + if (eachSide) { + isIn_bw_left <- x >= (point - bw) & x < point + isIn_bw_right <- x >= point & x <= (point + bw) + var_inh_left <- var(y[isIn_bw_left], na.rm = TRUE) + var_inh_right <- var(y[isIn_bw_right], na.rm = TRUE) + res <- c(var_inh_left, var_inh_right) + } else { + isIn_bw <- x >= (point - bw) & x <= point + bw + var_inh <- var(y[isIn_bw], na.rm = TRUE) + res <- var_inh + } + res +} + + +#' @importFrom locpol locpol +#' @importFrom locpol gaussK + +### Add locpol kernel for uniform: +uniK <- function(x) ifelse(abs(x) <= 1, 1/2, 0) +attr(uniK, "RK") <- 1/2 ## Rk: kernel(u)^2 +attr(uniK, "mu0K") <- 1 +attr(uniK, "mu2K") <- 1/3 ## second orde rmoment of K +attr(uniK, "K4") <- NA ## see with author! +attr(uniK, "RdK") <- NA ## see with author! +attr(uniK, "dom") <- c(-1, 1) ## + +var_estim2 <- function(x, y, point, bw, estim = c("var", "NW", "NW_loc", "LL_kern", "LL_loc", "var_loc"), sides = c("both", "uni"), + kernel = c("Normal", "Uniform"), dfadj = TRUE) { + + sides <- match.arg(sides) + estim <- match.arg(estim) + kernel <- match.arg(kernel) + N <- length(x) + if (missing(bw)) + bw <- 1.84 * sd(x) * N^(-1/5) + + if (sides == "uni") { + isIn_bw_left <- x >= (point - bw) & x < point + isIn_bw_right <- x >= point & x <= (point + bw) + var_inh_left <- var(y[isIn_bw_left], na.rm = TRUE) + var_inh_right <- var(y[isIn_bw_right], na.rm = TRUE) + res <- c(var_inh_left, var_inh_right) + } else { + if (estim == "NW") { + ker <- switch(kernel, Uniform = "box", Normal = "normal") + m <- ksmooth(x = x, y = y, bandwidth = bw * 2, x.points = point, kernel = ker)$y + s <- ksmooth(x = x, y = y^2, bandwidth = bw * 2, x.points = point, kernel = ker)$y + } else if (estim == "NW_loc") { + ker <- switch(kernel, Uniform = uniK, Normal = gaussK) + df_xy <- data.frame(y = y, x = x, y2 = y^2) + # a <<- locCteSmootherC(x=x, y=y, xeval=point, bw=bw, kernel=uniK) aa <<- locCteSmootherC(x=x, y=y, xeval=point, bw=bw, + # kernel=gaussK) + m <- locpol(y ~ x, data = df_xy, bw = bw, xeval = point, deg = 0, kernel = ker) + s <- locpol(y2 ~ x, data = df_xy, bw = bw, xeval = point, deg = 0, kernel = ker) + m <- m$lpFit["y"] + s <- s$lpFit["y2"] + } else if (estim == "LL_kern") { + if (kernel != "Normal") + warning("Kernel set to Normal for locpoly") + m <- locpoly(x = x, y = y, bandwidth = bw, gridsize = 200) + s <- locpoly(x = x, y = y^2, bandwidth = bw, gridsize = 200) + m <- m$y[which.min(abs(m$x - point))] + s <- s$y[which.min(abs(s$x - point))] + } else if (estim == "LL_loc") { + ker <- switch(kernel, Uniform = uniK, Normal = gaussK) + df_xy <- data.frame(y = y, x = x, y2 = y^2) + m <- locpol(y ~ x, data = df_xy, bw = bw, xeval = point, kernel = ker) + s <- locpol(y2 ~ x, data = df_xy, bw = bw, xeval = point, kernel = ker) + m <- m$lpFit["y"] + s <- s$lpFit["y2"] + } else { + s <- m <- 1 + } + sh <- s - m^2 + res <- sh + if (estim == "var_loc") { + ker <- switch(kernel, Uniform = uniK, Normal = gaussK) + df_xy <- data.frame(y = y, x = x, y2 = y^2) + m <- locpol(y ~ x, data = df_xy, bw = bw, xeval = point, kernel = ker) + res <- m$lpFit$var + } else if (estim == "var") { + isIn_bw <- x >= (point - bw) & x <= (point + bw) + var <- var(y[isIn_bw], na.rm = TRUE) + res <- if (dfadj) + var * (sum(isIn_bw) - 1)/sum(isIn_bw) else var + } + + } + names(res) <- NULL + as.numeric(res) +} + + +## Formula: \sqrt[ (C_2 * \sigma(x)^2 / f(x)) / ( n * h) ] Imbens & Kalyan: C_2/N*h (sigma_l^2 + \sigma_r^2)/f(x) value of +## constant: 4.8 (using boundary kernel: Triangular (value of constant: 33.6 (using boundary kernel: Triangular +## library(locpol) computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=1), lower=0, upper=Inf) or: +## computeRK(equivKernel(TrianK, nu=0, deg=1, lower=-1, upper=1), lower=-Inf, upper=Inf) + +all_var_low <- function(x, y, point, bw, eachSide = TRUE, return = c("se", "all")) { + + return <- match.arg(return) + + N <- length(x) + if (missing(bw)) + bw <- 1.84 * sd(x) * N^(-1/5) + + var <- var_estim(x = x, y = y, point = point, bw = bw, eachSide = eachSide) + dens <- dens_estim(x = x, point = point, bw = bw, eachSide = eachSide) + + C2 <- if (eachSide) + 4.8 else 2/3 + const <- C2/(N * bw) + all <- const * sum(var)/dens + res <- sqrt(all) + names(res) <- "se" + if (return == "all") + res <- c(res, cons = const, dens = dens, var = sum(var)) + res + +} + + +all_var <- function(...) all_var_low(...) + +all_var.rdd_reg.np <- function(x) { + + bw <- getBW(x) + dat <- getOriginalData(x) + cutpoint <- getCutpoint(x) + res <- all_var_low(dat$x, dat$y, point = cutpoint, bw = bw, eachSide = TRUE, return = "se") + res +} diff --git a/R/various_code.R b/R/various_code.R new file mode 100644 index 0000000..b33b561 --- /dev/null +++ b/R/various_code.R @@ -0,0 +1,20 @@ +### MISC +is_even <- function(a) { + a%%2 == 0 +} + + +Kernel_tri <- function(X, center, bw) { + ifelse(abs(X - center) > bw, 0, 1 - (abs(X - center)/bw)) +} + +Kernel_uni <- function(X, center, bw) { + ifelse(abs(X - center) > bw, 0, 1) +} + +.onAttach <- function(...) { + packageStartupMessage(" +Please consider citing R and rddtools, +citation() +citation('rddtools') +")} diff --git a/R/waldci.R b/R/waldci.R new file mode 100644 index 0000000..33679e0 --- /dev/null +++ b/R/waldci.R @@ -0,0 +1,145 @@ +#' Confint allowing vcov +#' +#' Version of vcov allowing for confint +#' @param x Object of class lm or else +#' @param parm specification of which parameters are to be given confidence intervals, see confint +#' @param level the confidence level required, see confint() +#' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich +#' @param df Degrees of freedom +#' @param \ldots Further arguments + + +waldci <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) { + UseMethod("waldci") +} + +waldci.default <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) { + ## use S4 methods if loaded + coef0 <- if ("stats4" %in% loadedNamespaces()) + stats4::coef else coef + vcov0 <- if ("stats4" %in% loadedNamespaces()) + stats4::vcov else vcov + + ## extract coefficients and standard errors + est <- coef0(x) + if (is.null(vcov.)) + se <- vcov0(x) else { + if (is.function(vcov.)) + se <- vcov.(x) else se <- vcov. + } + se <- sqrt(diag(se)) + + ## match using names and compute t/z statistics + if (!is.null(names(est)) && !is.null(names(se))) { + anames <- names(est)[names(est) %in% names(se)] + est <- est[anames] + se <- se[anames] + } + + ## process level + a <- (1 - level)/2 + a <- c(a, 1 - a) + + ## get quantile from central limit theorem + if (is.null(df)) { + df <- try(df.residual(x), silent = TRUE) + if (inherits(df, "try-error")) + df <- NULL + } + if (is.null(df)) + df <- 0 + fac <- if (is.finite(df) && df > 0) + qt(a, df = df) else qnorm(a) + + ## set up confidence intervals + ci <- cbind(est + fac[1] * se, est + fac[2] * se) + colnames(ci) <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3L), "%") + + ## process parm + if (is.null(parm)) + parm <- seq_along(est) + # if(is.character(parm)) parm <- which(parm %in% names(est)) + if (is.character(parm)) + parm <- which(names(est) %in% parm) + ci <- ci[parm, , drop = FALSE] + return(ci) +} + + +## copy of stats:::format.perc +format.perc <- function(probs, digits) paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%") + +waldci.rdd_reg_np <- function(x, level = 0.95, vcov. = NULL, df = Inf, ...) { + + inf_met <- infType(x) ## def in Misc.R + if (inf_met == "se") { + if (!is.null(vcov.) | !is.infinite(df)) { + warning("Arg 'vcov.' and 'df' only work for rdd_reg with inf='lm'") + } + ## code recycled from stats::confint.default + co <- rdd_coef(x, allInfo = TRUE) + a <- (1 - level)/2 + a <- c(a, 1 - a) + fac <- qnorm(a) + pct <- format.perc(a, 3) ## import!! + ci <- array(NA, dim = c(1, 2L), dimnames = list("D", pct)) + ci[] <- co[, "Estimate"] + co[, "Std. Error"] %o% fac + return(ci) + } else { + waldci.default(x$RDDslot$model, parm = "D", level = level, vcov. = vcov., df = df, ...) + } +} + + + + +waldci.glm <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) waldci.default(x, parm = parm, level = level, + vcov. = vcov., df = df, ...) + +waldci.mlm <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) { + ## obtain vcov + v <- if (is.null(vcov.)) + vcov(x) else if (is.function(vcov.)) + vcov.(x) else vcov. + + ## nasty hack: replace coefficients so that their names match the vcov() method + x$coefficients <- structure(as.vector(x$coefficients), .Names = colnames(vcov(x))) + + ## call default method + waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) +} + +waldci.survreg <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) { + if (is.null(vcov.)) + v <- vcov(x) else { + if (is.function(vcov.)) + v <- vcov.(x) else v <- vcov. + } + if (length(x$coefficients) < NROW(x$var)) { + x$coefficients <- c(x$coefficients, `Log(scale)` = log(x$scale)) + } + waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) +} + + +if (FALSE) { + + library(sandwich) + library(lmtest) + + reg <- lm(freeny) + + ### Regular + all(confint(reg) == waldci(reg)) + confint(reg) + co_reg <- coeftest(reg) + co_reg[, 1] + qnorm(0.975) * co_reg[, 2] + co_reg[, 1] + qt(0.975, df = reg[["df.residual"]]) * co_reg[, 2] + + ## vcovHC + waldci(reg, vcov. = vcovHC) + co <- coeftest(reg, vcov. = vcovHC) + co[, 1] + qnorm(0.975) * co[, 2] + co[, 1] + qt(0.975, df = reg[["df.residual"]]) * co[, 2] + +} diff --git a/RDDtools/.Rbuildignore b/RDDtools/.Rbuildignore deleted file mode 100644 index b8752e7..0000000 --- a/RDDtools/.Rbuildignore +++ /dev/null @@ -1 +0,0 @@ -misc diff --git a/RDDtools/DESCRIPTION b/RDDtools/DESCRIPTION deleted file mode 100644 index bb9c9d7..0000000 --- a/RDDtools/DESCRIPTION +++ /dev/null @@ -1,26 +0,0 @@ -Package: RDDtools -Type: Package -Title: A toolbox for regression discontinuity design (RDD) -Version: 0.22 -Date: 21/05/2014 -Authors@R: person("Matthieu", "Stigler", role = c("aut","cre"), - email="Matthieu.Stigler@iheid.ch") -Maintainer: Matthieu Stigler -Imports: - KernSmooth, - ggplot2, - rdd, - np, - sandwich, - lmtest, - Formula, - locpol, - methods, -Depends: - AER -Suggests: - stats4, - car -Description: Provides a set of functions for RDD, from data visualisation, - estimation and testing. -License: GPL (>= 2) diff --git a/RDDtools/NAMESPACE b/RDDtools/NAMESPACE deleted file mode 100644 index 6c8a163..0000000 --- a/RDDtools/NAMESPACE +++ /dev/null @@ -1,67 +0,0 @@ -S3method("[",RDDdata) -S3method(RDDcoef,RDDreg_np) -S3method(RDDcoef,RDDreg_npreg) -S3method(RDDcoef,default) -S3method(as.data.frame,RDDdata) -S3method(as.lm,RDDreg) -S3method(as.lm,RDDreg_np) -S3method(bread,RDDreg_np) -S3method(covarTest_dis,RDDdata) -S3method(covarTest_dis,RDDreg) -S3method(covarTest_mean,RDDdata) -S3method(covarTest_mean,RDDreg) -S3method(estfun,RDDreg_np) -S3method(getCall,RDDreg) -S3method(model.frame,RDDreg_np) -S3method(model.matrix,RDDdata) -S3method(plot,RDDdata) -S3method(plot,RDDreg_lm) -S3method(plot,RDDreg_np) -S3method(plotPlacebo,PlaceboVals) -S3method(plotPlacebo,RDDreg) -S3method(plotPlaceboDens,PlaceboVals) -S3method(plotPlaceboDens,RDDreg) -S3method(plotSensi,RDDreg_lm) -S3method(plotSensi,RDDreg_np) -S3method(print,RDDreg_lm) -S3method(print,RDDreg_np) -S3method(print,summary.RDDreg_np) -S3method(subset,RDDdata) -S3method(summary,RDDdata) -S3method(summary,RDDreg_np) -S3method(vcov,RDDreg_np) -export(RDDbw_IK) -export(RDDbw_RSW) -export(RDDcoef) -export(RDDdata) -export(RDDgenreg) -export(RDDpred) -export(RDDreg_lm) -export(RDDreg_np) -export(ROT_bw) -export(as.lm) -export(as.npreg) -export(as.npregbw) -export(clusterInf) -export(computePlacebo) -export(covarTest_dis) -export(covarTest_mean) -export(dens_test) -export(gen_MC_IK) -export(plotPlacebo) -export(plotPlaceboDens) -export(plotSensi) -export(vcovCluster) -export(vcovCluster2) -import(Formula) -import(KernSmooth) -import(ggplot2) -import(lmtest) -import(methods) -import(np) -import(rdd) -import(sandwich) -importFrom(AER,ivreg) -importFrom(locpol,gaussK) -importFrom(locpol,locpol) -importFrom(stats,getCall) diff --git a/RDDtools/NEWS b/RDDtools/NEWS deleted file mode 100644 index d03d969..0000000 --- a/RDDtools/NEWS +++ /dev/null @@ -1,46 +0,0 @@ - -RDDtools 0.22 -=========== -Updated on 21/5/14 - -* RDDdata: change arg z to covar, add new argument z for sharp, currently unused. - -* dens_test: work now on RDDreg, return object htest - -* Multiple changes in help files - -* Correct import, suggests, calls to ::: - -RDDtools 0.21 -=========== -Updated on 25/7/13 - -* Add new function RDDpred - -* Add new model.matrix.RDDdata, preparing all output, now used by all RDDreg_np, RDDreg_lm, RDDgenre... - -* Add method vcov.RDDreg, as.lm.RDDreg - -* Add enw function vcovCluster2, complement doc, add M Arai, - -* Add data STAR_MHE - -* Many small fixes - -RDDtools 0.2 -=========== -Updated on 16/7/13 - -* Add new option to have separate or same covariates - -* Add as.nprg, to convert to a np regression from package np - -* Add RDDcoef, working on multiple models (lm, np, npreg). - -* Many fixes... - -RDDtools 0.1 -=========== -Initial commit on 29/04/2013 - -* Initial commit, containing RDDdata, RDDreg_lm, RDDreg_np, plotSensi, plotPlacebo, etc... diff --git a/RDDtools/R/Lee2008-data.R b/RDDtools/R/Lee2008-data.R deleted file mode 100644 index 173727b..0000000 --- a/RDDtools/R/Lee2008-data.R +++ /dev/null @@ -1,27 +0,0 @@ -#' @name Lee2008 -#' @title Dataset used in Lee (2008) -#' @description U.S. House elections data -#' @docType data -#' @usage Lee2008 -#' @description Dataset described used in Imbens and Kalyamaran (2012), and probably the same dataset used in Lee (2008), -#' @format A data frame with 6558 observations and two variables: -#' \describe{ -#' \item{x}{Vote at election t-1} -#' \item{y}{Vote at election t} -#' } -#' @source Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} -#' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," -#' Review of Economic Studies (2012) 79, 933-959 -#' @references Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, -#' \emph{Journal of Econometrics}, 142, 675-697 -#' @examples -#' data(Lee2008) -#' RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) -#' summary(RDDlee) -#' plot(RDDlee) - - -NULL -# Lee2008 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) -# colnames(Lee2008) <- c("x", "y") -# save(Lee2008, file="/home/mat/Dropbox/HEI/rdd/Rcode/RDDtools/data/Lee2008.rda") \ No newline at end of file diff --git a/RDDtools/R/RDDcoef.R b/RDDtools/R/RDDcoef.R deleted file mode 100644 index a275bb7..0000000 --- a/RDDtools/R/RDDcoef.R +++ /dev/null @@ -1,33 +0,0 @@ -#' RDD coefficient -#' -#' Function to access the RDD coefficient in the various regressions -#' @param object A RDD regression object -#' @param allInfo whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval. -#' @param allCo Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients -#' @param \ldots Further arguments passed to/from specific methods -#' @return Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, -#' its standard value, t test and p-value and -#' @export -RDDcoef <- function(object, allInfo=FALSE, allCo=FALSE, ...) - UseMethod("RDDcoef") - -#' @rdname RDDcoef -#' @method RDDcoef default -#' @S3method RDDcoef default -RDDcoef.default <- function(object, allInfo=FALSE, allCo=FALSE, ...){ - res <- coef(summary(object)) - if(!allCo) res <- res["D",, drop=FALSE] - if(!allInfo) res <- res[,"Estimate"] - res -} - -#' @rdname RDDcoef -#' @method RDDcoef RDDreg_np -#' @S3method RDDcoef RDDreg_np -RDDcoef.RDDreg_np <- function(object, allInfo=FALSE, allCo=FALSE, ...){ - res<- object$coefMat - if(!allCo) res <- res["D",, drop=FALSE] - if(!allInfo) res <- res[,"Estimate"] - res -} - diff --git a/RDDtools/R/RDDdata.R b/RDDtools/R/RDDdata.R deleted file mode 100644 index b1c2723..0000000 --- a/RDDtools/R/RDDdata.R +++ /dev/null @@ -1,228 +0,0 @@ -#'Construct RDDdata -#' -#' Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. -#' -#' @param x Forcing variable -#' @param y Output -#' @param covar Exogeneous variables -#' @param cutpoint Cutpoint -#' @param labels Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently. -#' @param data A data-frame for the \code{x} and \code{y} variables. If this is provided, -#' the column names can be entered directly for argument \code{x} and \code{y} -#' @param z Assignment variable for the fuzzy case. -#' @return Object of class \code{RDDdata}, inheriting from \code{data.frame} -#' @export -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @examples -#' data(Lee2008) -#' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -#' rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) -#' -#' # The print() function is the same as the print.data.frame: -#' rd -#' -#' # The summary() and plot() function are specific to RDDdata -#' summary(rd) -#' plot(rd) - - -RDDdata <- function(y, x, covar, cutpoint, z, labels, data){ - - -## check args - type <- ifelse(missing(z), "Sharp", "Fuzzy") - hasCovar <- !missing(covar) - if(missing(cutpoint)) stop("Please provide cutpoint") - covar_nam <- deparse(substitute(covar)) - -## Use data in case: - if(!missing(data)){ - pf <- parent.frame() - x <- eval(substitute(x), data, enclos = pf) # copy from with.default - y <- eval(substitute(y), data, enclos = pf) # copy from with.default - if(hasCovar) covar <- eval(substitute(covar), data, enclos = pf) # idem - } - -### Check y, x univariate - k_y <- NCOL(y) - k_x <- NCOL(x) - - if(any(!c(k_y, k_x)==1)) stop("y or x should be univariate") - -### Check y, x, z same size - n_y <- NROW(y) - n_x <- NROW(x) - n_covar <- if(hasCovar) NROW(x) else NULL - - if(any(c(n_y, n_x) != n_covar)) stop("y or x should be univariate") - -### Check cutpoint - range_x <- range(x, na.rm=TRUE) - if(cutpointrange_x[2]) stop("Cutpoint outside range of x") - -## Check labels - if(!missing(labels)){ - if(!is.list(labels)) stop("labels should be a list.") - if(is.null(names(labels)) || !all(names(labels)%in%c("x", "y", "covar"))) stop("labels should be a list with components x, and/or y, and/or covar") - if(hasCovar){ - if("covar"%in%names(labels) && length(labels$covar)!=NCOL(covar)) stop("There should be ", NCOL(covar), " values (dim of covar) for component 'covar' in labels") - } - } else { - labels <- list() - } - -# if(is.null(labels$x)) labels$x <- deparse(substitute(x)) -# if(is.null(labels$y)) labels$y <- deparse(substitute(y)) -# if(hasCova && is.null(labels$covar)) labels$covar <- if(NCOL(covar)==1) names(deparse(substitute(y)) - -## Assemble data - RDDdat <- data.frame(x=x, y=y) - if(hasCovar) { - RDDdat <- cbind(RDDdat,covar) - if(NCOL(covar)==1 && is.null(colnames(covar))) colnames(RDDdat)[3] <- covar_nam - } - - if(type=="Fuzzy"){ - RDDdat <- cbind(RDDdat,z) - } - -## return - class(RDDdat) <- c("RDDdata", "data.frame") - attr(RDDdat, "hasCovar") <- hasCovar - attr(RDDdat, "labels") <- labels - attr(RDDdat, "cutpoint") <- cutpoint - attr(RDDdat, "type") <- type - - RDDdat -} - - -### Specific subsetting methods - -##### @S3method as.data.frame RDDdata -# as.data.frame.RDDdata <- function(x) { -# subset(x, y> -# }as.data.frame.default(x) - -#' @S3method "[" RDDdata -'[.RDDdata' <- function(x,i,...){ - attr_x <- attributes(x) - r <- NextMethod("[", object=as.data.frame(x)) - -## keep attributes only if remains a data frame! - if(inherits(r, "data.frame")){ - attr_x$row.names <- attr(r, "row.names") - attr_x$names <- attr(r, "names") - mostattributes(r) <- attr_x - attributes(r) <- attributes(r)[match(names(attr_x), names(attributes(r)))] - } -# newCla <- class(r) -# if(any(grepl("RDDdata", newCla))) newCla <- newCla[-grepl("RDDdata", newCla)] -# print(names(attributes(newCla))) -# -# if(!inherits(newCla, "data.frame")) attr(r, "class")[which(attr(r, "class")=="data.frame")] <- newCla - r -} - -#' @S3method subset RDDdata -subset.RDDdata <- function (x, subset, select, drop = FALSE, ...) { - attr_x <- attributes(x) - -### subset code: start - if (missing(subset)) - r <- TRUE - else { - e <- substitute(subset) - r <- eval(e, x, parent.frame()) - if (!is.logical(r)) - stop("'subset' must evaluate to logical") - r <- r & !is.na(r) - } - if (missing(select)) - vars <- TRUE - else { - nl <- as.list(seq_along(x)) - names(nl) <- names(x) - vars <- eval(substitute(select), nl, parent.frame()) - } - res <- x[r, vars, drop = drop] -### subset code: end -# r <- subset.data.frame(x,...) -# r <- NextMethod("subset") - -## keep attributes only if remains a data frame! - if(inherits(r, "data.frame")){ - attr_x$row.names <- attr(res, "row.names") - attr_x$names <- attr(res, "names") - mostattributes(res) <- attr_x - attributes(res) <- attributes(res)[match(names(attr_x), names(attributes(res)))] - } - res -} - -#' @S3method as.data.frame RDDdata -as.data.frame.RDDdata <- function(x,...){ - class(x) <- "data.frame" - attr(x, "hasCovar") <- NULL - attr(x, "labels") <- NULL - attr(x, "cutpoint") <- NULL - x -} - - -if(FALSE){ - -library(RDDtools) -data(Lee2008) - -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -Lee2008_rdd2 <- RDDdata(y=y, x=x,data=Lee2008, cutpoint=0) - -all.equal(Lee2008_rdd, Lee2008_rdd2) - -### wrong covariate setting, legitimate warnings: -Lee2008_rdd_lab1 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=c("a","bb")) -Lee2008_rdd_lab2 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list("a","bb")) -Lee2008_rdd_lab3 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list(x="a",u="bb")) - -### Covariate setting: -Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE))) - -Lee2008_rdd_Z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=c("a","bb")) - -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha")) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", u="aa")) - -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", covar="aa")) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", z=c("aa", "hj"))) - -### subsetting -dat <- Lee2008_rdd -dat_sub <- subset(Lee2008_rdd, x<1000) -dat_ind <- Lee2008_rdd[1:nrow(Lee2008_rdd),] -dat_ind_1 <- Lee2008_rdd[,1] -dat_ind_2 <- Lee2008_rdd[1:5,] - - -all.equal(dat, dat_sub) -all.equal(attributes(dat), attributes(dat_sub)) - -all.equal(dat, dat_ind) -all.equal(attributes(dat), attributes(dat_ind)) - -df<- as.data.frame(Lee2008_rdd) -head(df) - - -head(Lee2008_rdd_Z) -colnames(Lee2008_rdd_Z[, -c(1,2)]) -attributes(Lee2008_rdd_Z[, -c(1,2)]) - -colnames(subset(Lee2008_rdd_Z,select= c("z1","z2"))) - -colnames(dat_sub) -colnames(dat_ind) -colnames(dat_ind_1) -colnames(dat_ind_2) -} \ No newline at end of file diff --git a/RDDtools/R/RDDdata_methods.R b/RDDtools/R/RDDdata_methods.R deleted file mode 100644 index d82fb05..0000000 --- a/RDDtools/R/RDDdata_methods.R +++ /dev/null @@ -1,176 +0,0 @@ - - -### SUMMARY method -#' @S3method summary RDDdata -summary.RDDdata <- function(object, ...){ - - cutpoint <- getCutpoint(object) - hasCovar_eng <- ifelse(hasCovar(object), "yes", "no") - cat("### RDDdata object ###\n") - cat("\nCutpoint:", cutpoint, "\n") - cat("Sample size:", - "\n\t-Full :", nrow(object), - "\n\t-Left :", sum(object$x=cutpoint)) - cat("\nCovariates:", hasCovar_eng, "\n") -} - -#' Plot RDDdata -#' -#' Binned plot of the forcing and outcome variable -#' -#' @param x Object of class RDDdata -#' @param h The binwidth parameter (note this differs from the bandwidth parameter!) -#' @param nbins Alternative to h, the total number of bins in the plot. -#' @param xlim The range of the x data -#' @param cex Size of the points, see \code{\link{par}} -#' @param nplot Number of plot to draw -#' @param device Type of device used. Currently not used. -#' @param \ldots Further arguments passed to the \code{\link{plot}} function. -#' @return A plot -#' @details Produces a simple binned plot averaging values within each interval. The length of the intervals -#' is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth -#' argument, that gives half of the length of the kernel window. -#' When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}. -#' -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @examples -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' plot(Lee2008_rdd) -#' -#' ## Specify manually the bandwidth: -#' plot(Lee2008_rdd, h=0.2) -#' -#' ## Show three plots with different bandwidth: -#' plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) -#' -#' ## Specify instead of the bandwidth, the final number of bins: -#' plot(Lee2008_rdd, nbins=22) -#' -#' ## If the specified number of bins is odd, the larger number is given to side with largest range -#' plot(Lee2008_rdd, nbins=21) -#' @method plot RDDdata -#' @S3method plot RDDdata - - -### PLOT method -plot.RDDdata <- function(x, h, nbins=NULL, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=1, device=c("base", "ggplot"),...){ - - object <- x - cutpoint <- getCutpoint(object) - device <- match.arg(device) - -## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) - if(missing(h) & is.null(nbins)) { - if(!all(xlim==range(object$x, na.rm=TRUE))){ - object <- subset(object, x> min(xlim) & x< max(xlim)) - } - h <- RDDbw_RSW(object) - if(is.even(nplot)) { - se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) - } else { - se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) - } - hs <- if(nplot==1) h else se *h - } else if(!missing(h) & is.null(nbins)){ - if(length(h)==1){ - if(is.even(nplot)) { - se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) - } else { - se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) - } - hs <- if(nplot==1) h else se *h - } else { - if(length(h==nplot)){ - hs <- h - } else { - stop("Length of h should be either one or equal to nplot (", nplot, ")") - } - } - } else if(!is.null(nbins)){ - hs <- rep(0.05, nplot) - if(length(nbins)!=nplot){ - stop("Length of nbins should be equal to nplot (", nplot, ")") - } - } - - - - -## plot - - par_orig <- par() - par(mfrow=c(nplot,1)) - for(i in 1:nplot){ - plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], nbins=nbins[i], xlim=xlim, cex=cex,...) - } - par(mfrow=c(1,1)) - - - -## invisible return: - invisible(object) -} - - - -#' Convert a rdd object to lm -#' @param x An object to convert to lm -#' @return An object of class \code{lm} -#' @seealso \code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}. -#' @examples -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -#' reg_para_lm <- as.lm(reg_para) -#' reg_para_lm -#' plot(reg_para_lm, which=4) -#' @export -as.lm <- function(x) - UseMethod("as.lm") - - -as.lm_RDD <- function(x){ - - at_x <- attributes(x) - at_x[names(at_x)!="names"] <- NULL - class(x) <- "lm" - - x -} - -#' @S3method as.lm RDDreg_np -as.lm.RDDreg_np <- function(x) as.lm_RDD(x) - -#' @S3method as.lm RDDreg -as.lm.RDDreg <- function(x) as.lm_RDD(x) - - - - -# subset.RDDdata <- function(x,...){ -# -# res <- subset.data.frame(x,...) -# attributes(res) <- attributes(x) -# res -# } - - -### EXAMPLE -if(FALSE){ - library(RDDtools) -# data(Lee2008) - - - environment(plot.RDDdata) <- environment(RDDdata) - - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - plot(Lee2008_rdd) - - plot(Lee2008_rdd, h=0.2) - plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) - - plot(Lee2008_rdd, nbins=21) - -} diff --git a/RDDtools/R/RDDpred.R b/RDDtools/R/RDDpred.R deleted file mode 100644 index d0cf456..0000000 --- a/RDDtools/R/RDDpred.R +++ /dev/null @@ -1,216 +0,0 @@ -#' RDD coefficient prediction -#' -#' Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{RDDcoef}}) -#' @param object A RDD regression object -#' @param covdata New data.frame specifying the values of the covariates, can have multiple rows. -#' @param se.fit A switch indicating if standard errors are required. -#' @param vcov. Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}} -#' @param newdata Another data on which to evaluate the x/D variables. Useful in very few cases. -#' @param stat The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them -#' @param weights Eventual weights for the averaging of the predicted values. -#' @details The function \code{RDDpred} does a simple prediction of the RDD effect -#' \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)} -#' When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient, -#' shown in the outputs, or obtained with \code{\link{RDDcoef}}. If there were covariates, and if these covariates were estimated using the -#' \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e. -#' had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s). -#' \code{RDDpred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with -#' the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}. -#' -#' In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat="mean"}. -#' Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases, -#' this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also! -#' -#' Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula: -#' \deqn{x_i \Omega x_i^{'}} -#' where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and -#' \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained -#' as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account. -#' @return Returns the predicted value(s), and, if se.fit=TRUE, their standard errors. -#' @export -#' @references Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024 -#' @examples -#' ## Load data, add (artificial) covariates: -#' data(Lee2008) -#' n_Lee <- nrow(Lee2008) -#' z1 <- runif(n_Lee) -#' Lee2008_rdd <- RDDdata(y=y, x=x, data=Lee2008, covar=z1, cutpoint=0) -#' -#' ## estimation without covariates: RDDpred is the same than RDDcoef: -#' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -#' -#' RDDpred(reg_para) -#' RDDcoef(reg_para, allInfo=TRUE) -#' -#' ## estimation with covariates: -#' reg_para_cov <- RDDreg_lm(RDDobject=Lee2008_rdd, covariates="z1", covar.opt=list(slope="separate")) -#' RDDpred(reg_para_cov, covdata=data.frame(z1=0)) ## should obtain same result than with RDestimate -#' RDDpred(reg_para_cov, covdata=data.frame(z1=0.5)) #evaluate at mean of z1 (as comes from uniform) - -RDDpred <- function(object, covdata, se.fit=TRUE, vcov. = NULL, newdata, stat=c("identity", "sum", "mean"), weights){ - - stat <- match.arg(stat) - - if(!missing(weights)) { - if(missing(covdata)) stop("Arg 'weights' only useful with arg 'covdata'") - if(stat=="identity") stop("Argument 'weights' not useful when arg: stat='identity'") - if(stat=="sum") { - warning("Providing weights for a sum makes little sense?!") - } - if(length(weights)!=NROW(covdata)) stop("Weights should be of the same length than covdata") - } - - x_call <- getCall(object) - hasCo <- hasCovar(object) - - if(is.null(x_call$covar.opt)){ - covar.slope <- "same" - covar.strat <- "include" - } else { - covar.slope <- ifelse(is.null(x_call$covar.opt$slope), "same", x_call$covar.opt$slope) - covar.strat <- ifelse(is.null(x_call$covar.opt$strategy), "include", x_call$covar.opt$strategy) - } - - -## get original data structure: - mf <- model.frame(object)[1:2,-1] - if(any(grepl("\\(weights\\)", colnames(mf)))) mf <- mf[,-grep("\\(weights\\)", colnames(mf))] - -## Fill orig struc with 0/1 - if(missing(newdata)){ - which.D <- grep("^D$", colnames(mf)) - mf[,which.D] <- c(0,1) ## set coeff of interest - mf[,-which.D] <- 0 ## remove others (not absolutely necessary actually) - newdata <- mf - } - -## Merge covdata with newdata: - - if(!missing(covdata)){ - if(covar.strat=="residual") stop("Do not provide 'covdata' if covariates were use with 'residual' strategy") - if(covar.slope=="separate"){ - Nrow_cov <- nrow(covdata) - if(Nrow_cov>1) newdata <- newdata[c(1, rep(2,Nrow_cov)),] - if(!is.null(rownames(covdata))) { - if("1" %in% rownames(covdata)) rownames(newdata)[1] <- "0" - rownames(newdata)[-1] <- rownames(covdata) - } else { - rownames(newdata) <- c(0, seq_len(Nrow_cov)) - } - colnames_cov <- colnames(covdata) - ind <- seq(from=2, by=2, length.out=Nrow_cov) - if(!all(colnames_cov%in% colnames(newdata))) stop("Arg 'covdata' contains colnames not in the data") - newdata[2:nrow(newdata), paste(colnames(covdata), "D", sep=":")] <- covdata - } - } - - multiN <- nrow(newdata)>2 - -## Merge and check no NAs - X_i <- as.matrix(cbind(1,newdata)) - if(any(is.na(X_i))){ - warning("data contains NA. Were removed") - X_i <- X_i[-apply(X_i, 1, function(x) any(is.na(x))),] - } - -## Set up variance matrix: X_i (X'X)^{-1} X_i' - if(is.null(vcov.)) vcov. <- vcov(object) - X_inv <- vcov. - mat <- X_i%*%X_inv%*%t(X_i) - -## preds: - - if(!multiN) { - pred_point <- drop(diff(X_i%*%RDDcoef(object, allCo=TRUE))) - if(se.fit) pred_se <- sqrt(sum(c(diag(mat), -2*mat[1,2]))) - } else { - d <- X_i%*%coef(object) - - - Mat_SUM <- cbind( 1, diag(nrow(d)-1)) - Mat_DIAG <- matrix(diag(mat), ncol=1) - if(missing(weights)) { - MAT_SmallSum <- matrix(c(-(nrow(d)-1), rep(1,nrow(d)-1 )), nrow=1) ## create vector: [- n-1, 1, 1, 1....] - } else { - MAT_SmallSum <- matrix(c(-1, weights), nrow=1) ## create vector: [- 1, w_1, w_2, w_n] - } - - if(stat=="identity"){ - Mat_DIFF <- Mat_SUM - Mat_DIFF[,1] <- -1 - pred_point <- drop(Mat_DIFF%*%d) - if(se.fit) pred_se <- drop(sqrt(Mat_SUM %*%Mat_DIAG -2* mat[1,2:ncol(mat)])) - } else { - if(stat=="mean" & missing(weights)) MAT_SmallSum <- MAT_SmallSum/Nrow_cov - pred_point <- drop(MAT_SmallSum%*%d) - if(se.fit) pred_se <- drop(sqrt(MAT_SmallSum%*%mat%*%t(MAT_SmallSum))) - } - } - - -## result: - if(se.fit){ - res <- list() - res$fit <- pred_point - res$se.fit <- pred_se - } else { - res <- pred_point - } -res -} - -if(FALSE){ - library(RDDtools) - data(Lee2008) - head(Lee2008) - - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - set.seed(123) - n_Lee <- nrow(Lee2008) - Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) - Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,utpoint=0) - -## use: - reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) - - RDDpred(reg_para) - RDDcoef(reg_para, allInfo=TRUE) - all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check=FALSE) - -## pred other coefs: - pred_Xr <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=0, Xr=c(0,1))) - all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[4,1:2], unlist(pred_Xr), check=FALSE) - - pred_Xl <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=c(0,1), Xr=0)) - all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[3,1:2], unlist(pred_Xl), check=FALSE) - - reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2) - RDDpred(reg_para2) - all.equal(unlist(RDDpred(reg_para2)), RDDcoef(reg_para2, allInfo=TRUE)[1:2], check=FALSE) - - -### Covariates - reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=1, covariates="z1", covar.opt=list(slope="separate")) - reg_para4_cov - summary(reg_para4_cov) - - RDDpred(reg_para4_cov) - all.equal(unlist(RDDpred(reg_para4_cov)), RDDcoef(reg_para4_cov, allInfo=TRUE)[1:2], check=FALSE) - - all.equal(RDDpred(reg_para4_cov, covdata=data.frame(z1=0)),RDDpred(reg_para4_cov)) - -### Check RDDpred: -vec_eval <- c(2,4,4,5,6) -estim_sep <- lapply(vec_eval, function(x) RDDpred(object=reg_para4_cov, covdata=data.frame(z1=x))) -estim_toget <- RDDpred(reg_para4_cov, covdata=data.frame(z1=vec_eval)) - -all(estim_toget$fit==sapply(estim_sep, function(x) x$fit)) -all(estim_toget$se.fit==sapply(estim_sep, function(x) x$se.fit)) - -environment(RDDpred) <- environment(RDDreg_lm) -sum(RDDpred(reg_para4_cov, covdata=data.frame(z1=c(0,1,2,1)))$fit) -# RDDpred(x=reg_para4_cov, covdata=data.frame(z1=c(2,4,4,4,5,6))) -# RDDpred(reg_para4_cov) - -} diff --git a/RDDtools/R/STAR_MHE-data.R b/RDDtools/R/STAR_MHE-data.R deleted file mode 100644 index 3155f20..0000000 --- a/RDDtools/R/STAR_MHE-data.R +++ /dev/null @@ -1,171 +0,0 @@ -#' @name STAR_MHE -#' @title Transformation of the STAR dataset as used in Angrist and Pischke (2008) -#' @description Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) -#' @docType data -#' @usage STAR_MHE -#' @seealso \code{\link[AER]{STAR}} for the original dataset. -#' @format A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, -#' all other are created by Angrist and Pischke STAT code. -#' \describe{ -#' \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})} -#' \item{pscore}{The propensity score (computed by A & P)} -#' \item{classid}{The id of the class (computed by A & P)} -#' \item{cs}{Class size (computed by A & P)} -#' \item{female, nwhite}{Various covariates (computed by A & P)} -#' } -#' @details ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. -#' The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. -#' The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website -#' (\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta. -#' @references Krueger, A. (1999) "Experimental Estimates Of Education Production Functions," -#' \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May. -#' @references Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, -#' Princeton University press -#' @source Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website -#' \url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012. -#' @examples -#' data(STAR_MHE) -#' -#' # Compute the group means: -#' STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean) -#' -#' # Regression of means, with weighted average: -#' reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs) -#' coef(summary(reg_krug_gls))[2,2] - -NULL - - -##### Quick R code used on the output data: -# STAR_MHE <- read.csv(".../abuelita.csv") -# STAR_MHE$female <- as.factor(STAR_MHE$female) -# STAR_MHE$nwhite <- as.factor(STAR_MHE$nwhite) -# STAR_MHE$n <- NULL -# -# save(STAR_MHE, file="STAR_MHE.rda") - - -##### STATA code krueger.do (retrieved 26 November 2012 on http://economics.mit.edu/faculty/angrist/data1/mhe/krueger) -# version 9 -# set more 1 -# capture log close -# log using krueger, text replace -# -# /* create Krueger scaled scores */ -# -# /* reading score */ -# -# clear -# use webstar -# -# keep if cltypek > 1 /* regular classes */ -# keep if treadssk ~= . -# -# sort treadssk -# gen pread0 = 100*_n/_N -# -# egen pread = mean(pread0), by(treadssk) /* percentile score in reg. classes */ -# -# keep treadssk pread -# sort tread -# keep if tread ~= tread[_n-1] -# save tempr, replace -# -# /* math score */ -# -# use webstar -# -# keep if cltypek > 1 /* regular classes */ -# keep if tmathssk ~= . -# -# sort tmathssk -# gen pmath0 = 100*_n/_N -# egen pmath = mean(pmath0), by(tmathssk) -# -# keep tmathssk pmath -# sort tmath -# keep if tmath ~= tmath[_n-1] -# save tempm, replace -# -# /* merge percentile scores back on */ -# -# use webstar -# -# keep if stark == 1 -# -# sort treadssk -# merge treadssk using tempr -# ipolate pread treadssk, gen(pr) epolate -# drop _merge -# -# sort tmathssk -# merge tmathssk using tempm -# ipolate pmath tmathssk, gen(pm) epolate -# replace pm = 0 if pm < 0 -# drop _merge -# -# egen pscore = rowmean(pr pm) -# -# /* make class ids */ -# -# egen classid1 = group(schidkn cltypek) -# egen cs1 = count(classid1), by(classid1) -# -# egen classid2 = group(classid1 totexpk hdegk cladk) if cltypek==1 & cs >= 20 -# egen classid3 = group(classid1 totexpk hdegk cladk) if cltypek>1 & cs >= 30 -# -# gen temp = classid1*100 -# egen classid = rowtotal(temp classid2 classid3) -# egen cs = count(classid), by(classid) -# -# gen female = ssex == 2 -# gen nwhite = srace >= 2 & srace <= 6 if srace ~= . -# -# keep if cs <= 27 & pscore ~= . -# keep pscore cs schidkn classid female nwhite -# gen n = 1 -# -# save temp, replace -# -# reg pscore cs, robust -# local se = _se[cs] -# local t = _b[cs]/`se' -# predict r, res -# loneway r classid -# local rho = r(rho) -# -# collapse cs, by(classid) -# sum cs -# -# dis r(Var) -# local m = 1 + (r(Var)/r(mean) + r(mean) - 1)*`rho' -# dis `m' -# dis sqrt(`m') -# dis `se' -# dis sqrt(`m')*`se' -# dis `t'/sqrt(`m') -# -# -# use temp, clear -# -# reg pscore cs, robust -# moulton pscore cs, cluster(classid) moulton -# moulton pscore cs, cluster(classid) -# reg pscore cs, cluster(classid) -# brl pscore cs, cluster(classid) -# -# -# -# set seed 123456789 -# bootstrap "reg pscore cs" _b, reps(1000) cluster(classid) -# -# areg pscore, absorb(classid) -# predict hat -# gen ry = pscore - hat + _b[_cons] -# collapse (mean) ry cs (sum) n, by(classid) -# -# reg ry cs [aw=n] -# -# -# log close -# set more 0 diff --git a/RDDtools/R/Waldci.R b/RDDtools/R/Waldci.R deleted file mode 100644 index 153108e..0000000 --- a/RDDtools/R/Waldci.R +++ /dev/null @@ -1,139 +0,0 @@ -#' Confint allowing vcov -#' -#' Version of vcov allowing for confint -#' @param x Object of class lm or else -#' @param parm specification of which parameters are to be given confidence intervals, see confint -#' @param level the confidence level required, see confint() -#' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich -#' @param df Degrees of freedom -#' @param \ldots Further argument -#' @keywords internal - -waldci <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) -{ - UseMethod("waldci") -} - -waldci.default <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) -{ - ## use S4 methods if loaded - coef0 <- if("stats4" %in% loadedNamespaces()) stats4::coef else coef - vcov0 <- if("stats4" %in% loadedNamespaces()) stats4::vcov else vcov - - ## extract coefficients and standard errors - est <- coef0(x) - if(is.null(vcov.)) se <- vcov0(x) else { - if(is.function(vcov.)) se <- vcov.(x) - else se <- vcov. - } - se <- sqrt(diag(se)) - - ## match using names and compute t/z statistics - if(!is.null(names(est)) && !is.null(names(se))) { - anames <- names(est)[names(est) %in% names(se)] - est <- est[anames] - se <- se[anames] - } - - ## process level - a <- (1 - level)/2 - a <- c(a, 1 - a) - - ## get quantile from central limit theorem - if(is.null(df)) { - df <- try(df.residual(x), silent = TRUE) - if(inherits(df, "try-error")) df <- NULL - } - if(is.null(df)) df <- 0 - fac <- if(is.finite(df) && df > 0) qt(a, df = df) else qnorm(a) - - ## set up confidence intervals - ci <- cbind(est + fac[1] * se, est + fac[2] * se) - colnames(ci) <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3L), "%") - - ## process parm - if(is.null(parm)) parm <- seq_along(est) -# if(is.character(parm)) parm <- which(parm %in% names(est)) -if(is.character(parm)) parm <- which(names(est)%in% parm ) - ci <- ci[parm, , drop = FALSE] - return(ci) -} - - -## copy of stats:::format.perc -format.perc <- function (probs, digits) - paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), - "%") - -waldci.RDDreg_np <- function(x, level = 0.95, vcov. = NULL, df = Inf, ...){ - - inf_met <- infType(x) ## def in Misc.R - if(inf_met=="se"){ - if(!is.null(vcov.)|!is.infinite(df)) {warning("Arg 'vcov.' and 'df' only work for RDDreg with inf='lm'") - } - ## code recycled from stats::confint.default - co <- RDDcoef(x, allInfo=TRUE) - a <- (1 - level)/2 - a <- c(a, 1 - a) - fac <- qnorm(a) - pct <- format.perc(a, 3) ## import!! - ci <- array(NA, dim = c(1, 2L), dimnames = list("D", pct)) - ci[] <- co[,"Estimate"] + co[,"Std. Error"] %o% fac - return(ci) - } else { - waldci.default(x$RDDslot$model, parm = "D", level = level, vcov. = vcov., df = df, ...) - } -} - - - - -waldci.glm <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) - waldci.default(x, parm = parm, level = level, vcov. = vcov., df = df, ...) - -waldci.mlm <- function(x, parm=NULL, level = 0.95, vcov. = NULL, df = NULL, ...) -{ - ## obtain vcov - v <- if(is.null(vcov.)) vcov(x) else if(is.function(vcov.)) vcov.(x) else vcov. - - ## nasty hack: replace coefficients so that their names match the vcov() method - x$coefficients <- structure(as.vector(x$coefficients), .Names = colnames(vcov(x))) - - ## call default method - waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) -} - -waldci.survreg <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) -{ - if(is.null(vcov.)) v <- vcov(x) else { - if(is.function(vcov.)) v <- vcov.(x) - else v <- vcov. - } - if(length(x$coefficients) < NROW(x$var)) { - x$coefficients <- c(x$coefficients, "Log(scale)" = log(x$scale)) - } - waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) -} - - -if(FALSE){ - -library(sandwich) -library(lmtest) - -reg <- lm(freeny) - -### Regular -all(confint(reg)==waldci(reg)) -confint(reg) -co_reg <- coeftest(reg) -co_reg[,1] + qnorm(0.975)*co_reg[,2] -co_reg[,1] + qt(0.975, df=reg[["df.residual"]] )*co_reg[,2] - -## vcovHC -waldci(reg, vcov.=vcovHC) -co <- coeftest(reg, vcov.=vcovHC) -co[,1] + qnorm(0.975)*co[,2] -co[,1] + qt(0.975, df=reg[["df.residual"]] )*co[,2] - -} \ No newline at end of file diff --git a/RDDtools/R/as.npreg.R b/RDDtools/R/as.npreg.R deleted file mode 100644 index f4f7164..0000000 --- a/RDDtools/R/as.npreg.R +++ /dev/null @@ -1,170 +0,0 @@ -#' Convert an RDDreg object to a \code{npreg} object -#' -#' Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np} -#' @param x Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}} -#' @param \ldots Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}} -#' @details This function converts an RDDreg object into an \code{npreg} object from package \code{np} -#' Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one. -#' Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg -#' proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the univariate one -#' is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. -#' @export -#' @return An object of class \code{npreg} or \code{npregbw} -#' @seealso \code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}. -#' @examples -#' # Estimate ususal RDDreg: -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -#' -#' ## Convert to npreg: -#' reg_nonpara_np <- as.npreg(reg_nonpara) -#' reg_nonpara_np -#' RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) -#' -#' ## Compare with result obtained with a Gaussian kernel: -#' bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) -#' reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) -#' all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) - - - - - - -as.npregbw <- function(x,...){ - res <- as.npregbw_low(x=x, npreg=FALSE,...) - res -} - -#' @rdname as.npregbw -#' @export -as.npreg <- function(x,...){ - res <- as.npregbw_low(x=x, npreg=TRUE,...) - res -} - - -as.npregbw_low <- function(x, npreg=FALSE, adjustIK_bw=TRUE, ...){ - - dat <- getOriginalData(x) - bw <- getBW(x) - cutpoint <- getCutpoint(x) - -## Specify inputs to npregbw: - - ## data: - x <- dat$x - dat_np <- data.frame(y=dat$y, x=x, D=ifelse(x>=cutpoint,1,0), Dx=ifelse(x>=cutpoint,x,0)) - dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint)) - - ## bw: - range.x <- range(dat$x, na.rm=TRUE, finite=TRUE) - if(adjustIK_bw ){ ## & names(bw) =="h_opt" - bw <- RDDbw_IK(dat, kernel="Normal") - } - bw_other <- 9999*diff(range.x) - bws <- c(bw, rep(bw_other, 2)) - - -## start npregbw - res <- npregbw(bws=bws, formula=y~x+D+Dx, data= dat_np, regtype = "ll", - eval=dataPoints, bandwidth.compute=FALSE, gradients=TRUE,...) - class(res) <- c("RDDreg_npregbw", class(res)) - -## if npreg, return instead model_np <- npreg(bw_np, newdata=dataPoints, gradients=TRUE) - if(npreg) { - options(np.messages = TRUE) ## otherwise got warnings messages... probably because comes only if loaded! - res <- npreg(res, newdata=dataPoints, gradients=TRUE,...) - class(res) <- c("RDDreg_npreg", class(res)) - } - attr(res, "RDDdf") <- dat_np - attr(res, "cutpoint") <- cutpoint - res -} - - -#' @S3method RDDcoef RDDreg_npreg -RDDcoef.RDDreg_npreg <- function(object, allInfo=FALSE, allCo=FALSE, ...){ - - co <- diff(object$mean) - if(allInfo) { - se <- sum(object$merr) - zval <- co/se - pval <- 2 * pnorm(abs(zval), lower.tail = FALSE) - res <- cbind(co, se, zval, pval) - colnames(res) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)") - rownames(res) <- "D" - } else { - res <- co - } - - if(allCo){ - cos <- c(object$mean[1], object$grad) - ses <- c(object$merr[1], object$gerr) - - ## X_right: - dataPoints_Xr <- data.frame(x=0, D=0, Dx=c(0,1)) - Xr <- diff(predict(object, newdata=dataPoints_Xr)) - - estimates <- c(cos[1], co, cos[2], Xr) - - if(allInfo){ - zvals <- cos/ses - pvals <- 2 * pnorm(abs(zvals), lower.tail = FALSE) - res <- data.frame("Estimate" = estimates, - "Std. Error" = c(ses[1], se, ses[2:3]), - "z value" = c(zvals[1], zval, zvals[2:3]), - "Pr(>|z|)" = c(pvals[1], pval, pvals[2:3]), - check.names=FALSE) - rownames(res) <- c("(Intercept)", "D", "x_left", "x_right") - } else { - res <- estimates - } - } - - res -} - - -if(FALSE){ - library(RDDtools) - data(Lee2008) - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) - -# environment(as.npregbw_low) <- environment(RDDdata) - reg_nonpara_npbw <- as.npregbw(reg_nonpara) - reg_nonpara_npbw -class(reg_nonpara_npbw) -RDDcoef(reg_nonpara_npbw) - - reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE) - reg_nonpara_np -class(reg_nonpara_np) -RDDcoef(reg_nonpara_np) -RDDcoef(reg_nonpara_np, allInfo=TRUE) -RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE) - -## manual predict: - -cutpoint <- 0 -dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint)) -dataPoints2 <- data.frame(x=0, D=c(0,1), Dx=0) -dataPoints3 <- data.frame(x=c(0,1), D=0, Dx=0) -dataPoints3 <- data.frame(x=0, D=0, Dx=c(0,1)) - -diff(predict(reg_nonpara_np, newdata=dataPoints)) -diff(predict(reg_nonpara_np, newdata=dataPoints2)) - -diff(predict(reg_nonpara_np, newdata=dataPoints3)) -RDDcoef(reg_nonpara_gaus, allCo=TRUE) - -## compare: - bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) - reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) - all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) - all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check=FALSE) - - -} diff --git a/RDDtools/R/bw_IK.R b/RDDtools/R/bw_IK.R deleted file mode 100644 index 99a295b..0000000 --- a/RDDtools/R/bw_IK.R +++ /dev/null @@ -1,230 +0,0 @@ -#' Imbens-Kalyanaraman Optimal Bandwidth Calculation -#' -#' Imbens-Kalyanaraman optimal bandwidth -#' for local linear regression in Regression discontinuity designs. -#' -#' @param RDDobject of class RDDdata created by \code{\link{RDDdata}} -#' @param kernel The type of kernel used: either \code{triangular} or \code{uniform}. -#' @return The optimal bandwidth -#' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," -#' Review of Economic Studies (2012) 79, 933-959 -#' @seealso \code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) -#' @export -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @examples -#' data(Lee2008) -#' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -#' RDDbw_IK(rd) - - -RDDbw_IK <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal")) { - - kernel <- match.arg(kernel) - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - - res <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=FALSE, kernel=kernel) - return(res) - -} - -IK_bias <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { - - kernel <- match.arg(kernel) - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - - resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel) - -## compute C1: see IK equ 5, and Fan Jijbels (1996, 3.23) -# is done in R with locpol, computeMu(i=2, equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf) - C1 <- switch(kernel, "Triangular"= -0.1, "Uniform"= -0.1666667, "Normal"= -0.7519384) ## from: - -## Compute bias as in IK equ:5, -# note here 1/4 is outside C1 - if(missing(bw)) bw <- resB$h_opt - res<- C1 * 1/2 * bw^2 *(resB$m2_right-resB$m2_left) - return(res) - -} - -IK_var <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { - - kernel <- match.arg(kernel) - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - - resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel) - -## compute C2: see IK equ 5, and Fan Jijbels (1996, 3.23) -# is done in R with locpol, computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf) - C2 <- switch(kernel, "Triangular"= 4.8, "Uniform"= 4, "Normal"=1.785961) ## from: - -## Compute var as in IK equ:5, - if(missing(bw)) bw <- resB$h_op - elem1 <- (resB$var_inh_left+resB$var_inh_right)/resB$f_cu - elem2 <- C2/(nrow(RDDobject)*bw) - res <- elem1*elem2 - res -} - -IK_amse <- function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { - - var <- IK_var(RDDobject=RDDobject, kernel=kernel, bw=bw) - bias <- IK_bias(RDDobject=RDDobject, kernel=kernel, bw=bw) - res <- bias^2+var - res -} - - -RDDbw_IK_low <-function (X,Y,threshold=0,verbose=FALSE, type=c("RES", "RES_imp","WP"), returnBig=FALSE, kernel=c("Triangular", "Uniform", "Normal")) { - - type <- match.arg(type) - kernel <- match.arg(kernel) - - - N <- length(X) - N_left <- sum(X=threshold, na.rm=TRUE) - - -########## -### STEP 1 -########## - -## Silverman bandwidth - h1 <- 1.84*sd(X)*N^(-1/5) - if(verbose) cat("\n-h1:", h1) - -## f(cut) - isIn_h1_left <- X>=(threshold-h1) & X=threshold & X<=(threshold+h1) - - NisIn_h1_left <- sum(isIn_h1_left, na.rm=TRUE) - NisIn_h1_right <- sum(isIn_h1_right, na.rm=TRUE) - if(verbose) cat("\n-N left /right:", NisIn_h1_left, NisIn_h1_right) - - - f_cut <-(NisIn_h1_left+NisIn_h1_right)/(2*N*h1) - if(verbose) cat("\n-f(threshold):", f_cut) - -## Variances : Equ (13) - - var_inh_left <- var(Y[isIn_h1_left], na.rm=TRUE) - var_inh_right <- var(Y[isIn_h1_right], na.rm=TRUE) - -# problem with working pap0er: Equ 4.9 is different! - if(type=="WP"){ - denom <- 1/(NisIn_h1_left+NisIn_h1_right) - var_inh_global <- denom* ((NisIn_h1_left-1)* var_inh_left + (NisIn_h1_right-1)* var_inh_right) - } - - if(verbose){ - cat("\n-Sigma^2 left:", var_inh_left, "\n-Sigma^2 right:", var_inh_right) - } -########## -### STEP 2 -########## - - -## Global function of order 3: Equ (14) - reg <-lm(Y~I(X>=threshold)+I(X-threshold)+I((X-threshold)^2)+I((X-threshold)^3)) - m3<- 6*coef(reg)[5] - if(verbose) cat("\n-m3:", m3) - - -## left and right bandwidths: Equ (15) - Ck_h2 <- 3.556702 # 7200^(1/7) - h2_left <- Ck_h2 * ( var_inh_left /(f_cut*m3^2))^(1/7) * N_left^(-1/7) - h2_right <- Ck_h2 * ( var_inh_right /(f_cut*m3^2))^(1/7) * N_right^(-1/7) - - if(verbose) cat("\n-h2 left:", h2_left, "\n-h2 right:", h2_right) - -## second derivatives right/left - isIn_h2_left <- X>=(threshold-h2_left) & X=threshold & X<=(threshold+h2_right) - - N_h2_left <- sum(isIn_h2_left, na.rm=TRUE) - N_h2_right <- sum(isIn_h2_right, na.rm=TRUE) - - reg2_left <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_left) - reg2_right <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_right) - - m2_left <- as.numeric(2*coef(reg2_left)[3]) - m2_right <- as.numeric(2*coef(reg2_right)[3]) - - if(verbose) cat("\n-m2 left:", m2_left, "\n-m2 right:", m2_right) - -########## -### STEP 3 -########## - -## Regularization: Equ (16) - if(type=="RES"){ - r_left <- (2160*var_inh_left) / (N_h2_left *h2_left^4) - r_right <- (2160*var_inh_right) / (N_h2_right*h2_right^4) - } else { - r_left <- (2160*var_inh_global) / (N_h2_left *h2_left^4) - r_right <- (2160*var_inh_global) / (N_h2_right*h2_right^4) - } - - - if(verbose) cat("\n-Reg left:", r_left, "\n-Reg right:", r_right) - -## Compute kernel dependent constant: (see file ~/Dropbox/HEI/rdd/Rcode/IK bandwidth/bandwidth_comput.R) - Ck <- switch(kernel, "Triangular"=3.4375, "Uniform"=2.70192, "Normal"=1.25864) # is not 5.4 as in paper since our kernel is on I(|x|<1), not <1/2 - -## Final bandwidth: Equ (17) - h_opt <- Ck * ( (var_inh_left+ var_inh_right) / (f_cut * ((m2_right-m2_left)^2 + r_left +r_right)))^(1/5) * N^(-1/5) - names(h_opt) <- "h_opt" - - if(verbose) cat("\n\n") - -### - if(returnBig){ - res<- list() - res$h_opt <- as.numeric(h_opt) - res$var_inh_left <- var_inh_left - res$var_inh_right <- var_inh_right - res$m2_right <- m2_right - res$m2_left <- m2_left - res$f_cut <- f_cut - res$h2_left <- h2_left - res$h2_right <- h2_right - } else { - res <- h_opt - } - - return(res) -} - -if(FALSE){ - lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) - colnames(lee_dat4) <- c("X", "Y") - IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE) - IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE, type="WP") - IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=FALSE, returnBig=TRUE) - - -data(Lee2008) -Lee2008_rdd <- RDDdata(x=Lee2008$x,y=Lee2008$y , cutpoint=0) - -### -bw_IK <- RDDbw_IK(Lee2008_rdd) -bws <- sort(c(bw_IK, seq(0.05, 0.5, by=0.05))) -bi <- Vectorize(IK_bias, vectorize.args="bw")(Lee2008_rdd, bw=bws) -va <- Vectorize(IK_var, vectorize.args="bw")(Lee2008_rdd, bw=bws) -ms <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws) - -df<- data.frame(bw=rep(bws,3), value=c(ms, va, bi^2), type=rep(c("ms", "va", "bias^2"), each=length(bws))) - - -# qplot(x=bw, y=value, data=df, geom="line", colour=type)+geom_point(data=subset(df, value==min(subset(df, type=="ms", "value")))) - -bws_03 <- sort(c(bw_IK, seq(0.25, 0.35, by=0.005))) -ms_03 <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws_03) -df2 <- data.frame(bw=bws_03,mse=ms_03) - -subset(df2, mse==min(mse)) ## 1.78, not 1.74 from: -qplot(x=bw, y=mse, data=df2, geom="line") -} diff --git a/RDDtools/R/bw_ROT.R b/RDDtools/R/bw_ROT.R deleted file mode 100644 index 89beaba..0000000 --- a/RDDtools/R/bw_ROT.R +++ /dev/null @@ -1,98 +0,0 @@ -#' Bandwidth selector -#' -#' implements dpill -#' -#' @param object object of class RDDdata -#' @references McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005} -#' @include plotBin.R -#' @export -#' @author Drew Dimmery <\email{drewd@@nyu.edu}> -#' @examples -#' #No discontinuity - -### Crary bw - -ROT_bw <- function(object){ - - if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") - cutpoint <- getCutpoint(object) - x <- object$x - y <- object$y - -##### first step - n <- length(y) - sd_x <- sd(x, na.rm=TRUE) - bw_pilot <- (2*sd_x)/sqrt(n) - -## hist - his <- plotBin(x=x, y=y, h=bw_pilot, cutpoint=cutpoint,plot=FALSE, type="number") -# his2 <- hist(x, breaks=c(min(x), his[["x"]], max(x))) - x1 <- his$x - y1 <- his[,"y.Freq"] - -##### second step - -## regs: - reg_left <- lm(y1 ~ poly(x1, degree=4, raw=TRUE), subset=x1=cutpoint) - - - -} - - -#' Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth} -#' -#' Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) -#' either to the whole function, or to the functions below and above the cutpoint. -#' -#' @param object object of class RDDdata created by \code{\link{RDDdata}} -#' @param type Whether to choose a global bandwidth for the whole function (\code{global}) -#' or for each side (\code{sided}) -#' @return One (or two for \code{sided}) bandwidth value. -#' @references See \code{\link[KernSmooth]{dpill}} -#' @include plotBin.R -#' @seealso \code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) -#' @import KernSmooth -#' @export -#' @examples -#' data(Lee2008) -#' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -#' RDDbw_RSW(rd) - - -#### -RDDbw_RSW <- function(object, type=c("global", "sided")){ - - type <- match.arg(type) - - if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") - cutpoint <- getCutpoint(object) - x <- object$x - y <- object$y - -## - if(type=="global"){ - bw <- dpill(x=x, y=y) - } else { - dat_left <- subset(object, x=cutpoint) - - bw_left <- dpill(x=dat_left$x, y=dat_left$y) - bw_right <- dpill(x=dat_right$x, y=dat_right$y) - bw <- c(bw_left, bw_right) - } - -## result - bw -} - - -if(FALSE){ -# lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) -# head(lee_dat4) -# a<-RDDdata(y=lee_dat4[,2], x=lee_dat4[,1], cutpoint=0) -# ROT_bw(object=a) -# RDDbw_RSW(object=a) -RDDbw_RSW(object=a, type="sided") -} diff --git a/RDDtools/R/clusterInf.R b/RDDtools/R/clusterInf.R deleted file mode 100644 index edbec7d..0000000 --- a/RDDtools/R/clusterInf.R +++ /dev/null @@ -1,178 +0,0 @@ -#' Post-inference for clustered data -#' -#' Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix -#' possibly on the range specified by bandwidth -#' @param object Object of class lm, from which RDDreg also inherits. -#' @param clusterVar The variable containing the cluster attributions. -#' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich -#' @param type The type of cluster correction to use: either the degrees of freedom, or a HC matrix. -#' @param \ldots Further arguments passed to coeftest -#' @return The output of the coeftest function, which is itself of class \code{coeftest} -#' @seealso \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf} -#' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. -#' \emph{AmericanEconomic Review}, 93, p. 133-138 -#' @export -#' @import sandwich -#' @import lmtest -#' @examples -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -#' -#' # here we just generate randomly a cluster variable: -#' nlet <- sort(c(outer(letters, letters, paste, sep=""))) -#' clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) -#' -#' # now do post-inference: -#' clusterInf(reg_para, clusterVar=clusRandom) -#' clusterInf(reg_para, clusterVar=clusRandom, type="HC") - - -clusterInf <- function(object, clusterVar, vcov. = NULL, type=c("df-adj", "HC"), ...){ - - if(is.null(clusterVar)) stop("clusterVar seems to be NULL?") - type <- match.arg(type) - - if(type=="df-adj"){ - nClus <- if(is.factor(clusterVar)) nlevels(clusterVar) else length(unique(clusterVar)) - res <- coeftest(object, vcov. = vcov., df = nClus, ...) - } else { - if(!is.null(vcov.)) warning("arg 'vcov.' not used when 'type=HC' (default vcovCluster used)") - res <- coeftest(object, vcov. = function(x) vcovCluster(x, clusterVar=clusterVar), ...) - } - - return(res) -} - -#' @S3method estfun RDDreg_np -estfun.RDDreg_np <- function(x,...){ - inf_met <- infType(x) ## def in Misc.R - if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") - estfun(x$RDDslot$model) -} - -#' @S3method bread RDDreg_np -bread.RDDreg_np <- function(x,...){ - inf_met <- infType(x) ## def in Misc.R - if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") - bread(x$RDDslot$model) -} - - -# sandwich.RDDreg_np <- function (x, bread. = bread, meat. = meat, ...){ -# inf_met <- infType(x) ## def in Misc.R -# if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") -# sandwich(x$RDDslot$model, bread.=bread., meat.=meat., ...) -# } - -#' @S3method model.frame RDDreg_np -model.frame.RDDreg_np <- function (formula, ...) - model.frame(formula$RDDslot$model) - -#' Cluster Heteroskedasticity-consistent estimation of the covariance matrix. -#' -#' Offer a cluster variant of the usual Heteroskedasticity-consistent -#' @param object Object of class lm, from which RDDreg also inherits. -#' @param clusterVar The variable containing the cluster attributions. -#' @return A matrix containing the covariance matrix estimate. -#' @author Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html} -#' @references Cameron, C., Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering, -#' \emph{Journal of Business and Economic Statistics}, vol. 29(2), pages 238-249. -#' #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. -#' \emph{AmericanEconomic Review}, 93, p. 133-138 -#' @references Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}. -#' @export -#' @seealso \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods. -#' See also \code{\link[rms]{robcov}} from package \code{rms} for another implementation of the cluster robust. -#' @examples -#' data(STAR_MHE) -#' if(all(c(require(sandwich), require(lmtest)))){ -#' -#' # Run simple regression: -#' reg_krug <- lm(pscore~cs, data=STAR_MHE) -#' -#' # Row 1 of Table 8.2.1, inference with standard vcovHC: -#' coeftest(reg_krug,vcov.=vcovHC(reg_krug, "HC1"))[2,2] -#' -#' # Row 4 of Table 8.2.1, inference with cluster vcovHC: -#' coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2] -#' } - -vcovCluster <- function(object, clusterVar){ - M <- length(unique(clusterVar)) - N <- length(clusterVar) - K <- getModelRank(object) - dfc <- (M/(M-1))*((N-1)/(N-K)) - uj <- apply(estfun(object),2, function(x) tapply(x, clusterVar, sum)) - dfc*sandwich(object, meat.=crossprod(uj)/N) -} - -#' @rdname vcovCluster -#' @param clusterVar1,clusterVar2 The two cluster variables for the 2-cluster case. -#' @export -vcovCluster2 <- function(object, clusterVar1, clusterVar2){ - # R-codes (www.r-project.org) for computing multi-way - # clustered-standard errors. Mahmood Arai, Jan 26, 2008. - # See: Thompson (2006), Cameron, Gelbach and Miller (2006) - # and Petersen (2006). - # reweighting the var-cov matrix for the within model - - K <- getModelRank(object) - estF <- estfun(object) - - clusterVar12 <- paste(clusterVar1,clusterVar2, sep="") - M1 <- length(unique(clusterVar1)) - M2 <- length(unique(clusterVar2)) - M12 <- length(unique(clusterVar12)) - N <- length(clusterVar1) - - dfc1 <- (M1/(M1-1))*((N-1)/(N-K)) - dfc2 <- (M2/(M2-1))*((N-1)/(N-K)) - dfc12 <- (M12/(M12-1))*((N-1)/(N-K)) - - u1j <- apply(estF, 2, function(x) tapply(x, clusterVar1, sum)) - u2j <- apply(estF, 2, function(x) tapply(x, clusterVar2, sum)) - u12j <- apply(estF, 2, function(x) tapply(x, clusterVar12, sum)) - vc1 <- dfc1*sandwich(object, meat.=crossprod(u1j)/N ) - vc2 <- dfc2*sandwich(object, meat.=crossprod(u2j)/N ) - vc12 <- dfc12*sandwich(object, meat.=crossprod(u12j)/N) - vcovMCL <- vc1 + vc2 - vc12 - vcovMCL -} - -getModelRank <- function(object,...) - UseMethod("getModelRank") - -getModelRank.default <- function(object,...) object$rank - -getModelRank.RDDreg_np <- function(object,...) getModelRank.default(object$RDDslot$model) - -if(FALSE){ - - library(RDDtools) - data(Lee2008) - - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - - reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) - print(x=reg_para ) - summary(reg_para ) - -## cluster inference - set.seed(123) - nlet <- sort(c(outer(letters, letters, paste, sep=""))) - clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) - clusterInf(reg_para, clusterVar=clusRandom) - - clusterInf(reg_para, clusterVar=clusRandom, type="HC") - -## compare with rdd: - library(rdd) - rddest <- RDestimate(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE) - rddest_2 <- RDestimate2(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE, cluster=clusRandom) - coef(summary(reg_para)) - coef(summary(rddest$model[[2]])) - - all.equal(clusterInf(reg_para, clusterVar=clusRandom, type="HC")["D", "Std. Error"],rddest_2[["se"]][2]) -} \ No newline at end of file diff --git a/RDDtools/R/covarTests.R b/RDDtools/R/covarTests.R deleted file mode 100644 index c6cb88e..0000000 --- a/RDDtools/R/covarTests.R +++ /dev/null @@ -1,249 +0,0 @@ -#' Testing for balanced covariates: equality of means with t-test -#' -#' Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold -#' -#' @param object object of class RDDdata -#' @param bw a bandwidth -#' @param paired Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests. -#' @param var.equal Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal -#' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function -#' @param \ldots currently not used -#' @return A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @seealso \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution -#' @examples -#' data(Lee2008) -#' -#' ## Add randomly generated covariates -#' set.seed(123) -#' n_Lee <- nrow(Lee2008) -#' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), -#' z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), -#' z3 = sample(letters, size = n_Lee, replace = TRUE)) -#' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) -#' -#' ## test for equality of means around cutoff: -#' covarTest_mean(Lee2008_rdd_Z, bw=0.3) -#' -#' ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: -#' covarTest_dis(Lee2008_rdd_Z, bw=0.3) -#' -#' ## covarTest_mean works also on regression outputs (bw will be taken from the model) -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) -#' covarTest_mean(reg_nonpara) - - - - - -#' @export -covarTest_mean <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) - UseMethod("covarTest_mean") - -#' @rdname covarTest_mean -#' @method covarTest_mean RDDdata -#' @S3method covarTest_mean RDDdata -covarTest_mean.RDDdata <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - cutpoint <- getCutpoint(object) - covar <- getCovar(object) - cutvar <- object$x - - covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust) - -} - - -#' @rdname covarTest_mean -#' @method covarTest_mean RDDreg -#' @S3method covarTest_mean RDDreg -covarTest_mean.RDDreg <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - cutpoint <- getCutpoint(object) - dat <- object$RDDslot$RDDdata - covar <- getCovar(dat) - cutvar <- dat$x - if(is.null(bw)) bw <- getBW(object) - - covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust) - -} - - -covarTest_mean_low <- function(covar,cutvar, cutpoint, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - p.adjust <- match.arg(p.adjust) - -## subset - if(!is.null(bw)){ - isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw - covar <- covar[isInH,] - cutvar <- cutvar[isInH] - } - regime <- cutvar < cutpoint - -## Split data - covar_num <- sapply(covar, as.numeric) - - tests <-apply(covar_num, 2, function(x) t.test(x[regime], x[!regime], paired=paired, var.equal=var.equal)) - tests_vals <- sapply(tests, function(x) c(x[["estimate"]], diff(x[["estimate"]]),x[c("statistic", "p.value")])) - -## Adjust p values if required: - if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust) - -## Print results - res <- t(tests_vals) - colnames(res)[3] <- "Difference" - res - - -} - - - - -#' Testing for balanced covariates: equality of distribution -#' -#' Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold -#' -#' @param object object of class RDDdata -#' @param bw a bandwidth -#' @param exact Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed. -#' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function -#' @param \ldots currently not used -#' @return A data frame with, for each covariate, the K-S statistic and its p-value. -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @seealso \code{\link{covarTest_mean}} for the t-test of equality of means -#' @examples -#' data(Lee2008) -#' -#' ## Add randomly generated covariates -#' set.seed(123) -#' n_Lee <- nrow(Lee2008) -#' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), -#' z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), -#' z3 = sample(letters, size = n_Lee, replace = TRUE)) -#' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) -#' -#' ## Kolmogorov-Smirnov test of equality in distribution: -#' covarTest_dis(Lee2008_rdd_Z, bw=0.3) -#' -#' ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff: -#' covarTest_mean(Lee2008_rdd_Z, bw=0.3) -#' ## covarTest_dis works also on regression outputs (bw will be taken from the model) -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) -#' covarTest_dis(reg_nonpara) - -#' @export -covarTest_dis <- function(object, bw, exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) - UseMethod("covarTest_dis") - -#' @rdname covarTest_dis -#' @method covarTest_dis RDDdata -#' @S3method covarTest_dis RDDdata -covarTest_dis.RDDdata <- function(object, bw=NULL, exact = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - cutpoint <- getCutpoint(object) - covar <- getCovar(object) - cutvar <- object$x - - covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust) - -} - -#' @rdname covarTest_dis -#' @method covarTest_dis RDDreg -#' @S3method covarTest_dis RDDreg -covarTest_dis.RDDreg <- function(object, bw=NULL, exact = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - cutpoint <- getCutpoint(object) - dat <- object$RDDslot$RDDdata - covar <- getCovar(dat) - cutvar <- dat$x - if(is.null(bw)) bw <- getBW(object) - - covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust) - -} - -covarTest_dis_low <- function(covar,cutvar, cutpoint, bw=NULL, exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { - - p.adjust <- match.arg(p.adjust) - -## subset - if(!is.null(bw)){ - isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw - covar <- covar[isInH,] - cutvar <- cutvar[isInH] - } - regime <- cutvar < cutpoint - - - -## Split data - covar_num <- sapply(covar, as.numeric) - - tests <-apply(covar_num, 2, function(x) ks.test(x[regime], x[!regime], exact=exact)) - tests_vals <- sapply(tests, function(x) x[c("statistic", "p.value")]) - -## Adjust p values if required: - if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust) - -## Print results - res <- t(tests_vals) - res - - -} - - -########################################## -###### TODO -########################################## -## -mean: can use t.test for factors? What else? Count test? Warn for character/factors! -## -mean: add multivariate hotelling -## -ks: ok for factors? -## -do qqplot? -## -add methods for regs? Once converted to other objects... -## -add example and bettet output documentation -## -## -## - -########################################## -###### TESTS -########################################## - -if(FALSE){ -library(Hotelling) -library(mvtnorm) - -data <- rmvnorm(n=200, mean=c(1,2)) -spli <- sample(c(TRUE, FALSE), size=200, replace=TRUE) - -a<-hotel.stat(data[spli,],data[!spli,]) -a - -b<-hotel.test(data[spli,],data[!spli,]) -b -b$stats - -} - - - - -if(FALSE){ -library(RDDtools) -data(Lee2008) - -Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE))) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0) - - -covarTest_mean(object=Lee2008_rdd) -covarTest_dis(object=Lee2008_rdd) - - - -} diff --git a/RDDtools/R/dens_test.R b/RDDtools/R/dens_test.R deleted file mode 100644 index 3120872..0000000 --- a/RDDtools/R/dens_test.R +++ /dev/null @@ -1,64 +0,0 @@ -#' Run the McCracy test for manipulation of the forcing variable -#' -#' Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{RDDobject}. -#' -#' @param RDDobject object of class RDDdata -#' @param bin Argument of the \code{\link{DCdensity}} function, the binwidth -#' @param bw Argument of the \code{\link{DCdensity}} function, the bandwidth -#' @param plot Whether to return a plot. Logical, default ot TRUE. -#' @param \ldots Further arguments passed to \code{\link[rdd]{DCdensity}}. -#' @export -#' @import rdd -#' @examples -#' library(RDDtools) -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' dens_test(Lee2008_rdd) - - - -dens_test <- function(RDDobject, bin=NULL, bw=NULL, plot=TRUE,...){ - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - x <- getOriginalX(RDDobject) - test <- try(DCdensity(runvar=x, cutpoint=cutpoint, bin = bin, bw = bw, plot=plot, ext.out=TRUE, ...), silent=TRUE) - if(inherits(test, "try-error")){ - warning("Error in computing the density, returning a simple histogram", if(is.null(bin)) " with arbitrary bin" else NULL) - if(is.null(bin)) { - test <- try(DCdensity(RDDobject$x, cutpoint, bin = bin, bw = 0.2, ext.out=TRUE, plot=FALSE), silent=TRUE) - bin <- test$binsize - } - max_x <- max(RDDobject$x, na.rm=TRUE) - seq_breaks <- seq(from=min(RDDobject$x, na.rm=TRUE), to=max_x, by=bin) - if(max_x>max(seq_breaks)) seq_breaks <- c(seq_breaks, max_x+0.001) - hist(RDDobject$x, breaks=seq_breaks) - abline(v=cutpoint, col=2, lty=2) - } - - test.htest <- list() - test.htest$statistic <- c("z-val"=test$z) - test.htest$p.value <- test$p - test.htest$data.name <- deparse(substitute(RDDobject)) - test.htest$method <- "McCrary Test for no discontinuity of density around cutpoint" - test.htest$alternative <- "Density is discontinuous around cutpoint" - test.htest$estimate <- c(Discontinuity=test$theta) - test.htest$test.output <- test - class(test.htest) <- "htest" - return(test.htest) -} - -# print.MCcraryTest <- function(x,...){ -# cat("#### MC Crary Test of no discontinuity in density\n\n") -# cat("Estimate of discontinuity:\t", x$theta, "\n") -# cat("z-value:\t", x$z, "\t p-value:\t", x$p, "\n") -# } - -if(FALSE){ - -library(RDDtools) -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - -dens_test(Lee2008_rdd) - -} \ No newline at end of file diff --git a/RDDtools/R/deprecated.R b/RDDtools/R/deprecated.R deleted file mode 100644 index ee38ddf..0000000 --- a/RDDtools/R/deprecated.R +++ /dev/null @@ -1,177 +0,0 @@ - -plotPlacebo_OLD<- function(RDDregobject, from, to, by=0.1, level=0.95, same_bw=FALSE){ - - object <- RDDregobject - bw <- getBW(object) - cutpoint <- getCutpoint(object) - forc_var <- object$model[,"x^1"] - -## set grid: - if(missing(from)) from <- median(forc_var[forc_var=cutpoint]) - - seqi <- sort(c(cutpoint,seq(from=from, to=to, by=by))) - n_seqi <- length(seqi) - -## set matrix for results: - seq_vals <- matrix(NA, nrow=n_seqi, ncol=4, dimnames=list(seqi, c("LATE", "se", "CI_low", "CI_high"))) - -## get call: - object_call <- attr(object, "RDDcall") - -## original dataset: - dat_orig <- eval(object_call$RDDobject) - -## run each time: - for(i in seq_along(seqi)){ - attr(dat_orig, "cutpoint") <- seqi[i] - bw_reg <- if(same_bw) bw else RDDbw_IK(dat_orig) - object_new <- RDDreg_np(dat_orig, bw=bw_reg) - if(!inherits(object_new, "try-error")){ - co <- coef(summary(object_new))[2,, drop=FALSE] - seq_vals[i,"LATE"] <- co[,1] - seq_vals[i,"se"] <- co[,2] - } - } - -## compute intervals: - probs <- (1 - level)/2 - probs <- c(probs, 1 - probs) - quants <- qnorm(probs) - seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"] - seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"] - - -## plot results: - ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) - plot(seqi, seq_vals[,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra) - title("Placebo test") - - lines(seqi, seq_vals[,"CI_low"], lty=2) - lines(seqi, seq_vals[,"CI_high"], lty=2) # - abline(h=0) - -## add optim in case: - est <- RDDcoef(object) - points(cutpoint, RDDcoef(RDDregobject), col=2) - segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2) - segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2) - -## export (silently) results: - invisible(seq_vals) -} - - -plotPlacebo_OTHER_OLD <- function(RDDregobject, from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, device=c("ggplot", "base")){ - - device <- match.arg(device) - object <- RDDregobject - bw <- getBW(object) - cutpoint <- getCutpoint(object) - forc_var <- getOriginalX(RDDregobject) - -## set grid: - quants_left <- quantile(forc_var[forc_var=cutpoint], probs=c(from, to)) - - seqi_left <- seq(from=quants_left[1], to=quants_left[2], by=by) - seqi_right <- seq(from=quants_right[1], to=quants_right[2], by=by) - seqi <- c(seqi_left, seqi_right) - - n_seqi_left <- length(seqi_left) - n_seqi_right <- length(seqi_right) - n_seqi <- length(seqi) - -## set matrix for results: - seq_vals <- matrix(NA, nrow=n_seqi, ncol=6) - colnames(seq_vals) <- c("cutpoint", "position", "LATE", "se", "CI_low", "CI_high") - seq_vals[, "cutpoint"] <- seqi - -## get call: - object_call <- attr(object, "RDDcall") - -## original dataset: - dat_orig <- eval(object_call$RDDobject) - -## run each time: - for(i in seq_along(seqi)){ - - ## select sample: - if(seqi[i]cutpoint) ## exclude x>cutpoint - } - - ## change the cutpoint - attr(dat_sides, "cutpoint") <- seqi[i] - - ## Re-estimate model and eventually bw - bw_reg <- if(same_bw) bw else RDDbw_IK(dat_sides) - object_new <- RDDreg_np(dat_sides, bw=bw_reg) - - ## assign results (LATE and se) - if(!inherits(object_new, "try-error")){ - co <- coef(summary(object_new))[2,, drop=FALSE] - seq_vals[i,"LATE"] <- co[,1] - seq_vals[i,"se"] <- co[,2] - } - } - -## compute intervals: - probs <- (1 - level)/2 - probs <- c(probs, 1 - probs) - quants <- qnorm(probs) - seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"] - seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"] - - -## plot results: - # prepare df: - seq_vals <- as.data.frame(seq_vals) - seq_vals$position <- ifelse(seq_vals$cutpoint < cutpoint, "left", "right") - - # get estimates at true cutpoint : - est <- RDDcoef(object) - est_conf <- confint(RDDregobject, level=level)["D",] - - if(device=="base"){ - ra <- range(seq_vals[,c("CI_low", "CI_high")], est_conf, na.rm=TRUE) - xlims <- c(quants_left[1], quants_right[2]) -# ylims <- range(seq_vals[, c("LATE", "CI_low", "CI_high")], est_conf) - plot(seqi_left, seq_vals[1:n_seqi_left,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra, xlim=xlims) - title("Placebo test") - abline(h=0) - - # left CI - lines(seqi_left, seq_vals[1:n_seqi_left,"CI_low"], lty=2) - lines(seqi_left, seq_vals[1:n_seqi_left,"CI_high"], lty=2) - - # right values: - lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"LATE"], lty=1) - lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_low"], lty=2) - lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_high"], lty=2) - - # add estimate at true cutoff - points(cutpoint, est, col=2) - segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2) - segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2) - } else { - - est_df <- data.frame(cutpoint=cutpoint, LATE=est, position="middle", CI_low=est_conf[1], CI_high=est_conf[2]) - - # hack for decent width of error bar: - last_left <- nrow(subset(seq_vals, position=="left")) - W <- diff(seq_vals[c(last_left, last_left+1), "cutpoint"])/5 - - pl <- qplot(x=cutpoint, y=LATE, data=seq_vals, geom="line", colour=position)+ - geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals, stat="identity")+ - theme(legend.position="none")+geom_hline(yintercept=0)+ - geom_point(aes(x=cutpoint, y=LATE), data=est_df)+ - geom_errorbar(aes(ymin=CI_low, ymax=CI_high), data=est_df, width=W) - print(pl) - } - -## export (silently) results: - invisible(seq_vals) -} diff --git a/RDDtools/R/gen_MC_IK.R b/RDDtools/R/gen_MC_IK.R deleted file mode 100644 index 5bf3efa..0000000 --- a/RDDtools/R/gen_MC_IK.R +++ /dev/null @@ -1,160 +0,0 @@ -#' Generate Monte Carlo simulations of Imbens and Kalyanaraman -#' -#' Generate the simulations reported in Imbens and Kalyanaraman (2012) -#' @param n The size of sampel to generate -#' @param version The MC version of Imbens and Kalnayaraman (between 1 and 4). -#' @param sd The standard deviation of the error term. -#' @param output Whether to return a data-frame, or already a RDDdata -#' @param size The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1 -#' @return An data frame with x and y variables. -#' @references TODO -#' @export -#' @examples -#' MC1_dat <- gen_MC_IK() -#' MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) -#' -#' ## Use np regression: -#' reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd) -#' reg_nonpara -#' -#' # Represent the curves: -#' plotCu <- function(version=1, xlim=c(-0.1,0.1)){ -#' res <- gen_MC_IK(sd=0.0000001, n=1000, version=version) -#' res <- res[order(res$x),] -#' ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y")) -#' plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version)) -#' abline(v=0) -#' xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),] -#' points(xCut, col=2) -#' } -#' layout(matrix(1:4,2, byrow=TRUE)) -#' plotCu(version=1) -#' plotCu(version=2) -#' plotCu(version=3) -#' plotCu(version=4) -#' layout(matrix(1)) - -gen_MC_IK <- function(n=200, version=1, sd=0.1295, output=c("data.frame", "RDDdata"), size){ - - output <- match.arg(output) - if(!version%in% c(1:4) |length(version) !=1) stop("arg 'version' should be between 1 and 4") - - foo <- switch(version, - "1"=gen_MC_IK_1, - "2"=gen_MC_IK_2, - "3"=gen_MC_IK_3, - "4"=gen_MC_IK_4) - if(missing(size)) { - size <- switch(version, - "1"=0.04, - "2"=0, - "3"=0.1, - "4"=0.1) - } - res <- foo(n=n, sd=sd, size=size) - if(output=="RDDdata"){ - res <- RDDdata(x=res$x, y=res$y, cutpoint=0) - } - res -} - - -#################################### -######### MC 1 -#################################### - -gen_MC_IK_1 <- function(n=200, sd=0.1295, size=0.04){ - -## Regressor: - Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) - X <- 2*Z-1 - error <- rnorm(n, sd=sd) - -## Prepare variables: - Y <- vector("numeric", length=n) - ind_below <- X<0 - X_low <- X[ind_below] - X_up <- X[!ind_below] - -## Compute Y variables: - Y[ind_below] <- 0.48 + 1.27*X_low + 7.18*X_low^2 + 20.21* X_low^3 +21.54*X_low^4 +7.33*X_low^5 + error[ind_below] - Y[!ind_below] <- 0.48+size + 0.84*X_up - 3* X_up^2 + 7.99* X_up^3 - 9.01*X_up^4 +3.56*X_up^5 + error[!ind_below] - -## Result: - res <- data.frame(x=X, y=Y) - return(res) -} - -#################################### -######### MC 2 -#################################### - -gen_MC_IK_2 <- function(n=200, sd=0.1295, size=0){ - -# if(!missing(size) && !is.null(size)) warning("Argument 'size' ignored for gen_MC_IK_2") -## Regressor: - Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) - X <- 2*Z-1 - error <- rnorm(n, sd=sd) - -## Compute Y variables: - Y <- ifelse(X<0, 3*X^2, 4*X^2+size) + error - -## Result: - res <- data.frame(x=X, y=Y) - return(res) -} - - -#################################### -######### MC 3 -#################################### - -gen_MC_IK_3 <- function(n=200, sd=0.1295, size=0.1){ - -## Regressor: - Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) - X <- 2*Z-1 - error <- rnorm(n, sd=sd) - -## Compute Y variables: - Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X - 3*X^2 +7.99 * X^3-9.01*X^4+3.56*X^5 + error - -## Result: - res <- data.frame(x=X, y=Y) - return(res) -} - -#################################### -######### MC 4 -#################################### - -gen_MC_IK_4 <- function(n=200, sd=0.1295, size=0.1){ - -## Regressor: - Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) - X <- 2*Z-1 - error <- rnorm(n, sd=sd) - -## Compute Y variables: - Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X +7.99 * X^3-9.01*X^4+3.56*X^5 + error - -## Result: - res <- data.frame(x=X, y=Y) - return(res) -} - - -#################################### -######### MC simple -#################################### - -gen_MC_simple <- function(n=200, LATE=0.3){ - -## Regressor: - x <- rnorm(n) - D <- x>= 0 - y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n) - RDDdata(x=x, y=y, cutpoint=0) - -} \ No newline at end of file diff --git a/RDDtools/R/get_methods.R b/RDDtools/R/get_methods.R deleted file mode 100644 index 255bb5d..0000000 --- a/RDDtools/R/get_methods.R +++ /dev/null @@ -1,143 +0,0 @@ - - -# checkIsRDD <- function(object) if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") -# checkIsAnyRDD <- function(object) if(!inherits(object, c("RDDdata", "RDDreg_np"))) stop("Only works for RDDdata objects") - -# function(object) if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") -checkIsAnyRDD <- checkIsRDD <- function(object) { - classesOk <- c("RDDdata", "RDDreg_np", "RDDreg_lm") - if(!inherits(object, classesOk)) stop("Only works for RDDdata objects") -} - -getType <- function(object){ - checkIsRDD(object) - attr(object, "type") -} - -isFuzzy <- function(object){ - checkIsRDD(object) - attr(object, "type")=="Fuzzy" -} - -getCutpoint <- function(object){ - - checkIsRDD(object) - attr(object, "cutpoint") -} - -getOrder <- function(object){ - - checkIsRDD(object) - attr(object, "PolyOrder") -} - -getSlope <- function(object){ - - checkIsRDD(object) - attr(object, "slope") -} - -getBW <- function(object, force.na=FALSE){ - - checkIsAnyRDD(object) - res <- attr(object, "bw") - if(force.na) if(is.null(res)) res <- NA - res -} - - - -## return the type of inference used by RDDreg_np -infType <- function(x) { - if(is.null(getCall(x)$inference)) "se" else getCall(x)$inference -} - - -hasCovar <- function(object) - UseMethod("hasCovar") - -hasCovar.RDDdata <- function(object) attr(object, "hasCovar") - -hasCovar.RDDreg <- function(object) { - call <- getCall(object) - !is.null(call$covariates) -} - -getCovar <- function(object){ - if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") - if(!hasCovar(object)) stop("object has no covariates") - - rem <- if(isFuzzy(object)) 1:3 else 1:2 - res <- object[,-rem, drop=FALSE] - as.data.frame(res) -} - -getCovarNames <- function(object){ - if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") - if(!hasCovar(object)) stop("object has no covariates") - - rem <- if(isFuzzy(object)) 1:3 else 1:2 - colnames(object)[-rem] -} - -getOriginalX <- function(object){ - - cutpoint <- getCutpoint(object) - x <- object$model[,"x"] - if(cutpoint!=0) x <- x+cutpoint - x -} - -getOriginalX <- function(object) - UseMethod("getOriginalX") - - -getOriginalX.RDDreg <- function(object){ - object$RDDslot$RDDdata[, "x"] -} - -getOriginalX.RDDdata <- function(object){ - object[, "x"] -} - -# getOriginalX.RDDreg_np <- function(object){ -# -# cutpoint <- getCutpoint(object) -# Xnam <- getXname(object) -# x <- object$model[,Xnam] -# if(cutpoint!=0) x <- x+cutpoint -# x -# } - - -getOriginalData <- function(object, na.rm=TRUE, classRDD=TRUE) - UseMethod("getOriginalData") - -# getOriginalData.RDDreg_np <- function(object, na.rm=TRUE){ -# -# cutpoint <- getCutpoint(object) -# Xnam <- getXname(object) -# dat <- object$model[,c("y",Xnam)] -# if(cutpoint!=0) dat[,Xnam] <- dat[,Xnam] +cutpoint -# if(na.rm) dat <- dat[apply(dat, 1, function(x) all(!is.na(x))),] # remove na rows -# dat -# } - - - -getOriginalData.RDDreg <- function(object, na.rm=TRUE, classRDD=TRUE){ - res <- object$RDDslot$RDDdata - if(na.rm) res <- res[apply(res, 1, function(x) all(!is.na(x))),] # remove na rows - if(!classRDD) res <- as.data.frame(res) - res -} - - - -#' @importFrom stats getCall -#' @S3method getCall RDDreg -getCall.RDDreg <- function(x,...) attr(x, "RDDcall") - - -#format(Sys.Date(), "%A %Y-%m-%d") - diff --git a/RDDtools/R/model.matrix.RDD.R b/RDDtools/R/model.matrix.RDD.R deleted file mode 100644 index 4a80b1d..0000000 --- a/RDDtools/R/model.matrix.RDD.R +++ /dev/null @@ -1,74 +0,0 @@ -#' @S3method model.matrix RDDdata - -model.matrix.RDDdata <- function(object, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), ...){ - - checkIsRDD(object) - RDDobject <- object - type <- getType(object) - - if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!") - - slope <- match.arg(slope) - covar.strat <- match.arg(covar.opt$strategy, choices=c("include", "residual")) - covar.slope <- match.arg(covar.opt$slope, choices=c("same", "separate")) - - cutpoint <- getCutpoint(RDDobject) - if(!is.null(covariates) & !hasCovar(RDDobject)) stop("Arg 'covariates' was specified, but no covariates found in 'RDDobject'.") - -## Construct data - dat <- as.data.frame(RDDobject) - - dat_step1 <- dat[, c("y", "x")] - dat_step1$x <- dat_step1$x -cutpoint - - L <- ifelse(dat_step1$x>= 0, 1,0) - dat_step1$D <- if(type=="Sharp") L else object$z - - if(order>0){ - polys <- poly(dat_step1$x, degree=order, raw=TRUE) - colnames(polys) <- paste("x", 1:order, sep="^") - dat_step1 <- cbind(dat_step1[,c("y", "D")],polys) - if(slope=="separate") { - polys2 <- polys*L - colnames(polys2) <- paste(colnames(polys), "right", sep="_") - dat_step1 <- cbind(dat_step1,polys2) - } - } else { - dat_step1$x <- NULL - } - -## Covariates - if(!is.null(covariates)){ - covar <- getCovar(RDDobject) - formu.cova <- covariates - - if(grepl("\\.", formu.cova)) formu.cova <- paste(colnames(covar), collapse=" + ") - if(covar.slope=="separate") { - formu.cova <- paste(formu.cova, "+", paste("D*(", formu.cova,")", sep=""), sep=" ") - covar$D <- dat_step1$D - } - - formula.cova <- as.formula(paste("~", formu.cova)) - mf <- model.frame(formula.cova, covar, na.action=na.pass) - M_covar <- model.matrix(formula.cova, data=mf) - - if(covar.strat=="residual"){ - M_covar <- data.frame(y=dat_step1$y, M_covar) - first_stage <- lm(y~., data=M_covar) ## regress y on covariates only - dat_step1$y <- residuals(first_stage) ## change in original data - } else { - rem <- switch(covar.slope, "separate"="^D$|(Intercept)", "same" ="(Intercept)") - M_covar <- M_covar[,-grep(rem, colnames(M_covar)), drop=FALSE ] - dat_step1 <- cbind(dat_step1, M_covar) ## add covar as regressors - } - } - -## Colnames cleaning - colnames(dat_step1) <- gsub("x\\^1", "x", colnames(dat_step1)) - -## - if(type=="Fuzzy") dat_step1$ins <- L - -## return results: - dat_step1 -} \ No newline at end of file diff --git a/RDDtools/R/myRDD-package.R b/RDDtools/R/myRDD-package.R deleted file mode 100644 index 46263d3..0000000 --- a/RDDtools/R/myRDD-package.R +++ /dev/null @@ -1,13 +0,0 @@ -#' Regression Discontinuity Design -#' -#' Provides function to do a comprehensive regression discontinuity analysis. -#' -#' @name RDDtools-package -#' @aliases RDDtools -#' @docType package -#' @import KernSmooth -#' @import np -#' @import ggplot2 -#' @title Regression Discontinuity Design -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -NULL diff --git a/RDDtools/R/placebo.R b/RDDtools/R/placebo.R deleted file mode 100644 index a32971c..0000000 --- a/RDDtools/R/placebo.R +++ /dev/null @@ -1,348 +0,0 @@ -#' Draw a (density) plot of placebo tests -#' -#' Draw a plot of placebo tests, estimating the impact on fake cutpoints -#' @param object the output of an RDD regression -#' @param device Whether to draw a base or a ggplot graph. -#' @param \ldots Further arguments passed to specific methods. -#' @param vcov. Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}. -#' @param plot Whether to actually plot the data. -#' @param output Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object -#' @return A data frame containing the cutpoints, their corresponding estimates and confidence intervals. -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @examples -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -#' plotPlacebo(reg_nonpara) -#' -#' # Use with another vcov function; cluster case -#' reg_nonpara_lminf <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") -#' # need to be a function applied to updated object! -#' vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x) -#' plotPlacebo(reg_nonpara_lminf, vcov. = vc) - - -#' @export -plotPlacebo <- function(object, device=c("ggplot", "base"), ...) - UseMethod("plotPlacebo") - -#' @rdname plotPlacebo -#' @method plotPlacebo RDDreg -#' @S3method plotPlacebo RDDreg -#' @param from Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint -#' @param to Ending point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint -#' @param by Increments of the from-to sequence -#' @param level Level of the confidence interval shown -#' @param same_bw Whether to re-estimate the bandwidth at each point -plotPlacebo.RDDreg <- function(object, device=c("ggplot", "base"), from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, vcov.=NULL, plot=TRUE, output=c("data", "ggplot"), ...){ - - device <- match.arg(device) - output <- match.arg(output) - - # compute Placebos: - seq_vals <- computePlacebo(object=object, from=from, to=to, by=by, level=level, - same_bw=same_bw, vcov.=vcov.) - - ## Use low-level to plot: - plotPlacebo_low(seq_vals, device=device, plot=plot, output=output,...) - - invisible(seq_vals) -} - - - -#' @S3method plotPlacebo PlaceboVals -plotPlacebo.PlaceboVals <- function(object, device=c("ggplot", "base"),plot=TRUE, output=c("data", "ggplot"), ...){ - - device <- match.arg(device) - output <- match.arg(output) - plotPlacebo_low(object, device=device, plot=plot, output=output,...) - - invisible(object) -} - - -plotPlacebo_low <- function(seq_vals, device=c("ggplot", "base"), output=c("data", "ggplot"), plot=TRUE){ - - device <- match.arg(device) - output <- match.arg(output) - - if(device=="base"){ - if(plot){ - ylims <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) - xlims <- range(seq_vals$cutpoint) - - dat_left <- subset(seq_vals, position=="left") - dat_right <- subset(seq_vals, position=="right") - dat_true <- subset(seq_vals, position=="True") - - plot(dat_left$cutpoint, dat_left$LATE, type="l", ylab="LATE", xlab="Cutpoints", ylim=ylims, xlim=xlims) - title("Placebo test") - abline(h=0) - - # left CI - lines(dat_left$cutpoint, dat_left$CI_low, lty=2) - lines(dat_left$cutpoint, dat_left$CI_high, lty=2) - - # right values: - lines(dat_right$cutpoint, dat_right$LATE, lty=1) - lines(dat_right$cutpoint, dat_right$CI_low, lty=2) - lines(dat_right$cutpoint, dat_right$CI_high, lty=2) - - # add estimate at true cutoff - points(dat_true$cutpoint, dat_true$LATE, col=2) - segments(dat_true$cutpoint,ylims[1]-1, dat_true$cutpoint, dat_true$LATE, col="red", lty=2) ## vertical line - segments(xlims[1]-1, dat_true$LATE, dat_true$cutpoint, dat_true$LATE, col="red", lty=2) - } - if(output!="data") warning("output='ggplot' only makes sense with device='ggplot'") - } else { - seq_vals_placeb <- subset(seq_vals, position!="True") - seq_vals_true <- subset(seq_vals, position=="True") - - # hack for decent width of error bar: - last_left <- nrow(subset(seq_vals_placeb, position=="left")) - W <- diff(seq_vals_placeb[c(last_left, last_left+1), "cutpoint"])/5 - - pl <- qplot(x=cutpoint, y=LATE, data=seq_vals_placeb, geom="line", colour=position)+ - geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals_placeb, stat="identity")+ - theme(legend.position="none")+geom_hline(yintercept=0)+ - geom_point(aes(x=cutpoint, y=LATE), data=seq_vals_true)+ - geom_errorbar(aes(ymin=CI_low, ymax=CI_high), data=seq_vals_true, width=W) - if(plot) print(pl) - } - -## export (silently) results: - out <- switch(output, "data"=seq_vals, "ggplot"=pl) - invisible(out) -} - - -#' @rdname plotPlacebo -#' @export -plotPlaceboDens <- function(object, device=c("ggplot", "base"), ...) - UseMethod("plotPlaceboDens") - -#' @rdname plotPlacebo -#' @method plotPlaceboDens RDDreg -#' @S3method plotPlaceboDens RDDreg -plotPlaceboDens.RDDreg <- function(object, device=c("ggplot", "base"), from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, vcov.=NULL, ...){ - - device <- match.arg(device) - - # compute Placebos: - seq_vals <- computePlacebo(object=object, from=from, to=to, by=by, level=level, same_bw=same_bw, vcov.=vcov.) - - ## Use low-level to plot: - plotPlaceboDens_low(seq_vals, device=device) - - invisible(seq_vals) -} - - -#' @S3method plotPlaceboDens PlaceboVals -plotPlaceboDens.PlaceboVals <- function(object, device=c("ggplot", "base"), ...){ - - device <- match.arg(device) - plotPlaceboDens_low(object, device=device,...) - - invisible(object) -} - - -plotPlaceboDens_low <- function(seq_vals, device=c("ggplot", "base")){ - - device <- match.arg(device) - seq_vals_placeb <- subset(seq_vals, position!="True") - perc_rejected <- 100*mean(seq_vals_placeb$p_value<0.05) - - - if(device=="base") { - stop("not implemented") - } else { - seq_vals_true <- subset(seq_vals, position=="True") - - dens_max <- max(density(seq_vals_placeb$LATE)$y) # not efficient.... - text_rej <- paste("Perc rejected:", perc_rejected, "%") - - - pl <- qplot(x=LATE, data=seq_vals_placeb, geom="density")+ - geom_vline(xintercept=0, lty=2)+geom_vline(xintercept=seq_vals_true$LATE, colour="red")+ - annotate("text", x = seq_vals_true$LATE, y = dens_max, label = "LATE at true \ncutpoint ", colour="red", hjust=1)+ - annotate("text", x = seq_vals_true$LATE, y = 0, label = text_rej, hjust=1, vjust=1) - print(pl) - } - -## export (silently) results: - invisible(seq_vals) -} - - -#' @rdname plotPlacebo -#' @export computePlacebo - - -computePlacebo <- function(object, from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, vcov.=NULL){ - - bw <- getBW(object) - hasBw <- !is.null(bw) - if(!hasBw) bw <- NA - - if(!is.null(vcov.)&& !is.function(vcov.)) stop("'arg' vcov. should be a function (so can be updated at each step, not a matrix") - cutpoint <- getCutpoint(object) - forc_var <- getOriginalX(object) - -## set grid: - quants_left <- quantile(forc_var[forc_var=cutpoint], probs=c(from, to)) - - seqi_left <- seq(from=quants_left[1], to=quants_left[2], by=by) - seqi_right <- seq(from=quants_right[1], to=quants_right[2], by=by) - seqi <- c(seqi_left, seqi_right) - - n_seqi_left <- length(seqi_left) - n_seqi_right <- length(seqi_right) - n_seqi <- length(seqi) - -## set matrix for results: - seq_vals <- matrix(NA, nrow=n_seqi, ncol=8) - colnames(seq_vals) <- c("cutpoint", "position", "LATE", "se", "p_value", "CI_low", "CI_high", "bw") - seq_vals[, "cutpoint"] <- seqi - -## get original call: - object_call <- getCall(object) - -## original dataset: - dat_orig <- eval(object_call$RDDobject) - hasCov <- hasCovar(dat_orig) - -## run each time: - for(i in seq_along(seqi)){ - - ## select sample - if(seqi[i]cutpoint) ## exclude x>cutpoint - } - - - ## change the cutpoint, reattribute new data: - attr(dat_sides, "cutpoint") <- seqi[i] - object_call$RDDobject <- dat_sides - - ## Change bw if(same_bw=FALSE) - if(hasBw) object_call$bw <- if(!same_bw) RDDbw_IK(dat_sides) else bw - - ## Re-estimate model with new cutpoint/bw - object_new <- eval(object_call) # RDDreg_np(dat_sides, bw=bw_reg) - - ## assign results (LATE and se) - if(!inherits(object_new, "try-error")){ - - seq_vals[i,"LATE"] <- RDDcoef(object_new) - if(!is.null(vcov.)) { - co <- coeftest(object_new, vcov.=vcov.)["D",, drop=FALSE] - } else { - co <- RDDcoef(object_new, allInfo=TRUE) - } - seq_vals[i,"se"] <- co[,"Std. Error"] - seq_vals[i,"p_value"] <- co[,4] - seq_vals[i,"bw"] <- getBW(object_new, force.na=TRUE) - seq_vals[i,c("CI_low", "CI_high")] <- waldci(object_new, level=level, vcov.=vcov.)["D",] ## confint version working with vcov. - } - } - - -## Add midpoint: - if(!is.null(vcov.)) { - true_co <- coeftest(object, vcov.=vcov.)["D",, drop=FALSE] - } else { - true_co <- RDDcoef(object, allInfo=TRUE) - } - true_confint <- as.numeric(waldci(object, level=level, vcov.=vcov.)["D",]) - true <- data.frame(cutpoint=cutpoint, position="True", LATE=RDDcoef(object), - se=true_co["D","Std. Error"], p_value=true_co["D",4], - CI_low=true_confint[1], CI_high=true_confint[2], bw=bw) - - -## output - seq_vals <- as.data.frame(seq_vals) - seq_vals$position <- ifelse(seq_vals$cutpoint < cutpoint, "left", "right") - - seq_vals <- rbind(seq_vals, true) - seq_vals <- seq_vals[order(seq_vals$cutpoint),] - rownames(seq_vals) <- seq_len(nrow(seq_vals)) - - -# seq_vals$position <- if(seq_vals$cutpoint == cutpoint) "True" - - class(seq_vals) <- c("PlaceboVals", "data.frame") - return(seq_vals) -} - - -########################################## -###### TODO -########################################## -## help file -## -choose between functions - -if(FALSE){ -library(RDDtools) -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - -## Regs -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) - -environment(plotPlacebo) <- environment(RDDdata) -pla_lm <- plotPlacebo(reg_para, by=0.05) -head(pla_lm) - -pla_np <- plotPlacebo(reg_nonpara, by=0.05) -head(pla_np ) - -pla_dat <- computePlacebo(reg_nonpara, by=0.05) -head(pla_dat ) -plotPlacebo(pla_dat) -plotPlacebo(pla_dat, device="base") - - -plaDe_lm <- plotPlaceboDens(reg_para, by=0.05) -plotPlaceboDens(pla_dat) - -### - -## MC simple rdd -x<-runif(1000,-1,1) -cov<-rnorm(1000) -y<-3+2*x+10*(x>=0)+rnorm(1000) - - -mc_dat <- RDDdata(y=y, x=x, cutpoint=0) - -bw_ik <- RDDbw_IK(mc_dat) -mc_reg <- RDDreg_np(mc_dat, bw=bw_ik) - -mc_reg_lm <- RDDreg_lm(mc_dat, bw=bw_ik) -mc_reg_np <- RDDreg_np(mc_dat, bw=bw_ik) -waldci(mc_reg_lm) - -environment(plotPlacebo) <- environment(RDDdata) - -plotPlacebo(mc_reg) -plotPlacebo(mc_reg, from=0.1) -plotPlacebo(mc_reg, device="ggplot") -plotPlacebo(mc_reg, device="ggplot", by=0.05) -plotPlacebo(mc_reg, device="ggplot", from=0.05,by=0.05, to=0.95) - -a<-plotPlacebo(mc_reg_lm) -a -RDDtools:::waldci.default(mc_reg_lm) -waldci(mc_reg_np) -plotPlacebo(mc_reg_lm, device="ggplot") - - - -} diff --git a/RDDtools/R/plotBin.R b/RDDtools/R/plotBin.R deleted file mode 100644 index fa29598..0000000 --- a/RDDtools/R/plotBin.R +++ /dev/null @@ -1,73 +0,0 @@ -#' Bin plotting -#' -#' Do a "scatterplot bin smoothing" -#' -#' @param x Forcing variable -#' @param y Output -#' @param h the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)}) -#' @param cutpoint Cutpoint -#' @param plot Logical. Whether to plot or only returned silently -#' @param type Whether returns the y averages, or the x frequencies -#' @param xlim,cex,main,xlab,ylab Usual parameters passed to plot(), see \code{\link{par}} -#' @param \ldots further arguments passed to plot. -#' @return Returns silently values -#' @references McCrary, Justin. -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @keywords internal - -plotBin <- function(x, y, h=0.05, nbins=NULL, cutpoint=0, plot=TRUE, type=c("value", "number"),xlim=range(x, na.rm=TRUE), cex=0.9,main=NULL, xlab, ylab, ...){ - - type <- match.arg(type) - x_name <- if(missing(xlab)) deparse(substitute(x)) else xlab - y_name <- if(missing(ylab)) deparse(substitute(y)) else ylab - - -## Set intervals and midpoints - min_x <- min(xlim) - max_x <- max(xlim) - - if(!is.null(nbins)) h <- diff(xlim)/nbins - - K0 <- ceiling((cutpoint-min_x)/h) # Number of cells on left - K1 <- ceiling((cutpoint+max_x)/h) # Number of cells on right - K <- K0+K1 - if(!is.null(nbins) && K!=nbins) { - ranges <- c(cutpoint-min_x, cutpoint+max_x) - if(which.min(ranges)==1) { - K0 <- K0-1 - } else { - K1 <- K1-1 - } - K <- K0+K1 - } - - b_k <- cutpoint - (K0-c(1:K)+1)*h # Lee and Lemieux (2010) p. 308 - mid_points_bk <- b_k+h/2 - n_bins <- length(mid_points_bk) - brk <- c(b_k,cutpoint + (K1+2)*h) - -## compute output (mean of count) - intervs <- cut(x, breaks=brk, include.lowest=TRUE) - table_intervs <- table(intervs) - n_non0_intervs <- sum(table_intervs!=0) - - y2 <- switch(type, - "value" =tapply(y, intervs, mean, na.rm=TRUE), - "number" =table_intervs) - - -## plot - if(plot){ - plot(mid_points_bk, as.numeric(y2), pch=19, cex=cex, xlab=x_name, ylab=y_name, xlim=xlim,...) - title(main=main, sub=paste("h=", round(h,4), ",\\tn bins=", n_non0_intervs, sep="")) - abline(v=cutpoint, lty=2) - } - -## return invisible result - res <- data.frame(x=mid_points_bk,y=y2) - invisible(res) -} - - - - diff --git a/RDDtools/R/plotSensi.R b/RDDtools/R/plotSensi.R deleted file mode 100644 index 01f4ac7..0000000 --- a/RDDtools/R/plotSensi.R +++ /dev/null @@ -1,309 +0,0 @@ -#' Plot the sensitivity to the bandwidth -#' -#' Draw a plot showing the LATE estimates depending on multiple bandwidths -#' -#' @param RDDregobject object of a RDD regression, from either \code{\link{RDDreg_lm}} or \code{\link{RDDreg_np}} -#' @param from First bandwidth point. Default value is max(1e-3, bw-0.1) -#' @param to Last bandwidth point. Default value is bw+0.1 -#' @param by Increments in the \code{from} \code{to} sequence -#' @param level Level of the confidence interval -#' @param order For parametric models (from \code{\link{RDDreg_lm}}), the order of the polynomial. -#' @param type For parametric models (from \code{\link{RDDreg_lm}}) whether different orders are represented as different colour or as different facets. -#' @param device Whether to draw a base or a ggplot graph. -#' @param output Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object -#' @param plot Whether to actually plot the data. -#' @param \ldots Further arguments passed to specific methods -#' @return A data frame containing the bandwidths and corresponding estimates and confidence intervals. -#' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> -#' @import methods -#' @examples -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' -#' #Non-parametric estimate -#' bw_ik <- RDDbw_IK(Lee2008_rdd) -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik) -#' plotSensi(reg_nonpara) -#' plotSensi(reg_nonpara, device="base") -#' -#' #Parametric estimate: -#' reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4, bw=bw_ik) -#' plotSensi(reg_para_ik) -#' plotSensi(reg_para_ik, type="facet") - - - -################################### -##### plotSensi: function to plot sensitivity to bandwidth -################################### - -#' @export -plotSensi <- function(RDDregobject, from, to, by=0.01, level=0.95, output=c("data", "ggplot"), plot=TRUE, ...) - UseMethod("plotSensi") - -#' @rdname plotSensi -#' @method plotSensi RDDreg_np -#' @S3method plotSensi RDDreg_np -#' @param vcov. Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}} -plotSensi.RDDreg_np <- function(RDDregobject, from, to, by=0.05, level=0.95, output=c("data", "ggplot"), plot=TRUE, device=c("ggplot", "base"), vcov.=NULL, ...){ - - device <- match.arg(device) - output <- match.arg(output) - if(!is.null(vcov.)&& !is.function(vcov.)) stop("'arg' vcov. should be a function (so can be updated at each step, not a matrix") - if(device=="base"&&output=="ggplot") stop("Arg 'output=ggplot' only relevant for 'device=ggplot'") - - object <- RDDregobject - bw <- getBW(object) - est <- RDDcoef(object) - -## set grid: - if(missing(from)) from <- max(1e-3, bw-0.1) - if(missing(to)) to <- bw+0.1 - - seq_bw <- unique(sort(c(bw,seq(from=from, to=to, by=by)))) - n_seq_bw <- length(seq_bw) - -## set matrix for results: - seq_vals <- matrix(NA, nrow=n_seq_bw, ncol=6) - colnames(seq_vals) <- c("bw", "LATE", "se", "p_value", "CI_low", "CI_high") - seq_vals[,"bw"] <- seq_bw - -## get call: - object_call <- getCall(object) - -## run each time: - for(i in seq_along(seq_bw)){ - object_call$bw <- seq_bw[i] - object_new <- try(eval(object_call), silent=TRUE) - if(!inherits(object_new, "try-error")){ - seq_vals[i,"LATE"] <- RDDcoef(object_new) - if(!is.null(vcov.)) { - co <- coeftest(object_new, vcov.=vcov.)["D",, drop=FALSE] - } else { - co <- RDDcoef(object_new, allInfo=TRUE) - } - seq_vals[i,"se"] <- co[,"Std. Error"] - seq_vals[i,"p_value"] <- co[,4] - seq_vals[i,c("CI_low", "CI_high")] <- waldci(object_new, level=level, vcov.=vcov.)["D",] ## confint version working with vcov. - } - } - - -## plot results: - seq_vals <- as.data.frame(seq_vals) - if(device=="base" && plot){ - ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) - plot(seq_vals[,"bw"], seq_vals[,"LATE"], type="l", ylab="LATE", xlab="bandwidth", ylim=ra) - title("Sensitivity to bandwidth choice") - lines(seq_bw, seq_vals[,"CI_low"], lty=2) - lines(seq_bw, seq_vals[,"CI_high"], lty=2) # - - - ## add optim in case: - points(bw, est, col="red") - segments(bw,0, bw, est, col="red", lty=2) - segments(0,est, bw, est, col="red", lty=2) - } else { - sensPlot <- qplot(x=bw, y=LATE, data=seq_vals, geom="line") - sensPlot <- sensPlot+ geom_smooth(aes(ymax = CI_high, ymin=CI_low),data=seq_vals, stat="identity") # add the conf int - point.df <- data.frame(bw=bw, LATE=est) - sensPlot <- sensPlot + geom_point(data=point.df) # add the conf int - sensPlot <- sensPlot + geom_vline(xintercept=0, lty=2) - if(plot) print(sensPlot) - } - -## export (silently) results: - out <- switch(output, "data"=seq_vals, "ggplot"=sensPlot) - invisible(out) -} - - - - - - - - - - - - -#' @rdname plotSensi -#' @method plotSensi RDDreg_lm -#' @S3method plotSensi RDDreg_lm -plotSensi.RDDreg_lm <- function(RDDregobject, from, to, by=0.05, level=0.95, output=c("data", "ggplot"), plot=TRUE, order, type=c("colour", "facet"), ...){ - - type <- match.arg(type) - output <- match.arg(output) - object <- RDDregobject - est <- RDDcoef(object) - bw <- getBW(object) - origOrder <- getOrder(object) - hasBw <- !is.null(bw) - if(!hasBw&type=="facet") stop("Arg 'type=facet' works only when the parametric regression was estimated with a bandwidth") - -## set grid: - if(hasBw){ - if(missing(from)) from <- max(1e-3, bw-0.1) - if(missing(to)) to <- bw+0.1 - - seq_bw <- unique(sort(c(bw,seq(from=from, to=to, by=by)))) - n_seq_bw <- length(seq_bw) - } else { - if(!all(c(missing(from), missing(to)))) warning("Args 'from' and 'to' not considered since original input has no bw") - n_seq_bw <- 1 - seq_bw <- NULL - } - - if(missing(order)) order <- 0:(getOrder(RDDregobject)+2) - seq_ord <- order - n_seq_ord <- length(seq_ord) - -## set matrix for results: - seq_vals <- matrix(NA, nrow=n_seq_bw*n_seq_ord, ncol=6) - colnames(seq_vals) <- c("bw", "order", "LATE", "se", "CI_low", "CI_high") - -## get call: - object_call <- attr(object, "RDDcall") - -## guess if obtained with IKbandwidth? (trick: call$bw would be empty) -# is_IKband <- is.null(object_call$bw) - -## run each time: - for(j in 1:length(seq_ord)){ - for(i in 1:n_seq_bw){ - # assign new order/bw, and estimate: - object_call$bw <- seq_bw[i] - object_call$order <- seq_ord[j] - object_new <- try(eval(object_call), silent=TRUE) - - # put parameters bw/order into matrix: - seq_vals[i+(j-1)*n_seq_bw,"bw"] <- if(is.null(seq_bw[i])) NA else seq_bw[i] - seq_vals[i+(j-1)*n_seq_bw,"order"] <- seq_ord[j] - - # put output estim/se into matrix: - if(!inherits(object_new, "try-error")){ - co <- RDDcoef(object_new, allInfo=TRUE) - seq_vals[i+(j-1)*n_seq_bw,"LATE"] <- co[,1] - seq_vals[i+(j-1)*n_seq_bw,"se"] <- co[,2] - } else { - warning("Problem evaluating model with new bw=", - object_call$bw, " and new order=",object_call$order, ".") - } - } - } - - - -## compute intervals: - probs <- (1 - level)/2 - probs <- c(probs, 1 - probs) - quants <- qnorm(probs) - seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"] - seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"] - - -## plot results: - seq_vals_df <- as.data.frame(seq_vals) - rownames(seq_vals_df) <- 1:nrow(seq_vals_df) - if(hasBw) seq_vals_df$order <- as.factor(seq_vals_df$order) - - - if(type=="colour"){ - if(hasBw){ - est_point <- data.frame(bw=bw, LATE=est, order=as.factor(origOrder)) - sensPlot <- qplot(x=bw, y=LATE, data=seq_vals_df, colour=order, geom="line")+ - geom_point(data=est_point)+ - geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals_df, stat="identity") - } else { - est_point <- data.frame(LATE=est, order=origOrder) - sensPlot <- qplot(x=order, y=LATE, data=seq_vals_df, geom="line")+ - geom_point(data=est_point)+ - geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals_df, stat="identity") - } - } else { - sensPlot <- qplot(x=bw, y=LATE, data= seq_vals_df, geom="line")+facet_grid(order~.)+ - geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals_df, stat="identity") - } - - if(plot) print(sensPlot) - - -# if(n_seq_ord==1){ -# ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) -# plot(seq_bw, seq_vals[,"LATE"], type="l", ylab="LATE", xlab="bandwidth", ylim=ra) -# title("Sensitivity to order choice") -# lines(seq_bw, seq_vals[,"CI_low"], lty=2) -# lines(seq_bw, seq_vals[,"CI_high"], lty=2) # -# } else { -# ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) -# for(i in 1:n_seq_ord){ -# if(i==1) { -# plot(seq_bw, seq_vals[(1:n_seq_bw)+(i-1)*n_seq_bw,"LATE"], type="l", ylab="LATE", xlab="bandwidth", ylim=ra, col=i) -# } else { -# lines(seq_bw, seq_vals[(1:n_seq_bw)+(i-1)*n_seq_bw,"LATE"], col=i) -# } -# title("Sensitivity to order choice") -# lines(seq_bw, seq_vals[(1:n_seq_bw)+(i-1)*n_seq_bw,"CI_low"], lty=2, col=i) -# lines(seq_bw, seq_vals[(1:n_seq_bw)+(i-1)*n_seq_bw,"CI_high"], lty=2, col=i) -# } -# } - -## add optim in case: -# if(is_IKband) { -# points(object$bw, object$est, col="red") -# segments(object$bw,0, object$bw, object$est, col="red", lty=2) -# segments(0,object$est, object$bw, object$est, col="red", lty=2) -# } - -## export (silently) results: - out <- switch(output, "data"=seq_vals_df, "ggplot"=sensPlot) - invisible(out) -} - - - -########################################## -###### TODO -########################################## -## -plotSensi lm: work when no bandwidth!! - - -if(FALSE){ - -library(RDDtools) -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2) - -bw_ik <- RDDbw_IK(Lee2008_rdd) -reg_para_ik2 <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=2) -reg_para_ik3 <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=3) - -plotSensi(reg_para) -plotSensi(reg_para_ik2) -plotSensi(reg_para_ik2, type="facet") -plotSensi(reg_nonpara) -plotSensi(reg_nonpara, device="base") - -plo_res <- plotSensi(RDDregobject=reg_para_ik2, order=1:4) - - - -## extract matrix: -plotSensi.RDDreg_lm(RDDregobject=reg_para_ik2, order=1:4) - -a <- plotSensi(RDDregobject=reg_para_ik2, order=1:4, type="facet") -library(ggplot2) - - - -environment(plotSensi.RDDreg_lm) <- environment(RDDdata) -plotSensi(reg_para) - -} - diff --git a/RDDtools/R/qplot_experim.R b/RDDtools/R/qplot_experim.R deleted file mode 100644 index f8459b3..0000000 --- a/RDDtools/R/qplot_experim.R +++ /dev/null @@ -1,61 +0,0 @@ - - -gplot <- function(x, h, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=3,type=c("base", "ggplot"),...){ - object <- x - cutpoint <- getCutpoint(object) - -## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) - if(missing(h)) { - if(!all(xlim==range(object$x, na.rm=TRUE))){ - object <- subset(object, object$x> min(xlim) & object$x< max(xlim)) - } - h <- RDDbw_RSW(object) - if(is.even(nplot)) { - se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) - } else { - se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) - } - hs <- if(nplot==1) h else se *h - } else { - if(length(h)==1){ - if(is.even(nplot)) { - se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) - } else { - se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) - } - hs <- if(nplot==1) h else se *h - } else { - if(length(h==nplot)){ - hs <- h - } else { - stop("Length of h should be either one or equal to nplot (", nplot, ")") - } - } - } - - - - -## plot - if(type=="base"){ - par_orig <- par() - par(mfrow=c(nplot,1)) - for(i in 1:nplot){ - plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex) - } - } else { - - plotBin_out <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[1], xlim=xlim, cex=cex, plot=FALSE) - plotBin_out$h <- rep(hs[1], nrow(plotBin_out)) - for(i in 2:nplot){ - new <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex) - new$h <- rep(hs[i], nrow(new)) - plotBin_out <- rbind(plotBin_out, new) - } - - plotBin_out$h <- round(plotBin_out$h,4) - qplot(x=x, y=y, data=plotBin_out)+facet_grid(h~.) - - } - -} diff --git a/RDDtools/R/reg_gen.R b/RDDtools/R/reg_gen.R deleted file mode 100644 index 5202ab9..0000000 --- a/RDDtools/R/reg_gen.R +++ /dev/null @@ -1,207 +0,0 @@ -#' General polynomial estimator of the regression discontinuity -#' -#' Compute RDD estimate allowing a locally kernel weighted version of any estimation function -#' possibly on the range specified by bandwidth -#' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}} -#' @param covariates Formula to include covariates -#' @param order Order of the polynomial regression. -#' @param bw A bandwidth to specify the subset on which the kernel weighted regression is estimated -#' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} -#' @param slope Whether slopes should be different on left or right (separate), or the same. -#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). -#' @param fun The function to estimate the parameters -#' @param \ldots Further arguments passed to fun. See the example. -#' @details This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. -#' It is assumed that the custom funciton has following behaviour: -#' \enumerate{ -#' \item A formula interface, together with a \code{data} argument -#' \item A \code{weight} argument -#' \item A coef(summary(x)) returning a data-frame containing a column Estimate -#' } -#' Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}} -#' function for the class of the object returned by \code{fun}. -#' @return An object of class RDDreg_lm and class lm, with specific print and plot methods -#' @references TODO -#' @include plotBin.R -#' @export RDDgenreg -#' @examples -#' ## Step 0: prepare data -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' -#' ## Estimate a local probit: -#' Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0) - } else if(!missing(weights)){ - weights <- weights - } else { - weights <- NULL - } - -## Construct data - if(missing(weights)) weights <- NULL - dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, - slope=slope, covar.opt=covar.opt) - -## Regression - reg <- fun(y~., data=dat_step1, weights=weights,...) - - ##Return - RDDslot <- list() - RDDslot$RDDdata <- RDDobject - reg$RDDslot <- RDDslot - class(reg) <- c("RDDreg_lm", "RDDreg", class(reg)) - attr(reg, "PolyOrder") <- order - attr(reg, "cutpoint") <- cutpoint - attr(reg, "slope") <- slope - attr(reg, "RDDcall") <- match.call() - attr(reg, "bw") <- bw - reg -} - -RDDgenreg_old <- function(RDDobject, covariates=".", bw=RDDbw_IK(RDDobject), slope=c("separate", "same"), fun=glm, ...){ - - slope <- match.arg(slope) - checkIsRDD(RDDobject) - if(!is.function(fun)) stop("Arg 'fun' should be a function") - cutpoint <- getCutpoint(RDDobject) - -## Construct data - dat <- as.data.frame(RDDobject) - - dat_step1 <- dat[, c("y", "x")] - dat_step1$x <- dat_step1$x -cutpoint - dat_step1$D <- ifelse(dat_step1$x >= 0, 1,0) - if(slope=="separate") { - dat_step1$x_right <- dat_step1$x*dat_step1$D - } - -### Weights - kernel_w <- Kernel_tri(dat_step1[,"x"], center=0, bw=bw) - -## Regression - reg <- fun(y~., data=dat_step1, weights=kernel_w,...) - -##Return - class(reg) <- c("RDDreg_gen", "RDDreg", class(reg)) - attr(reg, "RDDcall") <- match.call() - attr(reg, "cutpoint") <- cutpoint - attr(reg, "bw") <- bw - reg -} - - -if(FALSE){ - - library(RDDtools) - data(Lee2008) - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) - environment(RDDgenreg) <- environment(RDDdata) - reg_glm_norm <- RDDgenreg(RDDobject=Lee2008_rdd) - -reg_nonpara -reg_glm_norm -plot(reg_glm_norm) - - -### Binary example: - -## gen from latent model: -gen_MC_binom <- function(n=200, LATE=0.3){ - x <- rnorm(n) - D <- x>= 0 - y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n) - y <- as.integer(ifelse(y> -0.5, 1, 0)) - if(mean(y==1)<0.04) y[sample(c(0,1), prob=c(0.1, 0.9), replace=TRUE, size=n)] <- 1 - RDDdata(x=x, y=y, cutpoint=0) -} - -mc <- gen_MC_binom() -environment(RDDgenreg) <- environment(RDDdata) -reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit")) - -## quantile: - library(quantreg) - MC1_dat <- gen_MC_IK() - MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) - - RDDcoef.rq <- function(object, allInfo=FALSE, ...){ - res <- coef(summary(object))["D",, drop=FALSE] - if(!allInfo) res <- res[,"coefficients"] - res - } - - reg_bin_rq1 <- RDDgenreg(RDDobject=MC1_rdd, fun=rq, tau=0.5, bw=0.5) - reg_bin_rq1 - coef(reg_bin_rq1) - RDDcoef(reg_bin_rq1) - RDDcoef(reg_bin_rq1, allInfo=TRUE) - summary(reg_bin_rq1) - - pl_rq <- plotSensi(reg_bin_rq1, order=1, from=0.1, to=1) - pl_rq - - - - - -## Monte Carlo - -doEs<- function(n){ -mc <- gen_MC_binom() - reg_bin_np <- RDDreg_np(RDDobject=mc) - environment(RDDgenreg) <- environment(RDDdata) - reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit")) - reg_bin_glm_log <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="logit")) - -a<- RDDtools:::RDDcoef(reg_bin_glm)/2.5 -b<- RDDtools:::RDDcoef(reg_bin_glm_log)/4 -d<- RDDtools:::RDDcoef(reg_bin_np) - -res <- c(a, b, d) -names(res) <- c("Probit", "Logit", "LPM") -res -} - -MC_logs <- replicate(500, doEs()) - -MC_logs2 <- t(MC_logs) -colMeans(MC_logs2) - -colMeans(MC_logs2-0.2) -apply(MC_logs2, 2, sd) - -colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, var) -colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, sd) - -head(MC_logs) - -reg_bin_glm -reg_bin_np - -fav <- mean(dnorm(predict(reg_bin_glm, type = "link"))) -fav * coef(swiss_probit) - - -} diff --git a/RDDtools/R/reg_lm.R b/RDDtools/R/reg_lm.R deleted file mode 100644 index 6503573..0000000 --- a/RDDtools/R/reg_lm.R +++ /dev/null @@ -1,180 +0,0 @@ -#' Parametric polynomial estimator of the regression discontinuity -#' -#' Compute a parametric polynomial regression of the ATE, -#' possibly on the range specified by bandwidth -#' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}} -#' @param covariates Formula to include covariates -#' @param order Order of the polynomial regression. -#' @param bw A bandwidth to specify the subset on which the parametric regression is estimated -#' @param covar.strat DEPRECATED, use covar.opt instead. -#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). -#' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} -#' @param slope Whether slopes should be different on left or right (separate), or the same. -#' @return An object of class RDDreg_lm and class lm, with specific print and plot methods -#' @details This function estimates the standard \emph{discontinuity regression}: -#' \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon} -#' with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same, -#' i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}. -#' Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random. -#' Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}: -#' \describe{ -#' \item{include}{Covariates are simply added as supplementary regressors in the RD equation} -#' \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}} -#' The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble -#' the non-parametric local kernel \code{\link{RDDreg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}). -#' -#' The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R, -#' heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations -#' a specific function \code{\link{clusterInf}} is provided. -#' @references TODO -#' @include plotBin.R -#' @import Formula -#' @importFrom AER ivreg -#' @export -#' @examples -#' ## Step 0: prepare data -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' ## Step 2: regression -#' # Simple polynomial of order 1: -#' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -#' print(reg_para) -#' plot(reg_para) -#' -#' # Simple polynomial of order 4: -#' reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) -#' reg_para4 -#' plot(reg_para4) -#' -#' # Restrict sample to bandwidth area: -#' bw_ik <- RDDbw_IK(Lee2008_rdd) -#' reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) -#' reg_para_ik -#' plot(reg_para_ik) - - -RDDreg_lm <- function(RDDobject, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), weights){ - - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - type <- getType(RDDobject) - - slope <- match.arg(slope) - - if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!") - if(!missing(weights)&!is.null(bw)) stop("Cannot give both 'bw' and 'weights'") - -## Subsetting - dat <- as.data.frame(RDDobject) - - if(!is.null(bw)){ - weights <- ifelse(dat$x >= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0) - } else if(!missing(weights)){ - weights <- weights - } else { - weights <- NULL - } - -## Construct data - if(missing(weights)) weights <- NULL - dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, - slope=slope, covar.opt=covar.opt) - -## Regression - if(type=="Sharp"){ - reg <- lm(y~., data=dat_step1, weights=weights) - class_reg <- "lm" - } else { - if(!is.null(covariates)) stop("Covariates currently not implemented for Fuzzy case") - reg <- ivreg(y~.-ins|.-D, data=dat_step1, weights=weights) - class_reg <- "ivreg" - } - - -##Return - RDDslot <- list() - RDDslot$RDDdata <- RDDobject - reg$RDDslot <- RDDslot - class(reg) <- c("RDDreg_lm", "RDDreg", class_reg) - attr(reg, "PolyOrder") <- order - attr(reg, "cutpoint") <- cutpoint - attr(reg, "slope") <- slope - attr(reg, "RDDcall") <- match.call() - attr(reg, "bw") <- bw - reg -} - - -#' @S3method print RDDreg_lm -print.RDDreg_lm <- function(x,...) { - - order <- getOrder(x) - cutpoint <- getCutpoint(x) - slope <- getSlope(x) - bw <- getBW(x) - hasBw <- !is.null(bw) - bw2 <- if(hasBw) bw else Inf - - x_var <- getOriginalX(x) - n_left <- sum(x_var >= cutpoint -bw2 & x_var < cutpoint) - n_right <- sum(x_var >= cutpoint & x_var <= cutpoint+bw2) - - cat("### RDD regression: parametric ###\n") - cat("\tPolynomial order: ", order, "\n") - cat("\tSlopes: ", slope, "\n") - if(hasBw) cat("\tBandwidth: ", bw, "\n") - cat("\tNumber of obs: ", sum(n_left+n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep="") - - cat("\n\tCoefficient:\n") - - printCoefmat(coef(summary(x))[2,, drop=FALSE]) - -} - -#' @S3method plot RDDreg_lm -plot.RDDreg_lm <- function(x,...) { - -## data - dat <- getOriginalData(x) - subw <- if(!is.null(x$weights)) x$weights>0 else rep(TRUE, nrow(dat)) - pred <- data.frame(x=dat$x,y=fitted(x))[subw,] - -##plot - plotBin(dat$x, dat$y, ...) - lines(pred[order(pred$x),]) -} - - - -if(FALSE){ - - library(RDDtools) - data(Lee2008) - - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - - reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) - print(x=reg_para ) - summary(reg_para ) - - reg_para_same <- RDDreg_lm(RDDobject=Lee2008_rdd, slope="same") - print(x=reg_para_same ) - summary(reg_para_same ) - - reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2) - reg_para2 - summary(reg_para2) - plot(reg_para2) - - reg_para2_same <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2, slope="same") - reg_para2_same - summary(reg_para2_same) - plot(reg_para2) - - bw_ik <- RDDbw_IK(Lee2008_rdd) - reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik) - print(x=reg_para_ik) - plot(x=reg_para_ik) - -} \ No newline at end of file diff --git a/RDDtools/R/reg_np.R b/RDDtools/R/reg_np.R deleted file mode 100644 index 4ae8405..0000000 --- a/RDDtools/R/reg_np.R +++ /dev/null @@ -1,308 +0,0 @@ -#' Parametric polynomial estimator of the regression discontinuity -#' -#' Compute a parametric polynomial regression of the ATE, -#' possibly on the range specified by bandwidth -#' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}} -#' @param covariates TODO -#' @param bw A bandwidth to specify the subset on which the parametric regression is estimated -#' @param inference Type of inference to conduct: non-parametric one (\code{np}) or standard (\code{lm}). See details. -#' @param slope Whether slopes should be different on left or right (separate), or the same. -#' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). -#' @return An object of class RDDreg_np and class lm, with specific print and plot methods -#' @seealso \code{\link{RDDbw_IK}} Bandwidth selection using the plug-in bandwidth of Imbens and Kalyanaraman (2012) -#' @references TODO -#' @include plotBin.R -#' @export RDDreg_np -#' @examples -#' ## Step 0: prepare data -#' data(Lee2008) -#' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -#' ## Step 2: regression -#' # Simple polynomial of order 1: -#' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -#' print(reg_nonpara) -#' plot(reg_nonpara) -#' - - -RDDreg_np <- function(RDDobject, covariates=NULL, bw=RDDbw_IK(RDDobject), slope=c("separate", "same"), inference=c("np", "lm"), covar.opt=list(slope=c("same", "separate"), bw=NULL)){ - - slope <- match.arg(slope) - inference <- match.arg(inference) - checkIsRDD(RDDobject) - cutpoint <- getCutpoint(RDDobject) - - if(!is.null(covariates)) warning("covariates not fully implemented for non-para reg") - -## Construct data - if("strategy"%in%names(covar.opt)) warning("Arg 'strategy' should not be used for ") - covar.opt$strategy <- "include" - dat <- as.data.frame(RDDobject) - dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=1, bw=bw, - slope=slope, covar.opt=covar.opt) - - -### Weights - kernel_w <- Kernel_tri(dat_step1[,"x"], center=0, bw=bw) - -## Regression - reg <- lm(y~., data=dat_step1, weights=kernel_w) - coefD <- coef(reg)["D"] - -## Non-para inference: - if(inference=="np"){ - var <- var_estim(x=dat$x, y=dat$y, point=cutpoint, bw=bw, eachSide=TRUE) - dens <- dens_estim(x=dat$x, point=cutpoint, bw=bw, eachSide=TRUE) - - const <- 4.8/(nrow(dat)*bw) - all <- const*sum(var)/dens - se <- sqrt(all) - tval <- coefD/se - pval <- 2 * pnorm(abs(tval), lower.tail = FALSE) - coefmat <- matrix(c(coefD, se,tval, pval), nrow=1, dimnames=list("D", c("Estimate", "Std. Error", "z value", "Pr(>|z|)"))) - } else { - coefmat <- coef(summary(reg))#["D", , drop=FALSE] - } - -##Return - res <- list() - RDDslot <- list() - RDDslot$RDDdata <- RDDobject - RDDslot$model <- reg - res$coefficients <- coef(reg)["D"] - res$coefMat <- coefmat - res$residuals <- residuals(reg) - res$fitted <- fitted(reg) - res$RDDslot <- RDDslot - - class(res) <- c("RDDreg_np", "RDDreg", "lm") - attr(res, "RDDcall") <- match.call() - attr(res, "cutpoint") <- cutpoint - attr(res, "bw") <- bw - res -} - - -#' @S3method print RDDreg_np -print.RDDreg_np <- function(x, signif.stars = getOption("show.signif.stars"), ...) { - - RDDcall <- attr(x, "RDDcall") - bw <- getBW(x) - cutpoint <- getCutpoint(x) - x_var <- getOriginalX(x) - - n_left <- sum(x_var >= cutpoint -bw & x_var < cutpoint) - n_right <- sum(x_var >= cutpoint & x_var <= cutpoint+bw) - - cat("### RDD regression: nonparametric local linear###\n") - cat("\tBandwidth: ", bw, "\n") - cat("\tNumber of obs: ", sum(n_left+n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep="") - - cat("\n\tCoefficient:\n") - - printCoefmat(RDDcoef(x, allInfo=TRUE), signif.stars=signif.stars) - -} - -#' @S3method summary RDDreg_np -summary.RDDreg_np <- function(object, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), ...) { - - x <- object - bw <- getBW(x) - cutpoint <- getCutpoint(x) - x_var <- getOriginalX(x) - -## compute numbers left/right: - n_left <- sum(x_var >= cutpoint -bw & x_var < cutpoint) - n_right <- sum(x_var >= cutpoint & x_var <= cutpoint+bw) - -## compute residual summary: - res_quant <- quantile(residuals(x)) - names(res_quant) <- c("Min", "1Q", "Median", "3Q", "Max") - -## compute R^2 - r.squared <- summary(x$RDDslot$model)$r.squared - -## Extend the RDDreg_no output with new computaations: - - object$r.squared <- r.squared - object$res_quant <- res_quant - object$n_obs <- list(n_left=n_left, n_right=n_right, total=n_left+n_right) - - class(object) <- c("summary.RDDreg_np", class(object)) - object -} - -#' @S3method print summary.RDDreg_np -print.summary.RDDreg_np <- function(x, digits = max(3, getOption("digits") - 3), signif.stars = getOption("show.signif.stars"), ...) { - - bw <- getBW(x) - - cat("### RDD regression: nonparametric local linear###\n") - cat("\tBandwidth: ", bw, "\n") - cat("\tNumber of obs: ", x$n_obs$total, " (left: ", x$n_obs$n_left, ", right: ", x$n_obs$n_right, ")\n", sep="") - - cat("\n\tWeighted Residuals:\n") - print(zapsmall(x$res_quant, digits + 1)) - - - cat("\n\tCoefficient:\n") - - printCoefmat(RDDcoef(x, allInfo=TRUE), signif.stars=signif.stars) - - cat("\n\tLocal R squared:", formatC(x$r.squared, digits = digits), "\n") - -} - - -#' @S3method plot RDDreg_np -plot.RDDreg_np <- function(x,binwidth,chart=c("locpoly", "np"), ...) { - - chart <- match.arg(chart) - cutpoint <- getCutpoint(x) - bw <- getBW(x) - if(missing(binwidth)) binwidth <- bw/5 # binwidth!=bandwidth - -## data - dat <- getOriginalData(x, classRDD=FALSE) - -## Use locpoly: - dat_left <- subset(dat, x=cutpoint) - - if(chart=="locpoly"){ - llp_left <- locpoly(x=dat_left$x, y=dat_left$y, bandwidth=bw) - llp_right <- locpoly(x=dat_right$x, y=dat_right$y, bandwidth=bw) - -## Use np: - } else { - np_reg_left <- npreg(npregbw(y~x, data=dat_left, regtype="ll", ckertype="epanechnikov", - bandwidth.compute=FALSE, bws=bw)) - - np_reg_right <- npreg(npregbw(y~x, data=dat_right, regtype="ll", ckertype="epanechnikov", - bandwidth.compute=FALSE, bws=bw)) - newDat_left <- data.frame(x=seq(min(dat_left$x), cutpoint-0.001, by=.01)) - newDat_right <- data.frame(x=seq(cutpoint, max(dat_right$x), by=.01)) - pred_left <- predict(np_reg_left, newdata=newDat_left,se.fit=TRUE) - pred_right <- predict(np_reg_right, newdata=newDat_right,se.fit=TRUE) - } -##plot - plotBin(dat$x, dat$y, h=binwidth, ...) - if(chart=="locpoly"){ - lines(llp_left$x, llp_left$y) - lines(llp_right$x, llp_right$y) - } else { - lines(newDat_left$x, pred_left$fit, col=1) - lines(newDat_left$x, pred_left$fit+2*pred_left$se.fit, col=2, lty=2) - lines(newDat_left$x, pred_left$fit-2*pred_left$se.fit, col=2, lty=2) - - lines(newDat_right$x, pred_right$fit, col=1) - lines(newDat_right$x, pred_right$fit+2*pred_right$se.fit, col=2, lty=2) - lines(newDat_right$x, pred_right$fit-2*pred_right$se.fit, col=2, lty=2) -} -} - -#' @S3method vcov RDDreg_np -vcov.RDDreg_np <- function(object, ...){ - - infType <- infType(object) - if(infType=="np") { - warning("No vcov() available when RDDreg_np() was called with infType='np'") - res <- NULL - } else { - res <- vcov(object$RDDslot$model) - } - res -} - -if(FALSE){ - library(RDDtools) - data(Lee2008) - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - - environment(RDDreg_np) <- environment(RDDdata) - environment(plot.RDDreg_np) <- environment(RDDdata) - environment(print.RDDreg_np) <- environment(RDDdata) - environment(summary.RDDreg_np) <- environment(RDDdata) - - - reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) - reg_nonpara_inflm <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") - RDDtools:::getCutpoint(reg_nonpara) - head(RDDtools:::getOriginalX.RDDreg(reg_nonpara)) - - print(reg_nonpara) - print(reg_nonpara_inflm) - summary(reg_nonpara) - plot(x=reg_nonpara) - plot(x=reg_nonpara, chart="np") - plot(x=reg_nonpara, binwidth=0.05) - - - RDDtools:::waldci.RDDreg_np(reg_nonpara) - RDDtools:::waldci.RDDreg_np(reg_nonpara_inflm) - -environment(waldci.RDDreg_np) <- environment(RDDdata) -waldci.RDDreg_np(reg_nonpara) - -plotSensi(reg_nonpara) - - -class(getCall(reg_nonpara)) -class(attr(reg_nonpara, "RDDcall")) - - -### MC -mc_simple <- function(n=10000, xr=0.1){ - x<- rnorm(n) - y <- 1+1.2*x+ 1.4*ifelse(x>=0,1,0)+ xr*ifelse(x>=0,1,0)*x+rnorm(n) - RD <- RDDdata(x=x, y=y, cutpoint=0) - RD -} - -r<-RDDreg_np(mc_simple()) -summary(r) -plot(r) - - -} - -if(FALSE){ -bw <- RDDbw_IK(Lee2008_rdd) -dat <- Lee2008_rdd -x<- Lee2008_rdd$x -y<- Lee2008_rdd$y -cutpoint <- 0 - dat_left <- subset(dat, x=cutpoint) - - llp_left <- locpoly(x=dat_left$x, y=dat_left$y, bandwidth=bw) - llp_right <- locpoly(x=dat_right$x, y=dat_right$y, bandwidth=bw) - -p1 <- -0.7346403 -llp_left$x[which.min(abs(llp_left$x-p1))] -llp_left$y[which.min(abs(llp_left$x-p1))] - -## around x: -point <- -0.7350795 - -po <- subset(dat, x> point -bw & x< point+bw) -mean(po$y) -a<- plotBin(dat$x, dat$y, h=bw) -a - -a$x1 <- a$x-bw -a$x2 <- a$x+bw - -b <- rownames(a) -b1 <- gsub("\\[|\\(","c(",b) -b2 <- gsub("\\]|\\)",")",b1) - -mean(eval(parse(text=b2[1]))) -diff(eval(parse(text=b2[1]))) - - - lines(llp_left$x, llp_left$y) - lines(llp_right$x, llp_right$y) - -} \ No newline at end of file diff --git a/RDDtools/R/var_estim.R b/RDDtools/R/var_estim.R deleted file mode 100644 index f50fc6e..0000000 --- a/RDDtools/R/var_estim.R +++ /dev/null @@ -1,326 +0,0 @@ - - - -dens_estim <- function(x, point, bw, eachSide=TRUE){ - - N <- length(x) - - if(missing(bw)) bw <- 1.84*sd(x)*N^(-1/5) - - if(eachSide){ - isIn_bw_left <- x>=(point-bw) & x=point & x<=(point+bw) - - NisIn_bw_left <- sum(isIn_bw_left, na.rm=TRUE) - NisIn_bw_right <- sum(isIn_bw_right, na.rm=TRUE) - - res <-(NisIn_bw_left+NisIn_bw_right)/(2*N*bw) - } else { - isIn_bw_both <- x>=(point-bw) & x<=(point+bw) - NisIn_bw_both <- sum(isIn_bw_both, na.rm=TRUE) - res <- NisIn_bw_both/(2*N*bw) - } - res -} - -dens_estim2 <- function(x, point, bw, kernel="gaussian",...){ - - - if(missing(bw)) bw <- "SJ" - - d <- density(x, from=point, to=point, n=1, na.rm=TRUE, kernel=kernel, bw=bw,...) - d$y -} - - -var_estim <- function(x,y, point, bw, eachSide=TRUE){ - - - N <- length(x) - if(missing(bw)) bw <- 1.84*sd(x)*N^(-1/5) - - if(eachSide){ - isIn_bw_left <- x>=(point-bw) & x=point & x<=(point+bw) - var_inh_left <- var(y[isIn_bw_left], na.rm=TRUE) - var_inh_right <- var(y[isIn_bw_right], na.rm=TRUE) - res <- c(var_inh_left, var_inh_right) - } else { - isIn_bw <- x>=(point-bw) & x<=point+bw - var_inh <- var(y[isIn_bw], na.rm=TRUE) - res <- var_inh - } -res -} - - -#' @importFrom locpol locpol -#' @importFrom locpol gaussK - -### Add locpol kernel for uniform: -uniK <- function(x) ifelse(abs(x) <= 1, 1/2, 0) -attr(uniK, "RK") <- 1/2 ## Rk: kernel(u)^2 -attr(uniK,"mu0K") <- 1 -attr(uniK,"mu2K") <- 1/3 ## second orde rmoment of K -attr(uniK,"K4") <- NA ## see with author! -attr(uniK,"RdK") <- NA ## see with author! -attr(uniK, "dom") <- c(-1,1) ## - -var_estim2 <- function(x,y, point, bw, estim=c("var", "NW", "NW_loc", "LL_kern", "LL_loc", "var_loc"), sides=c("both", "uni"), kernel=c("Normal", "Uniform"), dfadj=TRUE){ - - sides <- match.arg(sides) - estim <- match.arg(estim) - kernel <- match.arg(kernel) - N <- length(x) - if(missing(bw)) bw <- 1.84*sd(x)*N^(-1/5) - - if(sides=="uni"){ - isIn_bw_left <- x>=(point-bw) & x=point & x<=(point+bw) - var_inh_left <- var(y[isIn_bw_left], na.rm=TRUE) - var_inh_right <- var(y[isIn_bw_right], na.rm=TRUE) - res <- c(var_inh_left, var_inh_right) - } else { - if(estim=="NW"){ - ker <- switch(kernel, "Uniform"="box", "Normal"="normal") - m <- ksmooth(x=x, y=y, bandwidth=bw*2, x.points=point, kernel=ker)$y - s <- ksmooth(x=x, y=y^2, bandwidth=bw*2, x.points=point, kernel=ker)$y - } else if(estim=="NW_loc"){ - ker <- switch(kernel, "Uniform"=uniK, "Normal"=gaussK) - df_xy <- data.frame(y=y, x=x, y2=y^2) -# a <<- locCteSmootherC(x=x, y=y, xeval=point, bw=bw, kernel=uniK) -# aa <<- locCteSmootherC(x=x, y=y, xeval=point, bw=bw, kernel=gaussK) - m <- locpol(y~x,data=df_xy, bw=bw, xeval=point, deg=0, kernel=ker) - s <- locpol(y2~x,data=df_xy, bw=bw, xeval=point, deg=0, kernel=ker) - m <- m$lpFit["y"] - s <- s$lpFit["y2"] - } else if(estim=="LL_kern"){ - if(kernel!="Normal") warning("Kernel set to Normal for locpoly") - m <- locpoly(x=x, y=y, bandwidth=bw, gridsize=200) - s <- locpoly(x=x, y=y^2, bandwidth=bw, gridsize=200) - m <- m$y[which.min(abs(m$x-point))] - s <- s$y[which.min(abs(s$x-point))] - } else if(estim=="LL_loc"){ - ker <- switch(kernel, "Uniform"=uniK, "Normal"=gaussK) - df_xy <- data.frame(y=y, x=x, y2=y^2) - m <- locpol(y~x,data=df_xy, bw=bw, xeval=point, kernel=ker) - s <- locpol(y2~x,data=df_xy, bw=bw, xeval=point, kernel=ker) - m <- m$lpFit["y"] - s <- s$lpFit["y2"] - } else { - s <- m <- 1 - } - sh <- s - m^2 - res <- sh - if(estim=="var_loc"){ - ker <- switch(kernel, "Uniform"=uniK, "Normal"=gaussK) - df_xy <- data.frame(y=y, x=x, y2=y^2) - m <- locpol(y~x,data=df_xy, bw=bw, xeval=point, kernel=ker) - res <- m$lpFit$var - } else if(estim=="var"){ - isIn_bw<- x>=(point-bw) & x<=(point+bw) - var <- var(y[isIn_bw], na.rm=TRUE) - res <- if(dfadj) var*(sum(isIn_bw)-1)/sum(isIn_bw) else var - } - - } - names(res) <- NULL -as.numeric(res) -} - - -## Formula: \sqrt[ (C_2 * \sigma(x)^2 / f(x)) / ( n * h) ] -## Imbens & Kalyan: C_2/N*h (sigma_l^2 + \sigma_r^2)/f(x) -## value of constant: 4.8 (using boundary kernel: Triangular -## (value of constant: 33.6 (using boundary kernel: Triangular -## library(locpol) -## computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=1), lower=0, upper=Inf) -## or: -## computeRK(equivKernel(TrianK, nu=0, deg=1, lower=-1, upper=1), lower=-Inf, upper=Inf) - -all_var_low <- function(x,y, point, bw, eachSide=TRUE, return=c("se", "all")){ - - return <- match.arg(return) - - N <- length(x) - if(missing(bw)) bw <- 1.84*sd(x)*N^(-1/5) - - var <- var_estim(x=x, y=y, point=point, bw=bw, eachSide=eachSide) - dens <- dens_estim(x=x, point=point, bw=bw, eachSide=eachSide) - - C2 <- if(eachSide) 4.8 else 2/3 - const <- C2/(N*bw) - all <- const*sum(var)/dens - res <- sqrt(all) - names(res) <- "se" - if(return=="all") res <- c(res, cons=const, dens=dens, var=sum(var)) - res - -} - - -all_var <- function(...) all_var_low(...) - -all_var.RDDreg.np <- function(x){ - - bw <- getBW(x) - dat <- getOriginalData(x) - cutpoint <- getCutpoint(x) - res <- all_var_low(dat$x,dat$y, point=cutpoint, bw=bw, eachSide=TRUE, return="se") - res -} - - - - -#################################################################################### -############################ -#################################################################################### - -if(FALSE){ - - library(KernSmooth) - library(RDDtools) - library(locpol) - if(packageVersion("locpol")<=0.6) stop("Should get latest dev version of locpol") - - -environment(all_var.RDDreg.np) <- environment(RDDdata) - ## small test: - MC1_df <- gen_MC_IK() - - # true val - point <- 0 - dbeta((point+1)/2 , shape1=2, shape2=4)*1/2 - - dens_estim(x=MC1_df$x, point=point, bw=0.1) - dens_estim(x=MC1_df$x, point=point) - dens_estim2(x=MC1_df$x, point=point, bw=0.1) - dens_estim2(x=MC1_df$x, point=point) - -## should correspond? - dens_estim(x=MC1_df$x, point=point, bw=0.1, eachSide=FALSE) - dens_estim2(x=MC1_df$x, point=point, bw=0.1, kernel="rectangular") - d <- density(x=MC1_df$x, bw=0.1, kernel="rectangular") - d$y[which.min(abs(d$x-point))] - density(x=MC1_df$x, from=0, to=0, n=1,bw=0.1, kernel="rectangular")$y - - #### VARiance - sqrt(var_estim(x=MC1_df$x, y=MC1_df$y, point=0)) - - sqrt(var_estim(x=MC1_df$x, y=MC1_df$y, point=0, eachSide=FALSE)) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="var")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="NW_loc", kernel="Uniform")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="NW", kernel="Uniform")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="NW_loc",kernel="Normal")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="LL_kern")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="LL_loc")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="LL_loc", kernel="Uniform")) - sqrt(var_estim2(x=MC1_df$x, y=MC1_df$y, point=0, estim="var_loc")) - - - - all_var(x=MC1_df$x, y=MC1_df$y, point=0) - - ### test: - library(RDDtools) - - - MC1_df_rdd <- RDDdata(x=MC1_df$x, y=MC1_df$y, cutpoint=0) - - bw_ik <- RDDbw_IK(MC1_df_rdd) - RDD_est <- RDDreg_np(MC1_df_rdd, bw=bw_ik) - RDD_est_lmnp <- RDDreg_lm(MC1_df_rdd, weights=dnorm(MC1_df_rdd$x, sd=bw_ik)) - -all_var.RDDreg.np(x=RDD_est) - - ## with np: - library(np) - MC1_df_D <- data.frame(MC1_df, D=ifelse(MC1_df$x>=0, 1, 0), Dx=ifelse(MC1_df$x>=0, MC1_df$x, 0)) - bw_ik.np <- npregbw(bws=bw_ik, formula=y~x, data= MC1_df, bandwidth.compute=FALSE, regtype = "ll") - bw_ik.np_D <- npregbw(bws=rep(bw_ik,3), formula=y~x+D+Dx, data= MC1_df_D, bandwidth.compute=FALSE, regtype = "ll", - eval=data.frame(x=c(0,0), D=c(0,1), Dx=c(0,0))) - bw_ik.np_D_mixed <- npregbw(bws=c(bw_ik,0.49,bw_ik), formula=y~x+factor(D)+Dx, data= MC1_df_D, bandwidth.compute=FALSE, regtype = "ll", - eval=data.frame(x=c(0,0), D=c(0,1), Dx=c(0,0))) - - model.np <- npreg(bw_ik.np, exdat=0) - model.np_D <- npreg(bw_ik.np_D, exdat=data.frame(x=0,D=0, Dx=0)) - model.np_D_mix <- npreg(bw_ik.np_D_mixed) - model.np_left <- npreg(npregbw(bws=bw_ik, formula=y~x, data= subset(MC1_df,x<0), bandwidth.compute=FALSE, regtype = "ll")) - model.np_right <- npreg(npregbw(bws=bw_ik, formula=y~x, data= subset(MC1_df,x>=0), bandwidth.compute=FALSE, regtype = "ll")) - - - pred_np <- predict(model.np, newdata=data.frame(x=0), se.fit=TRUE) - pred_np_D0 <- predict(model.np_D, newdata=data.frame(x=0, D=0, Dx=0), se.fit=TRUE) - pred_np_mix_D0 <- predict(model.np_D_mix, newdata=data.frame(x=0, D=factor(0), Dx=0), se.fit=TRUE) - pred_np_D1 <- predict(model.np_D, newdata=data.frame(x=0, D=1, Dx=0), se.fit=TRUE) - pred_np_mix_D1 <- predict(model.np_D_mix, newdata=data.frame(x=0, D=factor(1), Dx=0), se.fit=TRUE) - pred_np_D1$fit -pred_np_D0$fit - - pred_left <- predict(model.np_left, newdata=data.frame(x=0), se.fit=TRUE) - pred_right <- predict(model.np_right, newdata=data.frame(x=0), se.fit=TRUE) - - pred_li <- list(pred_np=pred_np, pred_left=pred_left, pred_right=pred_right, - pred_np_D0=pred_np_D0, pred_np_D1=pred_np_D1, - pred_np_D0_mix=pred_np_mix_D0, pred_np_D1_mix=pred_np_mix_D1) - sapply(pred_li, function(x) c(fit=x$fit, se.fit=x$se.fit)) - - pred_right$fit-pred_left$fit - - summary(RDD_est ) - -## get same result with RDDreg_lm: - com_vals <-rbind( - left_point=c(RDD=coef(summary(RDD_est_lmnp))[1,1], np_1side=pred_left$fit, np_D0=pred_np_D0$fit), - left_point_se=c(RDD=coef(summary(RDD_est_lmnp))[1,2], np_1side=pred_left$se.fit, np_D0=pred_np_D0$se.fit), - right_point=c(RDD=sum(coef(summary(RDD_est_lmnp))[1:2,1]), np_1side=pred_right$fit, np_D1=pred_np_D1$fit), - right_point_se=c(RDD=sum(coef(summary(RDD_est_lmnp))[1:2,2]), np_1side=pred_right$se.fit, np_D1=pred_np_D1$se.fit), - diff=c(RDD=coef(summary(RDD_est_lmnp))[2,1], np_1side=pred_np_D1$fit -pred_np_D0$fit, np_D1=NA) - ) -com_vals - coef(summary(RDD_est_lmnp))[2,1] - -a<-plot(model.np_D, plot.errors.method="bootstrap", plot.behavior="plot-data", plot.errors.style="bar")#, plot.errors.center="bias") -str(a) -head(a$r2$eval) -head(a$r1$eval) - -## with liblocpol - library(locpol) - library(devtools) - - - model.liblocpol_both <- locpol(y~x, data=MC1_df, kernel=gaussK, xeval=0, bw=bw_ik, bwVar=1.2) - model.liblocpol_both_triK <- locpol(y~x, data=MC1_df, kernel=TrianK, xeval=0, bw=bw_ik, bwVar=1.2) - model.liblocpol_left <- locpol(y~x, data=subset(MC1_df,x<0), kernel=gaussK, xeval=0, bw=bw_ik, bwVar=1.2) - model.liblocpol_left_a <- locpol(y~x, data=subset(MC1_df,x<0), kernel=gaussK, xeval=0, bw=bw_ik, bwVar=1) - model.liblocpol_right <- locpol(y~x, data=subset(MC1_df,x>=0), kernel=gaussK, xeval=0, bw=bw_ik) - model.liblocpol_right_triK <- locpol(y~x, data=subset(MC1_df,x>=0), kernel=TrianK, xeval=0, bw=bw_ik) - - model_locpol_li <- list(liblocpol_both=model.liblocpol_both, - liblocpol_left=model.liblocpol_left, - liblocpol_left_a=model.liblocpol_left_a, - liblocpol_right=model.liblocpol_right) - -se.locpol <- function(x) sqrt(x$CIwidth * x$lpFit$var/x$lpFit$xDen) - -## Compare se of np and locpol on full, left and right: -round(sapply(model_locpol_li, function(x) c(fit=fitted(x), se.fit=se.locpol(x))),9) -round(sapply(pred_li, function(x) c(fit=x$fit, se.fit=x$se.fit)),9) - - -## Compare se of np and locpol on full: - a<- all_var(x=MC1_df$x, y=MC1_df$y, point=0, bw=bw_ik, return="all") - aa<- all_var(x=MC1_df$x, y=MC1_df$y, point=0, bw=bw_ik, eachSide=FALSE, return="all") - loc_right <- c(se.locpol(model.liblocpol_right_triK), model.liblocpol_right_triK$CIwidth, model.liblocpol_right_triK$lpFit$xDen,model.liblocpol_right_triK$lpFit$var) - loc_both <- c(se.locpol(model.liblocpol_both_triK), model.liblocpol_both_triK$CIwidth, model.liblocpol_both_triK$lpFit$xDen,model.liblocpol_both_triK$lpFit$var) - -pred_np -model.np$merr - -rbind(a, loc_right, aa, loc_both) - -computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=1), lower=0, upper=Inf)/(nrow(MC1_df)*bw_ik) -computeRK(equivKernel(TrianK, nu=0, deg=1, lower=-1, upper=1), lower=-Inf, upper=Inf)/(nrow(MC1_df)*bw_ik) - -} \ No newline at end of file diff --git a/RDDtools/R/various_code.R b/RDDtools/R/various_code.R deleted file mode 100644 index 6ec771c..0000000 --- a/RDDtools/R/various_code.R +++ /dev/null @@ -1,17 +0,0 @@ -### MISC -is.even <- function (a) { - a%%2 == 0 -} - - -Kernel_tri <- function(X, center, bw) { - ifelse(abs(X - center) > bw, 0, 1 - (abs(X - center) / bw)) -} - -Kernel_uni <- function(X, center, bw) { - ifelse(abs(X - center) > bw, 0, 1) -} - -.onLoad <- function(libname, pkgname) - packageStartupMessage("\nRDDtools ", utils::packageVersion("RDDtools"), - "\nPLEASE NOTE THIS is currently only a development version. \nRun vignette('RDDtools') for the documentation") diff --git a/RDDtools/data/Lee2008.rda b/RDDtools/data/Lee2008.rda deleted file mode 100644 index df517b7..0000000 Binary files a/RDDtools/data/Lee2008.rda and /dev/null differ diff --git a/RDDtools/inst/doc/RDDtools.pdf b/RDDtools/inst/doc/RDDtools.pdf deleted file mode 100644 index c52136a..0000000 Binary files a/RDDtools/inst/doc/RDDtools.pdf and /dev/null differ diff --git a/RDDtools/man/Lee2008.Rd b/RDDtools/man/Lee2008.Rd deleted file mode 100644 index 52dd488..0000000 --- a/RDDtools/man/Lee2008.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\docType{data} -\name{Lee2008} -\alias{Lee2008} -\title{Dataset used in Lee (2008)} -\format{A data frame with 6558 observations and two variables: -\describe{ -\item{x}{Vote at election t-1} -\item{y}{Vote at election t} -}} -\source{ -Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} -} -\usage{ -Lee2008 -} -\description{ -U.S. House elections data -} -\examples{ -data(Lee2008) -RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) -summary(RDDlee) -plot(RDDlee) -} -\references{ -Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," -Review of Economic Studies (2012) 79, 933-959 - -Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, -\emph{Journal of Econometrics}, 142, 675-697 -} - diff --git a/RDDtools/man/RDDbw_IK.Rd b/RDDtools/man/RDDbw_IK.Rd deleted file mode 100644 index cbda82d..0000000 --- a/RDDtools/man/RDDbw_IK.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{RDDbw_IK} -\alias{RDDbw_IK} -\title{Imbens-Kalyanaraman Optimal Bandwidth Calculation} -\usage{ -RDDbw_IK(RDDobject, kernel = c("Triangular", "Uniform", "Normal")) -} -\arguments{ -\item{RDDobject}{of class RDDdata created by \code{\link{RDDdata}}} - -\item{kernel}{The type of kernel used: either \code{triangular} or \code{uniform}.} -} -\value{ -The optimal bandwidth -} -\description{ -Imbens-Kalyanaraman optimal bandwidth -for local linear regression in Regression discontinuity designs. -} -\examples{ -data(Lee2008) -rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -RDDbw_IK(rd) -} -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} -\references{ -Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," -Review of Economic Studies (2012) 79, 933-959 -} -\seealso{ -\code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) -} - diff --git a/RDDtools/man/RDDdata.Rd b/RDDtools/man/RDDdata.Rd deleted file mode 100644 index 903eaab..0000000 --- a/RDDtools/man/RDDdata.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{RDDdata} -\alias{RDDdata} -\title{Construct RDDdata} -\usage{ -RDDdata(y, x, covar, cutpoint, z, labels, data) -} -\arguments{ -\item{x}{Forcing variable} - -\item{y}{Output} - -\item{covar}{Exogeneous variables} - -\item{cutpoint}{Cutpoint} - -\item{labels}{Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently.} - -\item{data}{A data-frame for the \code{x} and \code{y} variables. If this is provided, -the column names can be entered directly for argument \code{x} and \code{y}} - -\item{z}{Assignment variable for the fuzzy case.} -} -\value{ -Object of class \code{RDDdata}, inheriting from \code{data.frame} -} -\description{ -Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. -} -\examples{ -data(Lee2008) -rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) - -# The print() function is the same as the print.data.frame: -rd - -# The summary() and plot() function are specific to RDDdata -summary(rd) -plot(rd) -} -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} - diff --git a/RDDtools/man/RDDtools-package.Rd b/RDDtools/man/RDDtools-package.Rd deleted file mode 100644 index 2f68ace..0000000 --- a/RDDtools/man/RDDtools-package.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\docType{package} -\name{RDDtools-package} -\alias{RDDtools} -\alias{RDDtools-package} -\title{Regression Discontinuity Design} -\description{ -Regression Discontinuity Design -} -\details{ -Provides function to do a comprehensive regression discontinuity analysis. -} -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} - diff --git a/RDDtools/man/ROT_bw.Rd b/RDDtools/man/ROT_bw.Rd deleted file mode 100644 index 9ae13e0..0000000 --- a/RDDtools/man/ROT_bw.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{ROT_bw} -\alias{ROT_bw} -\title{Bandwidth selector} -\usage{ -ROT_bw(object) -} -\arguments{ -\item{object}{object of class RDDdata} -} -\description{ -implements dpill -} -\examples{ -#No discontinuity -} -\author{ -Drew Dimmery <\email{drewd@nyu.edu}> -} -\references{ -McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005} -} - diff --git a/RDDtools/man/as.lm.Rd b/RDDtools/man/as.lm.Rd deleted file mode 100644 index 219a196..0000000 --- a/RDDtools/man/as.lm.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{as.lm} -\alias{as.lm} -\title{Convert a rdd object to lm} -\usage{ -as.lm(x) -} -\arguments{ -\item{x}{An object to convert to lm} -} -\value{ -An object of class \code{lm} -} -\description{ -Convert a rdd object to lm -} -\examples{ -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -reg_para_lm <- as.lm(reg_para) -reg_para_lm -plot(reg_para_lm, which=4) -} -\seealso{ -\code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}. -} - diff --git a/RDDtools/man/as.npregbw.Rd b/RDDtools/man/as.npregbw.Rd deleted file mode 100644 index 471b91e..0000000 --- a/RDDtools/man/as.npregbw.Rd +++ /dev/null @@ -1,48 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{as.npregbw} -\alias{as.npreg} -\alias{as.npregbw} -\title{Convert an RDDreg object to a \code{npreg} object} -\usage{ -as.npregbw(x, ...) - -as.npreg(x, ...) -} -\arguments{ -\item{x}{Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}}} - -\item{\ldots}{Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}}} -} -\value{ -An object of class \code{npreg} or \code{npregbw} -} -\description{ -Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np} -} -\details{ -This function converts an RDDreg object into an \code{npreg} object from package \code{np} -Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one. -Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg -proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the univariate one -is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. -} -\examples{ -# Estimate ususal RDDreg: - data(Lee2008) - Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) - -## Convert to npreg: - reg_nonpara_np <- as.npreg(reg_nonpara) - reg_nonpara_np - RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) - -## Compare with result obtained with a Gaussian kernel: - bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) - reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) - all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) -} -\seealso{ -\code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}. -} - diff --git a/RDDtools/man/plotSensi.Rd b/RDDtools/man/plotSensi.Rd deleted file mode 100644 index 162db17..0000000 --- a/RDDtools/man/plotSensi.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{plotSensi} -\alias{plotSensi} -\alias{plotSensi.RDDreg_lm} -\alias{plotSensi.RDDreg_np} -\title{Plot the sensitivity to the bandwidth} -\usage{ -plotSensi(RDDregobject, from, to, by = 0.01, level = 0.95, - output = c("data", "ggplot"), plot = TRUE, ...) - -\method{plotSensi}{RDDreg_np}(RDDregobject, from, to, by = 0.05, - level = 0.95, output = c("data", "ggplot"), plot = TRUE, - device = c("ggplot", "base"), vcov. = NULL, ...) - -\method{plotSensi}{RDDreg_lm}(RDDregobject, from, to, by = 0.05, - level = 0.95, output = c("data", "ggplot"), plot = TRUE, order, - type = c("colour", "facet"), ...) -} -\arguments{ -\item{RDDregobject}{object of a RDD regression, from either \code{\link{RDDreg_lm}} or \code{\link{RDDreg_np}}} - -\item{from}{First bandwidth point. Default value is max(1e-3, bw-0.1)} - -\item{to}{Last bandwidth point. Default value is bw+0.1} - -\item{by}{Increments in the \code{from} \code{to} sequence} - -\item{level}{Level of the confidence interval} - -\item{order}{For parametric models (from \code{\link{RDDreg_lm}}), the order of the polynomial.} - -\item{type}{For parametric models (from \code{\link{RDDreg_lm}}) whether different orders are represented as different colour or as different facets.} - -\item{device}{Whether to draw a base or a ggplot graph.} - -\item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} - -\item{plot}{Whether to actually plot the data.} - -\item{\ldots}{Further arguments passed to specific methods} - -\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}} -} -\value{ -A data frame containing the bandwidths and corresponding estimates and confidence intervals. -} -\description{ -Draw a plot showing the LATE estimates depending on multiple bandwidths -} -\examples{ -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - -#Non-parametric estimate -bw_ik <- RDDbw_IK(Lee2008_rdd) -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik) -plotSensi(reg_nonpara) -plotSensi(reg_nonpara, device="base") - -#Parametric estimate: -reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4, bw=bw_ik) -plotSensi(reg_para_ik) -plotSensi(reg_para_ik, type="facet") -} -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} - diff --git a/RDDtools/tests/RDDpred.R b/RDDtools/tests/RDDpred.R deleted file mode 100644 index 4a85245..0000000 --- a/RDDtools/tests/RDDpred.R +++ /dev/null @@ -1,199 +0,0 @@ -library(RDDtools) -library(car) - - -#### DATA -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - -n_Lee <- nrow(Lee2008) - -set.seed(123) -Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) -Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) - -#### REGS -bw_IK <- RDDbw_IK(Lee2008_rdd_z) -w_IK <- RDDtools:::Kernel_tri(Lee2008_rdd_z$x, 0, bw_IK) -reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) -reg_para4_cov_slSep_W <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"), weights=w_IK) -reg_np_cov <- RDDreg_np(RDDobject=Lee2008_rdd_z, covariates="z1", bw=bw_IK, inference="lm") - - - - -reg_para4_cov_slSep_2Z <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1+z2", covar.opt=list(slope="separate")) - -reg_li <- list( reg_para4_cov_slSep=reg_para4_cov_slSep, - reg_para4_cov_slSep_W=reg_para4_cov_slSep_W, - reg_np_cov=reg_np_cov, - reg_para4_cov_slSep_2Z=reg_para4_cov_slSep_2Z) - -checkRDDmean <- function(x, n=5){ - covDF <- model.frame(x) - zDF <- grep("z", colnames(covDF), value=FALSE) - hasD <- zDF[-grep(":", colnames(covDF)[zDF])] - - DF_1 <- covDF[1:n,hasD, drop=FALSE] - DF_2 <- data.frame(t(colMeans(DF_1))) - - pred_1 <- RDDpred(x, covdata=DF_1, stat="mean") - pred_2 <- RDDpred(x, covdata=DF_2) - all.equal(pred_1, pred_2, check.attributes=FALSE) -} - -sapply(reg_li, checkRDDmean) - -sapply(reg_li, function(x) all.equal(unlist(RDDpred(x)),RDDcoef(x, allInfo=TRUE)[1,1:2], check.attributes=FALSE)) - - -# -# reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -# print(reg_para) -# summary(reg_para) -# plot(reg_para) -# -# formula(reg_para) -# -# update(as.formula("y ~ D + `x^1` + `x^1_right`"), reg_para) -# reg_para_l <- as.lm(reg_para) -# # update(reg_para_l, y ~ D + `x^1` + `x^1_right`) -# -# mf <- model.frame(reg_para) -# -# lm("y ~ D + `x^1` + `x^1_right`", mf) -# a<-lm("y ~ -1 + D +I(1-D) + `x^1` + `x^1_right`", mf) -# diff(coef(a)[2:1]) -# coef(reg_para) -# -# # deltaMethod(a, "I(1-D) - D", parameterNames=paste("a", 1:4, sep="")) -# deltaMethod(a, "a1 - a2", parameterNames=paste("a", 1:4, sep="")) -# coef(summary(reg_para))[2,] -# -# reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) -# -mf_2 <- model.frame(reg_para4_cov_slSep) -# formula(reg_para4_cov_slSep) -# -aa <- lm("y ~ D + `x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) -aaa <- lm("y ~ -1+ D + I(1-D)+`x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) -# -# diff(coef(aaa)[2:1]) -# RDDpred(reg_para4_cov_slSep) -# RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) -# -# RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) - -## compare RDDpred and Delta at 1: -rdd_p_1 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=1)) -delta_1 <- deltaMethod(aaa, "a1 - a2 + a12", parameterNames=paste("a", 1:12, sep="")) -rdd_p_1 -delta_1 -all.equal(unlist(rdd_p_1), drop(as.matrix(delta_1[1:2])), check.attributes=FALSE) - -## compare RDDpred and Delta at 0: -rdd_p_0 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) -rdd_c_0 <- RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) -delta_0 <- deltaMethod(aaa, "a1 - a2 ", parameterNames=paste("a", 1:12, sep="")) -rdd_p_0 -rdd_c_0 -delta_0 -all.equal(unlist(rdd_p_0), drop(as.matrix(delta_0[1:2])), check.attributes=FALSE) -all.equal(unlist(rdd_p_0), drop(as.matrix(rdd_c_0[1:2])), check.attributes=FALSE) - -## compare RDDpred and Delta at 2 points: -rdd_p_01_AGG <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.5))) -rdd_p_01_all <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1))) -rdd_p_01_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="sum") -rdd_p_01_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="mean") - -delta_01_S <- deltaMethod(aaa, "2*(a1 - a2) +1*a12", parameterNames=paste("a", 1:12, sep="")) -delta_01_M <- deltaMethod(aaa, "(2*(a1 - a2) +1*a12)/2", parameterNames=paste("a", 1:12, sep="")) -delta_01_S -delta_01_M - -all(delta_01_S/2==delta_01_M) - -## compare individuals (stat=ident) -all.equal(rdd_p_01_all$fit, c(delta_0[1,1], delta_1[1,1])) -all.equal(rdd_p_01_all$se.fit, c(delta_0[1,2], delta_1[1,2])) -c(rdd_p_01_M$fit/2, rdd_p_01_AGG$fit) - -## compare sum (stat=sum) -all.equal(unlist(rdd_p_01_S), drop(as.matrix(delta_01_S[1:2])), check.attributes=FALSE) - -## compare mean (stat=mean) -all.equal(unlist(rdd_p_01_M), drop(as.matrix(delta_01_M[1:2])), check.attributes=FALSE) -all.equal(rdd_p_01_M$fit, rdd_p_01_S$fit/2) -all.equal(rdd_p_01_M$fit, rdd_p_01_AGG$fit, check.attributes=FALSE) -all.equal(rdd_p_01_M$se.fit, rdd_p_01_AGG$se.fit, check.attributes=FALSE) - -## compare RDDpred and Delta at 5 first points: -ind_z_pos <- head(which(Lee2008_rdd_z$z1>0),5) - -rdd_p_01_5z_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="sum") -rdd_p_01_5z_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[1:5])), stat="sum") -rdd_p_01_5zPos_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[ind_z_pos]), stat="sum") -rdd_p_01_5zPos_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[ind_z_pos])), stat="sum") -rdd_p_01_5z_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="mean") -rdd_p_01_5z_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1[1:5])), stat="mean") -rdd_p_01_ALLz_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1), stat="mean") -rdd_p_01_ALLz_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1)), stat="mean") - -del <- function(x, mean=FALSE) { - n <- length(x) - res <- paste(c(paste(n, "*(a1-a2) "), paste(x, "*a12", sep="")), collapse=" +") - su <- sum(x) - sig <- if(sign(su)==1) "+" else NULL - res <- paste(n, "*(a1-a2) ", sig, su, "*a12", sep="") - if(mean) res <- paste("(", res, ")/", n, sep="") - res -} - -del(x=Lee2008_rdd_z$z1[1:5]) -delta_01_5z_S <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5]), parameterNames=paste("a", 1:12, sep=""), func="RDD") -delta_01_5z_M <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5], mean=TRUE), parameterNames=paste("a", 1:12, sep=""), func="RDD") - -all.equal(unlist(rdd_p_01_5z_S), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) -all.equal(unlist(rdd_p_01_5z_Sb), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) -all.equal(unlist(rdd_p_01_5z_M), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) -all.equal(unlist(rdd_p_01_5z_Mb), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) - -## All z: -# all.equal(rdd_p_01_ALLz_M, rdd_p_01_ALLz_Mb, check.attributes=FALSE) - -#### Weighted mean!! -w_5 <- c(0.1, 0.2, 0.4, 0.2, 0.1) -w <- c(0.4, 0.6) -rdd_p_01_Sid <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="identity") -wm <- weighted.mean(rdd_p_01_Sid$fit , w=w) - -delta_2z_w <- deltaMethod(aaa, "0.4*(a1 - a2) + 0.4*0.2*a12+0.6*(a1 - a2) + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) -delta_2z_w2 <- deltaMethod(aaa, "1*(a1 - a2) + 0.4*0.2*a12 + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) -delta_2z_w3 <- deltaMethod(aaa, "1*(a1 - a2) + a12*(0.4*0.2 + 0.6)", parameterNames=paste("a", 1:12, sep="")) -all(delta_2z_w==delta_2z_w2) -all.equal(delta_2z_w, delta_2z_w3, check.attributes=FALSE) -all.equal(delta_2z_w[1,1],wm) - -rdd_p_01_W_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="sum", weights=w) -rdd_p_01_W_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="mean", weights=w) -all.equal(rdd_p_01_W_M$fit,wm) - -all.equal(unlist(rdd_p_01_W_S), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) -all.equal(unlist(rdd_p_01_W_M), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) - - -###### 2 Z: -df_2Z_5z <- Lee2008_rdd_z[1:5, c("z1", "z2")] -df_2Z_5z_M <- data.frame(t(colMeans(df_2Z_5z))) -df_2Z_5z_Mw <- data.frame(t(apply(df_2Z_5z, 2, weighted.mean, w=w_5))) - -rdd_p_sZ_5z_S <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="sum") -rdd_p_sZ_5z_M <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean") -rdd_p_sZ_5z_Mb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_M, stat="sum") - -rdd_p_sZ_5z_MW <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean", weights=w_5) -rdd_p_sZ_5z_MWb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_Mw, stat="sum") - -all.equal(rdd_p_sZ_5z_M, rdd_p_sZ_5z_Mb, check.attributes=FALSE) -all.equal(rdd_p_sZ_5z_MW, rdd_p_sZ_5z_MWb, check.attributes=FALSE) diff --git a/RDDtools/tests/RDDpred.Rout.save b/RDDtools/tests/RDDpred.Rout.save deleted file mode 100644 index df22569..0000000 --- a/RDDtools/tests/RDDpred.Rout.save +++ /dev/null @@ -1,307 +0,0 @@ - -R version 2.15.2 (2012-10-26) -- "Trick or Treat" -Copyright (C) 2012 The R Foundation for Statistical Computing -ISBN 3-900051-07-0 -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. - -> library(RDDtools) -Loading required package: AER -Loading required package: car -Loading required package: lmtest -Loading required package: zoo - -Attaching package: 'zoo' - -The following objects are masked from 'package:base': - - as.Date, as.Date.numeric - -Loading required package: sandwich -Loading required package: survival -Loading required package: splines -KernSmooth 2.23 loaded -Copyright M. P. Wand 1997-2009 - -RDDtools 0.22 -PLEASE NOTE THIS is currently only a development version. -Run vignette('RDDtools') for the documentation -> library(car) -> -> -> #### DATA -> data(Lee2008) -> Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -> -> n_Lee <- nrow(Lee2008) -> -> set.seed(123) -> Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) -> Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) -> -> #### REGS -> bw_IK <- RDDbw_IK(Lee2008_rdd_z) -> w_IK <- RDDtools:::Kernel_tri(Lee2008_rdd_z$x, 0, bw_IK) -> reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) -> reg_para4_cov_slSep_W <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"), weights=w_IK) -> reg_np_cov <- RDDreg_np(RDDobject=Lee2008_rdd_z, covariates="z1", bw=bw_IK, inference="lm") -Warning message: -In RDDreg_np(RDDobject = Lee2008_rdd_z, covariates = "z1", bw = bw_IK, : - covariates not fully implemented for non-para reg -> -> -> -> -> reg_para4_cov_slSep_2Z <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1+z2", covar.opt=list(slope="separate")) -> -> reg_li <- list( reg_para4_cov_slSep=reg_para4_cov_slSep, -+ reg_para4_cov_slSep_W=reg_para4_cov_slSep_W, -+ reg_np_cov=reg_np_cov, -+ reg_para4_cov_slSep_2Z=reg_para4_cov_slSep_2Z) -> -> checkRDDmean <- function(x, n=5){ -+ covDF <- model.frame(x) -+ zDF <- grep("z", colnames(covDF), value=FALSE) -+ hasD <- zDF[-grep(":", colnames(covDF)[zDF])] -+ -+ DF_1 <- covDF[1:n,hasD, drop=FALSE] -+ DF_2 <- data.frame(t(colMeans(DF_1))) -+ -+ pred_1 <- RDDpred(x, covdata=DF_1, stat="mean") -+ pred_2 <- RDDpred(x, covdata=DF_2) -+ all.equal(pred_1, pred_2, check.attributes=FALSE) -+ } -> -> sapply(reg_li, checkRDDmean) - reg_para4_cov_slSep reg_para4_cov_slSep_W reg_np_cov - TRUE TRUE TRUE -reg_para4_cov_slSep_2Z - TRUE -> -> sapply(reg_li, function(x) all.equal(unlist(RDDpred(x)),RDDcoef(x, allInfo=TRUE)[1,1:2], check.attributes=FALSE)) - reg_para4_cov_slSep reg_para4_cov_slSep_W reg_np_cov - TRUE TRUE TRUE -reg_para4_cov_slSep_2Z - TRUE -> -> -> # -> # reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -> # print(reg_para) -> # summary(reg_para) -> # plot(reg_para) -> # -> # formula(reg_para) -> # -> # update(as.formula("y ~ D + `x^1` + `x^1_right`"), reg_para) -> # reg_para_l <- as.lm(reg_para) -> # # update(reg_para_l, y ~ D + `x^1` + `x^1_right`) -> # -> # mf <- model.frame(reg_para) -> # -> # lm("y ~ D + `x^1` + `x^1_right`", mf) -> # a<-lm("y ~ -1 + D +I(1-D) + `x^1` + `x^1_right`", mf) -> # diff(coef(a)[2:1]) -> # coef(reg_para) -> # -> # # deltaMethod(a, "I(1-D) - D", parameterNames=paste("a", 1:4, sep="")) -> # deltaMethod(a, "a1 - a2", parameterNames=paste("a", 1:4, sep="")) -> # coef(summary(reg_para))[2,] -> # -> # reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) -> # -> mf_2 <- model.frame(reg_para4_cov_slSep) -> # formula(reg_para4_cov_slSep) -> # -> aa <- lm("y ~ D + `x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) -> aaa <- lm("y ~ -1+ D + I(1-D)+`x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) -> # -> # diff(coef(aaa)[2:1]) -> # RDDpred(reg_para4_cov_slSep) -> # RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) -> # -> # RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) -> -> ## compare RDDpred and Delta at 1: -> rdd_p_1 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=1)) -> delta_1 <- deltaMethod(aaa, "a1 - a2 + a12", parameterNames=paste("a", 1:12, sep="")) -> rdd_p_1 -$fit - 1 -0.07886429 - -$se.fit -[1] 0.01361366 - -> delta_1 - Estimate SE -a1 - a2 + a12 0.07886429 0.01361366 -> all.equal(unlist(rdd_p_1), drop(as.matrix(delta_1[1:2])), check.attributes=FALSE) -[1] TRUE -> -> ## compare RDDpred and Delta at 0: -> rdd_p_0 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) -> rdd_c_0 <- RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) -> delta_0 <- deltaMethod(aaa, "a1 - a2 ", parameterNames=paste("a", 1:12, sep="")) -> rdd_p_0 -$fit - 1 -0.07644637 - -$se.fit -[1] 0.01324368 - -> rdd_c_0 - Estimate Std. Error t value Pr(>|t|) -D 0.07644637 0.01324368 5.772289 8.178184e-09 -> delta_0 - Estimate SE -a1 - a2 0.07644637 0.01324368 -> all.equal(unlist(rdd_p_0), drop(as.matrix(delta_0[1:2])), check.attributes=FALSE) -[1] TRUE -> all.equal(unlist(rdd_p_0), drop(as.matrix(rdd_c_0[1:2])), check.attributes=FALSE) -[1] TRUE -> -> ## compare RDDpred and Delta at 2 points: -> rdd_p_01_AGG <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.5))) -> rdd_p_01_all <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1))) -> rdd_p_01_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="sum") -> rdd_p_01_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="mean") -> -> delta_01_S <- deltaMethod(aaa, "2*(a1 - a2) +1*a12", parameterNames=paste("a", 1:12, sep="")) -> delta_01_M <- deltaMethod(aaa, "(2*(a1 - a2) +1*a12)/2", parameterNames=paste("a", 1:12, sep="")) -> delta_01_S - Estimate SE -2 * (a1 - a2) + 1 * a12 0.1553107 0.02664323 -> delta_01_M - Estimate SE -(2 * (a1 - a2) + 1 * a12)/2 0.07765533 0.01332161 -> -> all(delta_01_S/2==delta_01_M) -[1] TRUE -> -> ## compare individuals (stat=ident) -> all.equal(rdd_p_01_all$fit, c(delta_0[1,1], delta_1[1,1])) -[1] TRUE -> all.equal(rdd_p_01_all$se.fit, c(delta_0[1,2], delta_1[1,2])) -[1] TRUE -> c(rdd_p_01_M$fit/2, rdd_p_01_AGG$fit) - 1 -0.03882766 0.07765533 -> -> ## compare sum (stat=sum) -> all.equal(unlist(rdd_p_01_S), drop(as.matrix(delta_01_S[1:2])), check.attributes=FALSE) -[1] TRUE -> -> ## compare mean (stat=mean) -> all.equal(unlist(rdd_p_01_M), drop(as.matrix(delta_01_M[1:2])), check.attributes=FALSE) -[1] TRUE -> all.equal(rdd_p_01_M$fit, rdd_p_01_S$fit/2) -[1] TRUE -> all.equal(rdd_p_01_M$fit, rdd_p_01_AGG$fit, check.attributes=FALSE) -[1] TRUE -> all.equal(rdd_p_01_M$se.fit, rdd_p_01_AGG$se.fit, check.attributes=FALSE) -[1] TRUE -> -> ## compare RDDpred and Delta at 5 first points: -> ind_z_pos <- head(which(Lee2008_rdd_z$z1>0),5) -> -> rdd_p_01_5z_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="sum") -> rdd_p_01_5z_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[1:5])), stat="sum") -> rdd_p_01_5zPos_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[ind_z_pos]), stat="sum") -> rdd_p_01_5zPos_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[ind_z_pos])), stat="sum") -> rdd_p_01_5z_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="mean") -> rdd_p_01_5z_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1[1:5])), stat="mean") -> rdd_p_01_ALLz_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1), stat="mean") -> rdd_p_01_ALLz_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1)), stat="mean") -> -> del <- function(x, mean=FALSE) { -+ n <- length(x) -+ res <- paste(c(paste(n, "*(a1-a2) "), paste(x, "*a12", sep="")), collapse=" +") -+ su <- sum(x) -+ sig <- if(sign(su)==1) "+" else NULL -+ res <- paste(n, "*(a1-a2) ", sig, su, "*a12", sep="") -+ if(mean) res <- paste("(", res, ")/", n, sep="") -+ res -+ } -> -> del(x=Lee2008_rdd_z$z1[1:5]) -[1] "5*(a1-a2) +0.967851304699154*a12" -> delta_01_5z_S <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5]), parameterNames=paste("a", 1:12, sep=""), func="RDD") -> delta_01_5z_M <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5], mean=TRUE), parameterNames=paste("a", 1:12, sep=""), func="RDD") -> -> all.equal(unlist(rdd_p_01_5z_S), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) -[1] TRUE -> all.equal(unlist(rdd_p_01_5z_Sb), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) -[1] "Mean relative difference: 3.880226" -> all.equal(unlist(rdd_p_01_5z_M), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) -[1] TRUE -> all.equal(unlist(rdd_p_01_5z_Mb), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) -[1] TRUE -> -> ## All z: -> # all.equal(rdd_p_01_ALLz_M, rdd_p_01_ALLz_Mb, check.attributes=FALSE) -> -> #### Weighted mean!! -> w_5 <- c(0.1, 0.2, 0.4, 0.2, 0.1) -> w <- c(0.4, 0.6) -> rdd_p_01_Sid <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="identity") -> wm <- weighted.mean(rdd_p_01_Sid$fit , w=w) -> -> delta_2z_w <- deltaMethod(aaa, "0.4*(a1 - a2) + 0.4*0.2*a12+0.6*(a1 - a2) + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) -> delta_2z_w2 <- deltaMethod(aaa, "1*(a1 - a2) + 0.4*0.2*a12 + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) -> delta_2z_w3 <- deltaMethod(aaa, "1*(a1 - a2) + a12*(0.4*0.2 + 0.6)", parameterNames=paste("a", 1:12, sep="")) -> all(delta_2z_w==delta_2z_w2) -[1] TRUE -> all.equal(delta_2z_w, delta_2z_w3, check.attributes=FALSE) -[1] TRUE -> all.equal(delta_2z_w[1,1],wm) -[1] TRUE -> -> rdd_p_01_W_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="sum", weights=w) -Warning message: -In RDDpred(reg_para4_cov_slSep, covdata = data.frame(z1 = c(0.2, : - Providing weights for a sum makes little sense?! -> rdd_p_01_W_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="mean", weights=w) -> all.equal(rdd_p_01_W_M$fit,wm) -[1] TRUE -> -> all.equal(unlist(rdd_p_01_W_S), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) -[1] TRUE -> all.equal(unlist(rdd_p_01_W_M), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) -[1] TRUE -> -> -> ###### 2 Z: -> df_2Z_5z <- Lee2008_rdd_z[1:5, c("z1", "z2")] -> df_2Z_5z_M <- data.frame(t(colMeans(df_2Z_5z))) -> df_2Z_5z_Mw <- data.frame(t(apply(df_2Z_5z, 2, weighted.mean, w=w_5))) -> -> rdd_p_sZ_5z_S <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="sum") -> rdd_p_sZ_5z_M <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean") -> rdd_p_sZ_5z_Mb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_M, stat="sum") -> -> rdd_p_sZ_5z_MW <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean", weights=w_5) -> rdd_p_sZ_5z_MWb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_Mw, stat="sum") -> -> all.equal(rdd_p_sZ_5z_M, rdd_p_sZ_5z_Mb, check.attributes=FALSE) -[1] TRUE -> all.equal(rdd_p_sZ_5z_MW, rdd_p_sZ_5z_MWb, check.attributes=FALSE) -[1] TRUE -> -> proc.time() -utilisateur système écoulé - 3.550 0.650 4.251 diff --git a/RDDtools/tests/RDDtools_vs_rdd.R b/RDDtools/tests/RDDtools_vs_rdd.R deleted file mode 100644 index 910cc7c..0000000 --- a/RDDtools/tests/RDDtools_vs_rdd.R +++ /dev/null @@ -1,48 +0,0 @@ - -library(rdd) -library(RDDtools) - -set.seed(1234) -x<-runif(1000,-1,1) -cov<-rnorm(1000) -y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000) - -RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov) - -### Simple estimation: -bw <- IKbandwidth(X=x, Y=y, cutpoint=0) -bw -rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]] -RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm") - -rdd_co <- coef(summary(rdd_mod)) -RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE) -rdd_co -RDDtools_co - -all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE) -all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE) - - -### Covariate estimation: -rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]] -RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate")) - -rdd_co_cov <- coef(summary(rdd_mod_cov)) -RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE) -rdd_co_cov -RDDtools_co_cov - -all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE) - -## Fuzzy -set.seed(123) -selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9)) -RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0) - -RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2) -rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]] - -all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1]) -all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE) - diff --git a/RDDtools/tests/RDDtools_vs_rdd.Rout.save b/RDDtools/tests/RDDtools_vs_rdd.Rout.save deleted file mode 100644 index 2c7973a..0000000 --- a/RDDtools/tests/RDDtools_vs_rdd.Rout.save +++ /dev/null @@ -1,123 +0,0 @@ - -R version 2.15.2 (2012-10-26) -- "Trick or Treat" -Copyright (C) 2012 The R Foundation for Statistical Computing -ISBN 3-900051-07-0 -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. - -> -> library(rdd) -Loading required package: sandwich -Loading required package: lmtest -Loading required package: zoo - -Attaching package: 'zoo' - -The following objects are masked from 'package:base': - - as.Date, as.Date.numeric - -Loading required package: AER -Loading required package: car -Loading required package: survival -Loading required package: splines -Loading required package: Formula -> library(RDDtools) -KernSmooth 2.23 loaded -Copyright M. P. Wand 1997-2009 - -RDDtools 0.22 -PLEASE NOTE THIS is currently only a development version. -Run vignette('RDDtools') for the documentation -> -> set.seed(1234) -> x<-runif(1000,-1,1) -> cov<-rnorm(1000) -> y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000) -> -> RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov) -> -> ### Simple estimation: -> bw <- IKbandwidth(X=x, Y=y, cutpoint=0) -> bw -[1] 0.6442702 -> rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]] -> RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm") -> -> rdd_co <- coef(summary(rdd_mod)) -> RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE) -> rdd_co - Estimate Std. Error t value Pr(>|t|) -(Intercept) 2.3870103 0.3039002 7.8545857 1.670299e-14 -Tr 10.8995093 0.4071983 26.7670789 7.187232e-107 -Xl 0.3076565 1.1003584 0.2795966 7.798762e-01 -Xr 1.0007232 1.0724028 0.9331599 3.510850e-01 -> RDDtools_co - Estimate Std. Error t value Pr(>|t|) -(Intercept) 2.3870103 0.3039002 7.8545857 1.670299e-14 -D 10.8995093 0.4071983 26.7670789 7.187232e-107 -x 0.3076565 1.1003584 0.2795966 7.798762e-01 -x_right 0.6930668 1.5365013 0.4510681 6.520914e-01 -> -> all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE) -[1] TRUE -> all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE) -[1] TRUE -> -> -> ### Covariate estimation: -> rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]] -> RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate")) -> -> rdd_co_cov <- coef(summary(rdd_mod_cov)) -> RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE) -> rdd_co_cov - Estimate Std. Error t value Pr(>|t|) -(Intercept) 2.90737195 0.09660411 30.0957385 5.649434e-142 -Tr 10.20606095 0.13062887 78.1302094 0.000000e+00 -Xl 1.81515024 0.16640546 10.9079970 3.027120e-26 -Xr 1.86396889 0.15068992 12.3695656 8.602692e-33 -cov 3.04154403 0.05189778 58.6064361 0.000000e+00 -Tr:cov -0.03728164 0.06948406 -0.5365496 5.916988e-01 -> RDDtools_co_cov - Estimate Std. Error t value Pr(>|t|) -(Intercept) 2.90737195 0.09660411 30.0957385 5.649434e-142 -D 10.20606095 0.13062887 78.1302094 0.000000e+00 -x 1.81515024 0.16640546 10.9079970 3.027120e-26 -x_right 0.04881865 0.22449550 0.2174594 8.278950e-01 -cov 3.04154403 0.05189778 58.6064361 0.000000e+00 -`cov:D` -0.03728164 0.06948406 -0.5365496 5.916988e-01 -> -> all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE) -[1] TRUE -> -> ## Fuzzy -> set.seed(123) -> selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9)) -> RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0) -> -> RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2) -> rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]] -> -> all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1]) -[1] TRUE -> all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE) -[1] TRUE -> -> -> proc.time() -utilisateur système écoulé - 1.248 0.076 1.325 diff --git a/RDDtools/tests/packageDemo.R b/RDDtools/tests/packageDemo.R deleted file mode 100644 index 547ed88..0000000 --- a/RDDtools/tests/packageDemo.R +++ /dev/null @@ -1,248 +0,0 @@ -library(RDDtools) - - - - -############################################ -### STEP 0: Data Manipulation -############################################ -data(Lee2008) -head(Lee2008) - -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) - -head(Lee2008_rdd) - -summary(Lee2008_rdd) - -## With covariates - -n_Lee <- nrow(Lee2008) - -set.seed(123) -Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) -Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) - -head(Lee2008_rdd_z ) -summary(Lee2008_rdd_z ) - -### Fuzzy -set.seed(123) -ins <- rbinom(n_Lee, 1, prob=ifelse(Lee2008$x<0, 0.1, 0.9)) -Lee2008_rdd_ins <- RDDdata(y=Lee2008$y, x=Lee2008$x, z=ins,cutpoint=0) -table(Lee2008$x<0, ins==0) - -############################################ -### STEP 2: Graphical inspection -############################################ - -### Plot -plot(Lee2008_rdd) -plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04)) -plot(Lee2008_rdd, nplot=1, h=0.1) - -plot(Lee2008_rdd, xlim=c(-0.5, 0.5)) - -# plot(Lee2008_rdd, xlim=c(-0.5, 0.5), type="ggplot") - - -############################################ -### STEP 2: Regression -############################################ - -## few bandwidths: -RDDbw_RSW(Lee2008_rdd) -RDDbw_IK(Lee2008_rdd) - - -###### Parametric regression ###### -# Simple polynomial of order 1: -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -print(reg_para) -summary(reg_para) -plot(reg_para) - -all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check.attributes=FALSE) - -## Difference in means regression: -# Simple polynomial of order 0: -reg_para_0 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=0) -print(reg_para_0) -summary(reg_para_0) -plot(reg_para_0) - - -## Simple polynomial of order 4: -reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) -reg_para4 -plot(reg_para4) -all.equal(unlist(RDDpred(reg_para4)), RDDcoef(reg_para4, allInfo=TRUE)[1:2], check.attributes=FALSE) - -## Restrict sample to bandwidth area: -bw_ik <- RDDbw_IK(Lee2008_rdd) -reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) -reg_para_ik -plot(reg_para_ik) - -all.equal(unlist(RDDpred(reg_para_ik)), RDDcoef(reg_para_ik, allInfo=TRUE)[1:2], check.attributes=FALSE) - -## Fuzzy reg -reg_para_fuzz <- RDDreg_lm(Lee2008_rdd_ins) -coef(reg_para_fuzz) -summary(reg_para_fuzz) - -## Covariates: -reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".") -reg_para4_cov -summary(reg_para4_cov) - -reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(slope="separate")) -summary(reg_para4_cov_slSep) -RDDpred(reg_para4_cov_slSep) -RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 0.2, 0.2), z2=c(0,20,20), z3b=c(0,1,0), z3c=c(0,0,1))) - - -reg_para4_cov_startR <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual")) -reg_para4_cov_startR -summary(reg_para4_cov_startR) - -plot(reg_para4_cov) - -reg_para4_cov_startR_sl2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual", slope="separate")) -summary(reg_para4_cov_startR_sl2) - -reg_para4_cov_2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z3+I(z1^2)") -reg_para4_cov_2 -summary(reg_para4_cov_2) - -###### Non-parametric regression ###### -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -print(reg_nonpara) -summary(reg_nonpara) -plot(x=reg_nonpara) - -reg_nonpara_inflm <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") -print(reg_nonpara_inflm) -summary(reg_nonpara_inflm) -plot(x=reg_nonpara_inflm) - - -reg_nonpara_sameSl <- RDDreg_np(RDDobject=Lee2008_rdd, slope="same") -print(reg_nonpara_sameSl) -summary(reg_nonpara_sameSl) - - -###### PLOT SENSI ###### -plSe_reg_para <- plotSensi(reg_para_ik, order=4:6) -plSe_reg_para_fac <- plotSensi(reg_para_ik, type="facet", order=4:6) -plSe_reg_para -plSe_reg_para_fac - - -plSe_reg_nonpara <- plotSensi(reg_nonpara) -plSe_reg_nonpara - -plSe_reg_nonpara_HC <- plotSensi(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) -plSe_reg_nonpara_HC - -plSe_reg_para_0 <- plotSensi(reg_para_0, plot=FALSE) -plSe_reg_para_0 - -plSe_reg_para_0_gg <- plotSensi(reg_para_0, plot=FALSE, output="ggplot") -str(plSe_reg_para_0_gg) - - -###### Post-inference: ###### - -clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="df-adj") -clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="HC") - - -############################################ -### STEP 3: Validty tests -############################################ - -## Placebo test: -placeb_dat_reg_nonpara <- computePlacebo(reg_nonpara) - -plotPlacebo(placeb_dat_reg_nonpara) -plotPlacebo(placeb_dat_reg_nonpara, device="base") - - -plotPlaceboDens(placeb_dat_reg_nonpara) - -## check invisible return: -ptPl_reg_nonpara <- plotPlacebo(reg_nonpara, plot=FALSE) -ptPl_reg_nonpara - -ptPl_reg_nonpara2 <- plotPlacebo(reg_nonpara, plot=FALSE, output="ggplot") -ptPl_reg_nonpara2 - -# with HC: -ptPl_reg_nonpara_HC <- plotPlacebo(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) -ptPl_reg_nonpara_HC - -ptPl_reg_para_0 <- plotPlacebo(reg_para_0) -ptPl_reg_para_0 - - - -## density tests -dens_test(Lee2008_rdd) -dens_test(reg_para_0, plot=FALSE) -dens_test(reg_nonpara, plot=FALSE)$test.output[c("theta", "se", "z", "p", "binsize", "bw", "cutpoint")] - - -## Covariates tests -covarTest_mean(Lee2008_rdd_z) -covarTest_mean(Lee2008_rdd_z, bw=0.1) -covarTest_dis(Lee2008_rdd_z) -covarTest_dis(Lee2008_rdd_z, bw=0.1) - -covarTest_mean(reg_para4_cov) -covarTest_dis(reg_para4_cov) -#### as npreg - reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE) - reg_nonpara_np - RDDcoef(reg_nonpara_np) - RDDcoef(reg_nonpara_np, allCo=TRUE) - RDDcoef(reg_nonpara_np, allInfo=TRUE) - RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE) - -## Compare with result obtained with a Gaussian kernel: - bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) - reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) - all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check.attributes=FALSE) - - - -#### methods - -regs_all <- list(reg_para=reg_para, - reg_para_0=reg_para_0, - reg_para4=reg_para4, - reg_para_ik=reg_para_ik, - reg_para_fuzz=reg_para_fuzz, - reg_para4_cov=reg_para4_cov, - reg_para4_cov_slSep=reg_para4_cov_slSep, - reg_para4_cov_startR=reg_para4_cov_startR, - reg_para4_cov_startR_sl2=reg_para4_cov_startR_sl2, - reg_nonpara=reg_nonpara, - reg_nonpara_inflm=reg_nonpara_inflm, - reg_nonpara_sameSl=reg_nonpara_sameSl) -capply <- function(x){ - n.obs <- sapply(x, length) - seq.max <- seq_len(max(n.obs)) - t(sapply(x, "[", i = seq.max)) -} - -capply(lapply(regs_all, coef)) -sapply(regs_all, RDDcoef) -RDDpred_issue <- c("reg_para_0", "reg_para_fuzz", "reg_nonpara", "reg_nonpara_sameSl") -sapply(regs_all[!names(regs_all)%in%RDDpred_issue], RDDpred) - -sapply(regs_all, RDDtools:::getCutpoint) -lapply(regs_all, plotSensi, plot=FALSE) - -sapply(regs_all, function(x) dens_test(x, plot=FALSE)[c("p.value", "statistic", "estimate")]) - diff --git a/RDDtools/tests/packageDemo.Rout.save b/RDDtools/tests/packageDemo.Rout.save deleted file mode 100644 index 27473e0..0000000 --- a/RDDtools/tests/packageDemo.Rout.save +++ /dev/null @@ -1,1214 +0,0 @@ - -R version 2.15.2 (2012-10-26) -- "Trick or Treat" -Copyright (C) 2012 The R Foundation for Statistical Computing -ISBN 3-900051-07-0 -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. - -> library(RDDtools) -Loading required package: AER -Loading required package: car -Loading required package: lmtest -Loading required package: zoo - -Attaching package: 'zoo' - -The following objects are masked from 'package:base': - - as.Date, as.Date.numeric - -Loading required package: sandwich -Loading required package: survival -Loading required package: splines -KernSmooth 2.23 loaded -Copyright M. P. Wand 1997-2009 - -RDDtools 0.22 -PLEASE NOTE THIS is currently only a development version. -Run vignette('RDDtools') for the documentation -> -> -> -> -> ############################################ -> ### STEP 0: Data Manipulation -> ############################################ -> data(Lee2008) -> head(Lee2008) - x y -1 0.1049 0.5810 -2 0.1393 0.4611 -3 -0.0736 0.5434 -4 0.0868 0.5846 -5 0.3994 0.5803 -6 0.1681 0.6244 -> -> Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -> -> head(Lee2008_rdd) - x y -1 0.1049 0.5810 -2 0.1393 0.4611 -3 -0.0736 0.5434 -4 0.0868 0.5846 -5 0.3994 0.5803 -6 0.1681 0.6244 -> -> summary(Lee2008_rdd) -### RDDdata object ### - -Cutpoint: 0 -Sample size: - -Full : 6558 - -Left : 2740 - -Right: 3818 -Covariates: no -> -> ## With covariates -> -> n_Lee <- nrow(Lee2008) -> -> set.seed(123) -> Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) -> Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) -> -> head(Lee2008_rdd_z ) - x y z1 z2 z3 -1 0.1049 0.5810 -0.56047565 22.19827 a -2 0.1393 0.4611 -0.23017749 20.63967 a -3 -0.0736 0.5434 1.55870831 20.66365 a -4 0.0868 0.5846 0.07050839 19.47992 c -5 0.3994 0.5803 0.12928774 20.19964 a -6 0.1681 0.6244 1.71506499 20.01448 c -> summary(Lee2008_rdd_z ) -### RDDdata object ### - -Cutpoint: 0 -Sample size: - -Full : 6558 - -Left : 2740 - -Right: 3818 -Covariates: yes -> -> ### Fuzzy -> set.seed(123) -> ins <- rbinom(n_Lee, 1, prob=ifelse(Lee2008$x<0, 0.1, 0.9)) -> Lee2008_rdd_ins <- RDDdata(y=Lee2008$y, x=Lee2008$x, z=ins,cutpoint=0) -> table(Lee2008$x<0, ins==0) - - FALSE TRUE - FALSE 3474 344 - TRUE 283 2457 -> -> ############################################ -> ### STEP 2: Graphical inspection -> ############################################ -> -> ### Plot -> plot(Lee2008_rdd) -> plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04)) -> plot(Lee2008_rdd, nplot=1, h=0.1) -> -> plot(Lee2008_rdd, xlim=c(-0.5, 0.5)) -> -> # plot(Lee2008_rdd, xlim=c(-0.5, 0.5), type="ggplot") -> -> -> ############################################ -> ### STEP 2: Regression -> ############################################ -> -> ## few bandwidths: -> RDDbw_RSW(Lee2008_rdd) -[1] 0.03863514 -> RDDbw_IK(Lee2008_rdd) - h_opt -0.2938561 -> -> -> ###### Parametric regression ###### -> # Simple polynomial of order 1: -> reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) -> print(reg_para) -### RDD regression: parametric ### - Polynomial order: 1 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.1182314 0.0056799 20.816 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_para) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.89406 -0.06189 0.00231 0.07129 0.86396 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.432948 0.004276 101.254 < 2e-16 *** -D 0.118231 0.005680 20.816 < 2e-16 *** -x 0.296906 0.011546 25.714 < 2e-16 *** -x_right 0.045978 0.013501 3.405 0.000665 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1384 on 6554 degrees of freedom -Multiple R-squared: 0.6707, Adjusted R-squared: 0.6706 -F-statistic: 4450 on 3 and 6554 DF, p-value: < 2.2e-16 - -> plot(reg_para) -> -> all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check.attributes=FALSE) -[1] TRUE -> -> ## Difference in means regression: -> # Simple polynomial of order 0: -> reg_para_0 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=0) -> print(reg_para_0) -### RDD regression: parametric ### - Polynomial order: 0 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.3513582 0.0041954 83.748 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_para_0) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.69788 -0.10061 -0.00360 0.09631 0.65348 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.346522 0.003201 108.25 <2e-16 *** -D 0.351358 0.004195 83.75 <2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1676 on 6556 degrees of freedom -Multiple R-squared: 0.5169, Adjusted R-squared: 0.5168 -F-statistic: 7014 on 1 and 6556 DF, p-value: < 2.2e-16 - -> plot(reg_para_0) -> -> -> ## Simple polynomial of order 4: -> reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) -> reg_para4 -### RDD regression: parametric ### - Polynomial order: 4 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.076590 0.013239 5.7851 7.582e-09 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> plot(reg_para4) -> all.equal(unlist(RDDpred(reg_para4)), RDDcoef(reg_para4, allInfo=TRUE)[1:2], check.attributes=FALSE) -[1] TRUE -> -> ## Restrict sample to bandwidth area: -> bw_ik <- RDDbw_IK(Lee2008_rdd) -> reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) -> reg_para_ik -### RDD regression: parametric ### - Polynomial order: 4 - Slopes: separate - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.059164 0.020596 2.8726 0.004098 ** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> plot(reg_para_ik) -> -> all.equal(unlist(RDDpred(reg_para_ik)), RDDcoef(reg_para_ik, allInfo=TRUE)[1:2], check.attributes=FALSE) -[1] TRUE -> -> ## Fuzzy reg -> reg_para_fuzz <- RDDreg_lm(Lee2008_rdd_ins) -> coef(reg_para_fuzz) -(Intercept) D x x_right - 0.41796288 0.14755375 0.29778248 0.04266442 -> summary(reg_para_fuzz) - -Call: -ivreg(formula = y ~ . - ins | . - D, data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.905964 -0.070958 0.004881 0.080950 0.879820 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.417963 0.005074 82.375 < 2e-16 *** -D 0.147554 0.007430 19.860 < 2e-16 *** -x 0.297782 0.012076 24.659 < 2e-16 *** -x_right 0.042664 0.014113 3.023 0.00251 ** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.145 on 6554 degrees of freedom -Multiple R-Squared: 0.6383, Adjusted R-squared: 0.6381 -Wald test: 4051 on 3 and 6554 DF, p-value: < 2.2e-16 - -> -> ## Covariates: -> reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".") -> reg_para4_cov -### RDD regression: parametric ### - Polynomial order: 4 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.076414 0.013244 5.7697 8.302e-09 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_para4_cov) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.87348 -0.06105 0.00116 0.06744 0.71549 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.4459926 0.0193546 23.043 < 2e-16 *** -D 0.0764143 0.0132440 5.770 8.30e-09 *** -x 0.5274483 0.1580923 3.336 0.000854 *** -`x^2` 1.5439437 0.7411612 2.083 0.037277 * -`x^3` 4.2383627 1.2489711 3.393 0.000694 *** -`x^4` 3.0522056 0.6642910 4.595 4.42e-06 *** -x_right 0.0154044 0.2092056 0.074 0.941305 -`x^2_right` -2.2468013 0.9487628 -2.368 0.017907 * -`x^3_right` -3.0056391 1.5522889 -1.936 0.052879 . -`x^4_right` -3.7808696 0.8093116 -4.672 3.05e-06 *** -z1 -0.0003927 0.0016820 -0.233 0.815420 -z2 0.0005547 0.0008365 0.663 0.507323 -z3b -0.0049158 0.0040657 -1.209 0.226671 -z3c -0.0032098 0.0041445 -0.774 0.438673 ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1357 on 6544 degrees of freedom -Multiple R-squared: 0.6839, Adjusted R-squared: 0.6833 -F-statistic: 1089 on 13 and 6544 DF, p-value: < 2.2e-16 - -> -> reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(slope="separate")) -> summary(reg_para4_cov_slSep) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.87319 -0.06121 0.00152 0.06788 0.71590 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.4467816 0.0275921 16.192 < 2e-16 *** -D 0.0744082 0.0365414 2.036 0.041763 * -x 0.5314429 0.1581582 3.360 0.000783 *** -`x^2` 1.5607299 0.7414043 2.105 0.035321 * -`x^3` 4.2599536 1.2492983 3.410 0.000654 *** -`x^4` 3.0604662 0.6644491 4.606 4.18e-06 *** -x_right 0.0128643 0.2092782 0.061 0.950987 -`x^2_right` -2.2682981 0.9489665 -2.390 0.016864 * -`x^3_right` -3.0229759 1.5528584 -1.947 0.051611 . -`x^4_right` -3.7900473 0.8094190 -4.682 2.89e-06 *** -z1 -0.0019210 0.0025907 -0.741 0.458419 -z2 0.0007586 0.0012911 0.588 0.556863 -z3b -0.0144320 0.0062796 -2.298 0.021580 * -z3c -0.0076795 0.0064097 -1.198 0.230918 -`z1:D` 0.0025846 0.0034062 0.759 0.448015 -`z2:D` -0.0003170 0.0016953 -0.187 0.851664 -`z3b:D` 0.0163160 0.0082404 1.980 0.047745 * -`z3c:D` 0.0077248 0.0084028 0.919 0.357967 ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1357 on 6540 degrees of freedom -Multiple R-squared: 0.6841, Adjusted R-squared: 0.6833 -F-statistic: 833.1 on 17 and 6540 DF, p-value: < 2.2e-16 - -> RDDpred(reg_para4_cov_slSep) -$fit - 2 -0.0744082 - -$se.fit -[1] 0.03654137 - -> RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 0.2, 0.2), z2=c(0,20,20), z3b=c(0,1,0), z3c=c(0,0,1))) -$fit -[1] 0.0744082 0.0849006 0.0763094 - -$se.fit -[1] 0.03654137 0.01406868 0.01412509 - -> -> -> reg_para4_cov_startR <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual")) -> reg_para4_cov_startR -### RDD regression: parametric ### - Polynomial order: 4 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.076400 0.013238 5.7713 8.225e-09 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_para4_cov_startR) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.87278 -0.06132 0.00093 0.06743 0.71605 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) -0.096767 0.009724 -9.951 < 2e-16 *** -D 0.076400 0.013238 5.771 8.22e-09 *** -x 0.526732 0.158000 3.334 0.000862 *** -`x^2` 1.542016 0.740778 2.082 0.037416 * -`x^3` 4.237801 1.248388 3.395 0.000691 *** -`x^4` 3.053121 0.663993 4.598 4.34e-06 *** -x_right 0.017573 0.209092 0.084 0.933026 -`x^2_right` -2.251672 0.948170 -2.375 0.017589 * -`x^3_right` -2.994779 1.551609 -1.930 0.053636 . -`x^4_right` -3.786702 0.808771 -4.682 2.90e-06 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1356 on 6548 degrees of freedom -Multiple R-squared: 0.6838, Adjusted R-squared: 0.6834 -F-statistic: 1574 on 9 and 6548 DF, p-value: < 2.2e-16 - -> -> plot(reg_para4_cov) -> -> reg_para4_cov_startR_sl2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual", slope="separate")) -> summary(reg_para4_cov_startR_sl2) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.87001 -0.06145 0.00138 0.06728 0.71762 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.108148 0.009721 11.125 < 2e-16 *** -D -0.275377 0.013234 -20.808 < 2e-16 *** -x 0.534391 0.157954 3.383 0.000721 *** -`x^2` 1.574893 0.740561 2.127 0.033489 * -`x^3` 4.282174 1.248022 3.431 0.000605 *** -`x^4` 3.071545 0.663799 4.627 3.78e-06 *** -x_right 0.011154 0.209031 0.053 0.957445 -`x^2_right` -2.286510 0.947892 -2.412 0.015884 * -`x^3_right` -3.042340 1.551154 -1.961 0.049882 * -`x^4_right` -3.801129 0.808534 -4.701 2.64e-06 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1356 on 6548 degrees of freedom -Multiple R-squared: 0.3453, Adjusted R-squared: 0.3444 -F-statistic: 383.7 on 9 and 6548 DF, p-value: < 2.2e-16 - -> -> reg_para4_cov_2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z3+I(z1^2)") -> reg_para4_cov_2 -### RDD regression: parametric ### - Polynomial order: 4 - Slopes: separate - Number of obs: 6558 (left: 2740, right: 3818) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.076407 0.013244 5.7691 8.331e-09 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_para4_cov_2) - -Call: -lm(formula = y ~ ., data = dat_step1, weights = weights) - -Residuals: - Min 1Q Median 3Q Max --0.87470 -0.06066 0.00094 0.06743 0.71537 - -Coefficients: - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.4574160 0.0101073 45.256 < 2e-16 *** -D 0.0764072 0.0132441 5.769 8.33e-09 *** -x 0.5262757 0.1580735 3.329 0.000875 *** -`x^2` 1.5416896 0.7411354 2.080 0.037549 * -`x^3` 4.2382250 1.2489588 3.393 0.000694 *** -`x^4` 3.0532625 0.6642844 4.596 4.38e-06 *** -x_right 0.0187563 0.2091417 0.090 0.928543 -`x^2_right` -2.2565435 0.9490378 -2.378 0.017449 * -`x^3_right` -2.9839277 1.5519657 -1.923 0.054564 . -`x^4_right` -3.7936046 0.8094722 -4.687 2.84e-06 *** -z3b -0.0049255 0.0040650 -1.212 0.225675 -z3c -0.0032074 0.0041431 -0.774 0.438863 -`I(z1^2)` -0.0004387 0.0011923 -0.368 0.712926 ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -Residual standard error: 0.1357 on 6545 degrees of freedom -Multiple R-squared: 0.6839, Adjusted R-squared: 0.6833 -F-statistic: 1180 on 12 and 6545 DF, p-value: < 2.2e-16 - -> -> ###### Non-parametric regression ###### -> reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) -> print(reg_nonpara) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Coefficient: - Estimate Std. Error z value Pr(>|z|) -D 0.079924 0.009465 8.4443 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_nonpara) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Weighted Residuals: - Min 1Q Median 3Q Max --0.97755 -0.06721 -0.00497 0.04504 0.93761 - - Coefficient: - Estimate Std. Error z value Pr(>|z|) -D 0.079924 0.009465 8.4443 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - - Local R squared: 0.3563 -> plot(x=reg_nonpara) -> -> reg_nonpara_inflm <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") -> print(reg_nonpara_inflm) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.0799245 0.0068213 11.717 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_nonpara_inflm) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Weighted Residuals: - Min 1Q Median 3Q Max --0.97755 -0.06721 -0.00497 0.04504 0.93761 - - Coefficient: - Estimate Std. Error t value Pr(>|t|) -D 0.0799245 0.0068213 11.717 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - - Local R squared: 0.3563 -> plot(x=reg_nonpara_inflm) -> -> -> reg_nonpara_sameSl <- RDDreg_np(RDDobject=Lee2008_rdd, slope="same") -> print(reg_nonpara_sameSl) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Coefficient: - Estimate Std. Error z value Pr(>|z|) -D 0.079779 0.009465 8.4289 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> summary(reg_nonpara_sameSl) -### RDD regression: nonparametric local linear### - Bandwidth: 0.2938561 - Number of obs: 3200 (left: 1594, right: 1606) - - Weighted Residuals: - Min 1Q Median 3Q Max --0.95353 -0.06234 0.00085 0.05138 0.96204 - - Coefficient: - Estimate Std. Error z value Pr(>|z|) -D 0.079779 0.009465 8.4289 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - - Local R squared: 0.3562 -> -> -> ###### PLOT SENSI ###### -> plSe_reg_para <- plotSensi(reg_para_ik, order=4:6) -> plSe_reg_para_fac <- plotSensi(reg_para_ik, type="facet", order=4:6) -> plSe_reg_para - bw order LATE se CI_low CI_high -1 0.1938561 4 0.07247223 0.02382386 0.025778327 0.11916613 -2 0.2438561 4 0.04629929 0.02199743 0.003185119 0.08941345 -3 0.2938561 4 0.05916354 0.02059588 0.018796358 0.09953073 -4 0.3438561 4 0.05275995 0.01937224 0.014791068 0.09072884 -5 0.3938561 4 0.05989365 0.01843283 0.023765971 0.09602133 -6 0.1938561 5 0.08018637 0.02844931 0.024426748 0.13594599 -7 0.2438561 5 0.07228197 0.02645597 0.020429214 0.12413472 -8 0.2938561 5 0.04568221 0.02486321 -0.003048781 0.09441320 -9 0.3438561 5 0.05146888 0.02340468 0.005596542 0.09734122 -10 0.3938561 5 0.04623271 0.02228736 0.002550286 0.08991513 -11 0.1938561 6 0.10243475 0.03299585 0.037764063 0.16710544 -12 0.2438561 6 0.09506766 0.03067462 0.034946512 0.15518880 -13 0.2938561 6 0.08500551 0.02891942 0.028324485 0.14168653 -14 0.3438561 6 0.06514312 0.02737691 0.011485362 0.11880089 -15 0.3938561 6 0.06054718 0.02609533 0.009401274 0.11169308 -> plSe_reg_para_fac - bw order LATE se CI_low CI_high -1 0.1938561 4 0.07247223 0.02382386 0.025778327 0.11916613 -2 0.2438561 4 0.04629929 0.02199743 0.003185119 0.08941345 -3 0.2938561 4 0.05916354 0.02059588 0.018796358 0.09953073 -4 0.3438561 4 0.05275995 0.01937224 0.014791068 0.09072884 -5 0.3938561 4 0.05989365 0.01843283 0.023765971 0.09602133 -6 0.1938561 5 0.08018637 0.02844931 0.024426748 0.13594599 -7 0.2438561 5 0.07228197 0.02645597 0.020429214 0.12413472 -8 0.2938561 5 0.04568221 0.02486321 -0.003048781 0.09441320 -9 0.3438561 5 0.05146888 0.02340468 0.005596542 0.09734122 -10 0.3938561 5 0.04623271 0.02228736 0.002550286 0.08991513 -11 0.1938561 6 0.10243475 0.03299585 0.037764063 0.16710544 -12 0.2438561 6 0.09506766 0.03067462 0.034946512 0.15518880 -13 0.2938561 6 0.08500551 0.02891942 0.028324485 0.14168653 -14 0.3438561 6 0.06514312 0.02737691 0.011485362 0.11880089 -15 0.3938561 6 0.06054718 0.02609533 0.009401274 0.11169308 -> -> -> plSe_reg_nonpara <- plotSensi(reg_nonpara) -> plSe_reg_nonpara - bw LATE se p_value CI_low CI_high -1 0.1938561 0.07369768 0.010505222 2.293943e-12 0.05310782 0.09428753 -2 0.2438561 0.07661912 0.009878428 8.750794e-15 0.05725776 0.09598049 -3 0.2938561 0.07992454 0.009464965 3.060030e-17 0.06137355 0.09847553 -4 0.3438561 0.08182321 0.009054544 1.614710e-19 0.06407663 0.09956979 -5 0.3938561 0.08398642 0.008820583 1.704675e-21 0.06669839 0.10127444 -> -> plSe_reg_nonpara_HC <- plotSensi(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) -> plSe_reg_nonpara_HC - bw LATE se p_value CI_low CI_high -1 0.1938561 0.07369768 0.004630858 5.028543e-57 0.06462136 0.08277399 -2 0.2438561 0.07661912 0.005058104 7.835161e-52 0.06670542 0.08653283 -3 0.2938561 0.07992454 0.005387560 8.698214e-50 0.06936511 0.09048396 -4 0.3438561 0.08182321 0.005704170 1.154034e-46 0.07064324 0.09300318 -5 0.3938561 0.08398642 0.005899981 5.553777e-46 0.07242267 0.09555017 -> -> plSe_reg_para_0 <- plotSensi(reg_para_0, plot=FALSE) -> plSe_reg_para_0 - bw order LATE se CI_low CI_high -1 NA 0 0.35135822 0.004195424 0.34313534 0.35958110 -2 NA 1 0.11823144 0.005679859 0.10709913 0.12936376 -3 NA 2 0.05186868 0.008087038 0.03601838 0.06771898 -> -> plSe_reg_para_0_gg <- plotSensi(reg_para_0, plot=FALSE, output="ggplot") -> str(plSe_reg_para_0_gg) -List of 9 - $ data :'data.frame': 3 obs. of 6 variables: - ..$ bw : num [1:3] NA NA NA - ..$ order : num [1:3] 0 1 2 - ..$ LATE : num [1:3] 0.3514 0.1182 0.0519 - ..$ se : num [1:3] 0.0042 0.00568 0.00809 - ..$ CI_low : num [1:3] 0.343 0.107 0.036 - ..$ CI_high: num [1:3] 0.3596 0.1294 0.0677 - $ layers :List of 3 - ..$ :Classes 'proto', 'environment' - ..$ :Classes 'proto', 'environment' - ..$ :Classes 'proto', 'environment' - $ scales :Reference class 'Scales' [package "ggplot2"] with 1 fields - ..$ scales: list() - ..and 21 methods, of which 9 are possibly relevant: - .. add, clone, find, get_scales, has_scale, initialize, input, n, - .. non_position_scales - $ mapping :List of 2 - ..$ x: symbol order - ..$ y: symbol LATE - $ theme : list() - $ coordinates:List of 1 - ..$ limits:List of 2 - .. ..$ x: NULL - .. ..$ y: NULL - ..- attr(*, "class")= chr [1:2] "cartesian" "coord" - $ facet :List of 1 - ..$ shrink: logi TRUE - ..- attr(*, "class")= chr [1:2] "null" "facet" - $ plot_env : - $ labels :List of 4 - ..$ x : chr "order" - ..$ y : chr "LATE" - ..$ ymin: chr "CI_low" - ..$ ymax: chr "CI_high" - - attr(*, "class")= chr [1:2] "gg" "ggplot" -> -> -> ###### Post-inference: ###### -> -> clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="df-adj") - -t test of coefficients: - - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.4329479 0.0042758 101.2544 < 2.2e-16 *** -D 0.1182314 0.0056799 20.8159 < 2.2e-16 *** -x 0.2969065 0.0115464 25.7142 < 2.2e-16 *** -x_right 0.0459776 0.0135015 3.4054 0.0006663 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -> clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="HC") - -t test of coefficients: - - Estimate Std. Error t value Pr(>|t|) -(Intercept) 0.432948 0.014242 30.3995 < 2.2e-16 *** -D 0.118231 0.015255 7.7502 1.056e-14 *** -x 0.296906 0.063726 4.6591 3.239e-06 *** -x_right 0.045978 0.066170 0.6948 0.4872 ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -> -> -> ############################################ -> ### STEP 3: Validty tests -> ############################################ -> -> ## Placebo test: -> placeb_dat_reg_nonpara <- computePlacebo(reg_nonpara) -> -> plotPlacebo(placeb_dat_reg_nonpara) -> plotPlacebo(placeb_dat_reg_nonpara, device="base") -> -> -> plotPlaceboDens(placeb_dat_reg_nonpara) -> -> ## check invisible return: -> ptPl_reg_nonpara <- plotPlacebo(reg_nonpara, plot=FALSE) -> ptPl_reg_nonpara - cutpoint position LATE se p_value CI_low -1 -0.403200 left -0.024718935 0.016759674 1.402379e-01 -0.05756729 -2 -0.303200 left -0.006342468 0.013352797 6.347928e-01 -0.03251347 -3 -0.203200 left -0.002383527 0.012701432 8.511446e-01 -0.02727788 -4 0.000000 True 0.079924537 0.009464965 3.060030e-17 0.06137355 -5 0.163925 right 0.014895056 0.014567666 3.065567e-01 -0.01365704 -6 0.263925 right 0.005435061 0.011726100 6.430052e-01 -0.01754767 -7 0.363925 right -0.011887068 0.011410501 2.975203e-01 -0.03425124 -8 0.463925 right 0.006736746 0.011407038 5.548032e-01 -0.01562064 -9 0.563925 right 0.010152920 0.012815014 4.282047e-01 -0.01496405 - CI_high bw -1 0.008129422 0.1898052 -2 0.019828532 0.1811187 -3 0.022510822 0.1432704 -4 0.098475528 0.2938561 -5 0.043447156 0.1385116 -6 0.028417795 0.2901109 -7 0.010477102 0.3783845 -8 0.029094130 0.4458358 -9 0.035269885 0.4358020 -> -> ptPl_reg_nonpara2 <- plotPlacebo(reg_nonpara, plot=FALSE, output="ggplot") -> ptPl_reg_nonpara2 - cutpoint position LATE se p_value CI_low -1 -0.403200 left -0.024718935 0.016759674 1.402379e-01 -0.05756729 -2 -0.303200 left -0.006342468 0.013352797 6.347928e-01 -0.03251347 -3 -0.203200 left -0.002383527 0.012701432 8.511446e-01 -0.02727788 -4 0.000000 True 0.079924537 0.009464965 3.060030e-17 0.06137355 -5 0.163925 right 0.014895056 0.014567666 3.065567e-01 -0.01365704 -6 0.263925 right 0.005435061 0.011726100 6.430052e-01 -0.01754767 -7 0.363925 right -0.011887068 0.011410501 2.975203e-01 -0.03425124 -8 0.463925 right 0.006736746 0.011407038 5.548032e-01 -0.01562064 -9 0.563925 right 0.010152920 0.012815014 4.282047e-01 -0.01496405 - CI_high bw -1 0.008129422 0.1898052 -2 0.019828532 0.1811187 -3 0.022510822 0.1432704 -4 0.098475528 0.2938561 -5 0.043447156 0.1385116 -6 0.028417795 0.2901109 -7 0.010477102 0.3783845 -8 0.029094130 0.4458358 -9 0.035269885 0.4358020 -> -> # with HC: -> ptPl_reg_nonpara_HC <- plotPlacebo(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) -> ptPl_reg_nonpara_HC - cutpoint position LATE se p_value CI_low -1 -0.403200 left -0.024718935 0.007273330 6.773866e-04 -0.038974400 -2 -0.303200 left -0.006342468 0.007828474 4.178371e-01 -0.021685995 -3 -0.203200 left -0.002383527 0.007608807 7.540839e-01 -0.017296515 -4 0.000000 True 0.079924537 0.005387560 8.698214e-50 0.069365114 -5 0.163925 right 0.014895056 0.005079721 3.365062e-03 0.004938986 -6 0.263925 right 0.005435061 0.009521907 5.681383e-01 -0.013227534 -7 0.363925 right -0.011887068 0.009213640 1.969952e-01 -0.029945470 -8 0.463925 right 0.006736746 0.009332790 4.703951e-01 -0.011555186 -9 0.563925 right 0.010152920 0.008135788 2.120555e-01 -0.005792932 - CI_high bw -1 -0.010463470 0.1898052 -2 0.009001058 0.1811187 -3 0.012529460 0.1432704 -4 0.090483960 0.2938561 -5 0.024851125 0.1385116 -6 0.024097656 0.2901109 -7 0.006171334 0.3783845 -8 0.025028678 0.4458358 -9 0.026098771 0.4358020 -> -> ptPl_reg_para_0 <- plotPlacebo(reg_para_0) -> ptPl_reg_para_0 - cutpoint position LATE se p_value CI_low CI_high bw -1 -0.403200 left 0.1499229 0.005955616 5.847699e-126 0.1382450 0.1616009 NA -2 -0.303200 left 0.1278332 0.005309990 2.549019e-116 0.1174212 0.1382452 NA -3 -0.203200 left 0.1149348 0.005369786 4.164223e-94 0.1044055 0.1254640 NA -4 0.000000 True 0.3513582 0.004195424 0.000000e+00 0.3431338 0.3595826 NA -5 0.163925 right 0.1737790 0.006081606 6.603886e-163 0.1618555 0.1857025 NA -6 0.263925 right 0.1782578 0.005230703 1.806609e-222 0.1680025 0.1885130 NA -7 0.363925 right 0.1858280 0.004966625 2.751855e-261 0.1760905 0.1955655 NA -8 0.463925 right 0.1996953 0.005054746 1.795863e-286 0.1897850 0.2096056 NA -9 0.563925 right 0.2100091 0.005441955 2.277608e-275 0.1993396 0.2206785 NA -> -> -> -> ## density tests -> dens_test(Lee2008_rdd) - - McCrary Test for no discontinuity of density around cutpoint - -data: Lee2008_rdd -z-val = 1.2952, p-value = 0.1952 -alternative hypothesis: Density is discontinuous around cutpoint -sample estimates: -Discontinuity - 0.1035008 - -> dens_test(reg_para_0, plot=FALSE) - - McCrary Test for no discontinuity of density around cutpoint - -data: reg_para_0 -z-val = 1.2952, p-value = 0.1952 -alternative hypothesis: Density is discontinuous around cutpoint -sample estimates: -Discontinuity - 0.1035008 - -> dens_test(reg_nonpara, plot=FALSE)$test.output[c("theta", "se", "z", "p", "binsize", "bw", "cutpoint")] -$theta -[1] 0.1035008 - -$se -[1] 0.07990827 - -$z -[1] 1.295245 - -$p -[1] 0.1952357 - -$binsize -[1] 0.01124348 - -$bw -[1] 0.2422787 - -$cutpoint -[1] 0 - -> -> -> ## Covariates tests -> covarTest_mean(Lee2008_rdd_z) - mean of x mean of y Difference statistic p.value -z1 0.001423447 0.006434915 0.005011469 -0.2005416 0.8410639 -z2 20.0026 19.97715 -0.02544849 0.5065413 0.6124957 -z3 1.978102 1.989785 0.01168304 -0.5762938 0.5644386 -> covarTest_mean(Lee2008_rdd_z, bw=0.1) - mean of x mean of y Difference statistic p.value -z1 0.04586551 0.04336096 -0.002504545 0.04416868 0.9647773 -z2 19.9098 20.02098 0.1111845 -0.9421677 0.3462983 -z3 1.963605 2.006329 0.04272426 -0.9146029 0.3605844 -> covarTest_dis(Lee2008_rdd_z) - statistic p.value -z1 0.02251666 0.3936811 -z2 0.02684002 0.2006513 -z3 0.007305005 0.9999956 -Warning message: -In ks.test(x[regime], x[!regime], exact = exact) : - p-value will be approximate in the presence of ties -> covarTest_dis(Lee2008_rdd_z, bw=0.1) - statistic p.value -z1 0.03544633 0.8429655 -z2 0.04718864 0.512701 -z3 0.02398646 0.9950799 -Warning message: -In ks.test(x[regime], x[!regime], exact = exact) : - p-value will be approximate in the presence of ties -> -> covarTest_mean(reg_para4_cov) - mean of x mean of y Difference statistic p.value -z1 0.001423447 0.006434915 0.005011469 -0.2005416 0.8410639 -z2 20.0026 19.97715 -0.02544849 0.5065413 0.6124957 -z3 1.978102 1.989785 0.01168304 -0.5762938 0.5644386 -> covarTest_dis(reg_para4_cov) - statistic p.value -z1 0.02251666 0.3936811 -z2 0.02684002 0.2006513 -z3 0.007305005 0.9999956 -Warning message: -In ks.test(x[regime], x[!regime], exact = exact) : - p-value will be approximate in the presence of ties -> #### as npreg -> reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE) -> reg_nonpara_np - -Regression Data: 6558 training points, and 2 evaluation points, in 3 variable(s) - x D Dx -Bandwidth(s): 0.2938561 19998 19998 - -Kernel Regression Estimator: Local-Linear -Bandwidth Type: Fixed - -Continuous Kernel Type: Second-Order Gaussian -No. Continuous Explanatory Vars.: 3 - -> RDDcoef(reg_nonpara_np) -[1] 0.08329576 -> RDDcoef(reg_nonpara_np, allCo=TRUE) -[1] 0.454912436 0.083295755 0.391398059 0.004460978 -> RDDcoef(reg_nonpara_np, allInfo=TRUE) - Estimate Std. Error z value Pr(>|z|) -D 0.08329576 0.00353085 23.59085 4.784535e-123 -> RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE) - Estimate Std. Error z value Pr(>|z|) -(Intercept) 0.454912436 0.001765425 257.67872 0.000000e+00 -D 0.083295755 0.003530850 23.59085 4.784535e-123 -x_left 0.391398059 0.003995962 97.94840 0.000000e+00 -x_right 0.004460978 0.003995962 97.94840 0.000000e+00 -> -> ## Compare with result obtained with a Gaussian kernel: -> bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) -> reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) -> all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check.attributes=FALSE) -[1] TRUE -> -> -> -> #### methods -> -> regs_all <- list(reg_para=reg_para, -+ reg_para_0=reg_para_0, -+ reg_para4=reg_para4, -+ reg_para_ik=reg_para_ik, -+ reg_para_fuzz=reg_para_fuzz, -+ reg_para4_cov=reg_para4_cov, -+ reg_para4_cov_slSep=reg_para4_cov_slSep, -+ reg_para4_cov_startR=reg_para4_cov_startR, -+ reg_para4_cov_startR_sl2=reg_para4_cov_startR_sl2, -+ reg_nonpara=reg_nonpara, -+ reg_nonpara_inflm=reg_nonpara_inflm, -+ reg_nonpara_sameSl=reg_nonpara_sameSl) -> capply <- function(x){ -+ n.obs <- sapply(x, length) -+ seq.max <- seq_len(max(n.obs)) -+ t(sapply(x, "[", i = seq.max)) -+ } -> -> capply(lapply(regs_all, coef)) - (Intercept) D x x_right -reg_para 0.43294793 0.11823144 0.2969065 0.04597763 -reg_para_0 0.34652219 0.35135822 NA NA -reg_para4 0.45416747 0.07659014 0.5235953 1.52921601 -reg_para_ik 0.46217139 0.05916354 0.5914869 -0.66227641 -reg_para_fuzz 0.41796288 0.14755375 0.2977825 0.04266442 -reg_para4_cov 0.44599255 0.07641430 0.5274483 1.54394367 -reg_para4_cov_slSep 0.44678156 0.07440820 0.5314429 1.56072992 -reg_para4_cov_startR -0.09676677 0.07640039 0.5267323 1.54201574 -reg_para4_cov_startR_sl2 0.10814791 -0.27537652 0.5343906 1.57489269 -reg_nonpara 0.07992454 NA NA NA -reg_nonpara_inflm 0.07992454 NA NA NA -reg_nonpara_sameSl 0.07977915 NA NA NA - -reg_para NA NA NA NA NA -reg_para_0 NA NA NA NA NA -reg_para4 4.220147 3.045197 0.01951445 -2.233991 -2.983991 -reg_para_ik -16.108618 -41.085077 0.31568244 -2.902856 21.503533 -reg_para_fuzz NA NA NA NA NA -reg_para4_cov 4.238363 3.052206 0.01540441 -2.246801 -3.005639 -reg_para4_cov_slSep 4.259954 3.060466 0.01286431 -2.268298 -3.022976 -reg_para4_cov_startR 4.237801 3.053121 0.01757250 -2.251672 -2.994779 -reg_para4_cov_startR_sl2 4.282174 3.071545 0.01115439 -2.286510 -3.042340 -reg_nonpara NA NA NA NA NA -reg_nonpara_inflm NA NA NA NA NA -reg_nonpara_sameSl NA NA NA NA NA - -reg_para NA NA NA NA -reg_para_0 NA NA NA NA -reg_para4 -3.775626 NA NA NA -reg_para_ik 49.167026 NA NA NA -reg_para_fuzz NA NA NA NA -reg_para4_cov -3.780870 -0.0003926535 0.0005546690 -0.004915837 -reg_para4_cov_slSep -3.790047 -0.0019209726 0.0007585763 -0.014431955 -reg_para4_cov_startR -3.786702 NA NA NA -reg_para4_cov_startR_sl2 -3.801129 NA NA NA -reg_nonpara NA NA NA NA -reg_nonpara_inflm NA NA NA NA -reg_nonpara_sameSl NA NA NA NA - -reg_para NA NA NA NA -reg_para_0 NA NA NA NA -reg_para4 NA NA NA NA -reg_para_ik NA NA NA NA -reg_para_fuzz NA NA NA NA -reg_para4_cov -0.003209824 NA NA NA -reg_para4_cov_slSep -0.007679517 0.002584555 -0.0003170247 0.01631598 -reg_para4_cov_startR NA NA NA NA -reg_para4_cov_startR_sl2 NA NA NA NA -reg_nonpara NA NA NA NA -reg_nonpara_inflm NA NA NA NA -reg_nonpara_sameSl NA NA NA NA - -reg_para NA -reg_para_0 NA -reg_para4 NA -reg_para_ik NA -reg_para_fuzz NA -reg_para4_cov NA -reg_para4_cov_slSep 0.007724786 -reg_para4_cov_startR NA -reg_para4_cov_startR_sl2 NA -reg_nonpara NA -reg_nonpara_inflm NA -reg_nonpara_sameSl NA -> sapply(regs_all, RDDcoef) - reg_para reg_para_0 reg_para4 - 0.11823144 0.35135822 0.07659014 - reg_para_ik reg_para_fuzz reg_para4_cov - 0.05916354 0.14755375 0.07641430 - reg_para4_cov_slSep reg_para4_cov_startR reg_para4_cov_startR_sl2 - 0.07440820 0.07640039 -0.27537652 - reg_nonpara reg_nonpara_inflm reg_nonpara_sameSl - 0.07992454 0.07992454 0.07977915 -> RDDpred_issue <- c("reg_para_0", "reg_para_fuzz", "reg_nonpara", "reg_nonpara_sameSl") -> sapply(regs_all[!names(regs_all)%in%RDDpred_issue], RDDpred) - reg_para reg_para4 reg_para_ik reg_para4_cov reg_para4_cov_slSep -fit 0.1182314 0.07659014 0.05916354 0.0764143 0.0744082 -se.fit 0.005679859 0.01323924 0.02059588 0.01324397 0.03654137 - reg_para4_cov_startR reg_para4_cov_startR_sl2 reg_nonpara_inflm -fit 0.07640039 -0.2753765 0.07992454 -se.fit 0.01323793 0.01323405 0.006821266 -> -> sapply(regs_all, RDDtools:::getCutpoint) - reg_para reg_para_0 reg_para4 - 0 0 0 - reg_para_ik reg_para_fuzz reg_para4_cov - 0 0 0 - reg_para4_cov_slSep reg_para4_cov_startR reg_para4_cov_startR_sl2 - 0 0 0 - reg_nonpara reg_nonpara_inflm reg_nonpara_sameSl - 0 0 0 -> lapply(regs_all, plotSensi, plot=FALSE) -$reg_para - bw order LATE se CI_low CI_high -1 NA 0 0.35135822 0.004195424 0.34313534 0.35958110 -2 NA 1 0.11823144 0.005679859 0.10709913 0.12936376 -3 NA 2 0.05186868 0.008087038 0.03601838 0.06771898 -4 NA 3 0.11149993 0.010654624 0.09061725 0.13238261 - -$reg_para_0 - bw order LATE se CI_low CI_high -1 NA 0 0.35135822 0.004195424 0.34313534 0.35958110 -2 NA 1 0.11823144 0.005679859 0.10709913 0.12936376 -3 NA 2 0.05186868 0.008087038 0.03601838 0.06771898 - -$reg_para4 - bw order LATE se CI_low CI_high -1 NA 0 0.35135822 0.004195424 0.34313534 0.35958110 -2 NA 1 0.11823144 0.005679859 0.10709913 0.12936376 -3 NA 2 0.05186868 0.008087038 0.03601838 0.06771898 -4 NA 3 0.11149993 0.010654624 0.09061725 0.13238261 -5 NA 4 0.07659014 0.013239238 0.05064171 0.10253857 -6 NA 5 0.04333404 0.015859294 0.01225039 0.07441768 -7 NA 6 0.06722268 0.018524698 0.03091494 0.10353042 - -$reg_para_ik - bw order LATE se CI_low CI_high -1 0.1938561 0 0.16022027 0.004798330 0.150815718 0.16962483 -2 0.2438561 0 0.17760973 0.004510194 0.168769917 0.18644955 -3 0.2938561 0 0.19564717 0.004321442 0.187177301 0.20411704 -4 0.3438561 0 0.21248892 0.004135393 0.204383701 0.22059414 -5 0.3938561 0 0.22678759 0.004030799 0.218887370 0.23468781 -6 0.1938561 1 0.07701965 0.009222181 0.058944512 0.09509480 -7 0.2438561 1 0.08196492 0.008512997 0.065279753 0.09865009 -8 0.2938561 1 0.08233778 0.008023551 0.066611911 0.09806365 -9 0.3438561 1 0.08638108 0.007597806 0.071489657 0.10127251 -10 0.3938561 1 0.08860994 0.007320947 0.074261148 0.10295873 -11 0.1938561 2 0.06844548 0.014023370 0.040960175 0.09593078 -12 0.2438561 2 0.06854083 0.012874835 0.043306620 0.09377505 -13 0.2938561 2 0.07613674 0.012090479 0.052439841 0.09983365 -14 0.3438561 2 0.07460742 0.011398876 0.052266039 0.09694881 -15 0.3938561 2 0.07653994 0.010899510 0.055177294 0.09790259 -16 0.1938561 3 0.04015671 0.018945380 0.003024450 0.07728898 -17 0.2438561 3 0.05688732 0.017417241 0.022750156 0.09102449 -18 0.2938561 3 0.05385541 0.016296871 0.021914126 0.08579669 -19 0.3438561 3 0.06444855 0.015335541 0.034391440 0.09450566 -20 0.3938561 3 0.06563911 0.014610684 0.037002693 0.09427552 -21 0.1938561 4 0.07247223 0.023823856 0.025778327 0.11916613 -22 0.2438561 4 0.04629929 0.021997428 0.003185119 0.08941345 -23 0.2938561 4 0.05916354 0.020595882 0.018796358 0.09953073 -24 0.3438561 4 0.05275995 0.019372237 0.014791068 0.09072884 -25 0.3938561 4 0.05989365 0.018432829 0.023765971 0.09602133 -26 0.1938561 5 0.08018637 0.028449309 0.024426748 0.13594599 -27 0.2438561 5 0.07228197 0.026455973 0.020429214 0.12413472 -28 0.2938561 5 0.04568221 0.024863208 -0.003048781 0.09441320 -29 0.3438561 5 0.05146888 0.023404684 0.005596542 0.09734122 -30 0.3938561 5 0.04623271 0.022287360 0.002550286 0.08991513 -31 0.1938561 6 0.10243475 0.032995854 0.037764063 0.16710544 -32 0.2438561 6 0.09506766 0.030674617 0.034946512 0.15518880 -33 0.2938561 6 0.08500551 0.028919420 0.028324485 0.14168653 -34 0.3438561 6 0.06514312 0.027376913 0.011485362 0.11880089 -35 0.3938561 6 0.06054718 0.026095329 0.009401274 0.11169308 - -$reg_para_fuzz - bw order LATE se CI_low CI_high -1 NA 0 0.4355955 0.006528241 0.4228004 0.4483906 -2 NA 1 0.1475538 0.007429542 0.1329921 0.1621154 -3 NA 2 0.0656055 0.010332691 0.0453538 0.0858572 -4 NA 3 0.1404807 0.014040362 0.1129621 0.1679993 - -$reg_para4_cov - bw order LATE se CI_low CI_high -1 NA 0 0.35142357 0.004196158 0.34319925 0.35964788 -2 NA 1 0.11827016 0.005681765 0.10713410 0.12940621 -3 NA 2 0.05189855 0.008088796 0.03604480 0.06775230 -4 NA 3 0.11134675 0.010660840 0.09045189 0.13224161 -5 NA 4 0.07641430 0.013243972 0.05045659 0.10237201 -6 NA 5 0.04315957 0.015865526 0.01206371 0.07425543 -7 NA 6 0.06694689 0.018532763 0.03062334 0.10327044 - -$reg_para4_cov_slSep - bw order LATE se CI_low CI_high -1 NA 0 0.33478449 0.04249791 0.251490112 0.4180789 -2 NA 1 0.11802512 0.03533592 0.048767990 0.1872822 -3 NA 2 0.05691513 0.03538549 -0.012439161 0.1262694 -4 NA 3 0.10861931 0.03581943 0.038414520 0.1788241 -5 NA 4 0.07440820 0.03654137 0.002788442 0.1460280 -6 NA 5 0.04510758 0.03740477 -0.028204423 0.1184196 -7 NA 6 0.06879956 0.03859513 -0.006845514 0.1444446 - -$reg_para4_cov_startR - bw order LATE se CI_low CI_high -1 NA 0 0.35138670 0.004194893 0.34316486 0.35960854 -2 NA 1 0.11831288 0.005679290 0.10718168 0.12944409 -3 NA 2 0.05190991 0.008086098 0.03606145 0.06775837 -4 NA 3 0.11141562 0.010653686 0.09053478 0.13229646 -5 NA 4 0.07640039 0.013237935 0.05045452 0.10234627 -6 NA 5 0.04317996 0.015857875 0.01209909 0.07426082 -7 NA 6 0.06694821 0.018523221 0.03064337 0.10325306 - -$reg_para4_cov_startR_sl2 - bw order LATE se CI_low CI_high -1 NA 0 -6.185850e-19 0.004193136 -0.008218395 0.008218395 -2 NA 1 -2.329820e-01 0.005676838 -0.244108359 -0.221855561 -3 NA 2 -2.992057e-01 0.008083027 -0.315048158 -0.283363275 -4 NA 3 -2.402482e-01 0.010650758 -0.261123335 -0.219373129 -5 NA 4 -2.753765e-01 0.013234053 -0.301314786 -0.249438251 -6 NA 5 -3.078751e-01 0.015854218 -0.338948842 -0.276801448 -7 NA 6 -2.843114e-01 0.018519301 -0.320608600 -0.248014273 - -$reg_nonpara - bw LATE se p_value CI_low CI_high -1 0.1938561 0.07369768 0.010505222 2.293943e-12 0.05310782 0.09428753 -2 0.2438561 0.07661912 0.009878428 8.750794e-15 0.05725776 0.09598049 -3 0.2938561 0.07992454 0.009464965 3.060030e-17 0.06137355 0.09847553 -4 0.3438561 0.08182321 0.009054544 1.614710e-19 0.06407663 0.09956979 -5 0.3938561 0.08398642 0.008820583 1.704675e-21 0.06669839 0.10127444 - -$reg_nonpara_inflm - bw LATE se p_value CI_low CI_high -1 0.1938561 0.07369768 0.008226172 6.804965e-19 0.05757468 0.08982068 -2 0.2438561 0.07661912 0.007390799 1.010948e-24 0.06213342 0.09110482 -3 0.2938561 0.07992454 0.006821266 4.467779e-31 0.06655510 0.09329397 -4 0.3438561 0.08182321 0.006393962 1.014472e-36 0.06929127 0.09435515 -5 0.3938561 0.08398642 0.006131746 8.631145e-42 0.07196841 0.09600442 - -$reg_nonpara_sameSl - bw LATE se p_value CI_low CI_high -1 0.1938561 0.07367558 0.010505222 2.328712e-12 0.05308572 0.09426543 -2 0.2438561 0.07652761 0.009878428 9.413189e-15 0.05716625 0.09588898 -3 0.2938561 0.07977915 0.009464965 3.489595e-17 0.06122816 0.09833014 -4 0.3438561 0.08161463 0.009054544 1.992826e-19 0.06386805 0.09936121 -5 0.3938561 0.08370026 0.008820583 2.328193e-21 0.06641223 0.10098828 - -> -> sapply(regs_all, function(x) dens_test(x, plot=FALSE)[c("p.value", "statistic", "estimate")]) - reg_para reg_para_0 reg_para4 reg_para_ik reg_para_fuzz -p.value 0.1952357 0.1952357 0.1952357 0.1952357 0.1952357 -statistic 1.295245 1.295245 1.295245 1.295245 1.295245 -estimate 0.1035008 0.1035008 0.1035008 0.1035008 0.1035008 - reg_para4_cov reg_para4_cov_slSep reg_para4_cov_startR -p.value 0.1952357 0.1952357 0.1952357 -statistic 1.295245 1.295245 1.295245 -estimate 0.1035008 0.1035008 0.1035008 - reg_para4_cov_startR_sl2 reg_nonpara reg_nonpara_inflm -p.value 0.1952357 0.1952357 0.1952357 -statistic 1.295245 1.295245 1.295245 -estimate 0.1035008 0.1035008 0.1035008 - reg_nonpara_sameSl -p.value 0.1952357 -statistic 1.295245 -estimate 0.1035008 -> -> -> proc.time() -utilisateur système écoulé - 11.760 0.304 12.503 diff --git a/RDDtools/tests/simple_MC.R b/RDDtools/tests/simple_MC.R deleted file mode 100644 index e88c6f3..0000000 --- a/RDDtools/tests/simple_MC.R +++ /dev/null @@ -1,65 +0,0 @@ - -library(RDDtools) -library(rdd) - -## simple MC: -set.seed(123) - -MC_simple <- function(n=200, CATE=0.3, HATE=0.1){ - x <- rnorm(n, mean=20, sd=5) - D <- x>= 20 - y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1) - cat("effect", CATE+HATE*20, "\n") - RDDdata(x=x, y=y, cutpoint=20) - -} - -input_mc <- MC_simple(n=1000, CATE=0.4) -plot(input_mc) - -RDD_bw <- RDDbw_IK(input_mc) - -RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw) -RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw) -RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm") -RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm") -RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw) -RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw) -rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw) - - -printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model))) -printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model))) -printCoefmat(coef(summary(RDD_lm_sep))) -printCoefmat(coef(summary(RDD_lm_same))) -printCoefmat(coef(summary(rdd_RDe $model[[1]]))) - - -## few checks: -plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5) -plotPlacebo(RDD_np_sep) - -plotSensi(RDD_np_same, from=5, to=20, by=0.5) -plotPlacebo(RDD_np_same) - -a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5) -plotPlacebo(RDD_lm_sep) - -plotSensi(RDD_lm_same, from=5, to=20, by=0.5) -plotPlacebo(RDD_lm_same) - -#### Other MCs: -set.seed(123) -head(gen_MC_IK()) - -set.seed(123) -head(gen_MC_IK(output="RDDdata")) - -set.seed(123) -head(gen_MC_IK(version=2)) - -set.seed(123) -head(gen_MC_IK(version=3)) - -set.seed(123) -head(gen_MC_IK(version=4)) diff --git a/RDDtools/tests/simple_MC.Rout.save b/RDDtools/tests/simple_MC.Rout.save deleted file mode 100644 index f4e5548..0000000 --- a/RDDtools/tests/simple_MC.Rout.save +++ /dev/null @@ -1,179 +0,0 @@ - -R version 2.15.2 (2012-10-26) -- "Trick or Treat" -Copyright (C) 2012 The R Foundation for Statistical Computing -ISBN 3-900051-07-0 -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. - -> -> library(RDDtools) -Loading required package: AER -Loading required package: car -Loading required package: lmtest -Loading required package: zoo - -Attaching package: 'zoo' - -The following objects are masked from 'package:base': - - as.Date, as.Date.numeric - -Loading required package: sandwich -Loading required package: survival -Loading required package: splines -KernSmooth 2.23 loaded -Copyright M. P. Wand 1997-2009 - -RDDtools 0.22 -PLEASE NOTE THIS is currently only a development version. -Run vignette('RDDtools') for the documentation -> library(rdd) -Loading required package: Formula -> -> ## simple MC: -> set.seed(123) -> -> MC_simple <- function(n=200, CATE=0.3, HATE=0.1){ -+ x <- rnorm(n, mean=20, sd=5) -+ D <- x>= 20 -+ y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1) -+ cat("effect", CATE+HATE*20, "\n") -+ RDDdata(x=x, y=y, cutpoint=20) -+ -+ } -> -> input_mc <- MC_simple(n=1000, CATE=0.4) -effect 2.4 -> plot(input_mc) -> -> RDD_bw <- RDDbw_IK(input_mc) -> -> RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw) -> RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw) -> RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm") -> RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm") -> RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw) -> RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw) -> rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw) -> -> -> printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model))) - Estimate Std. Error t value Pr(>|t|) -(Intercept) 6.7943125 0.0074768 908.722 < 2.2e-16 *** -D 2.4175554 0.0106230 227.578 < 2.2e-16 *** -x 0.2984534 0.0022980 129.876 < 2.2e-16 *** -x_right 0.1007346 0.0032831 30.683 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model))) - Estimate Std. Error t value Pr(>|t|) -(Intercept) 6.9205374 0.0088024 786.21 < 2.2e-16 *** -D 2.4225702 0.0149756 161.77 < 2.2e-16 *** -x 0.3478051 0.0023140 150.31 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> printCoefmat(coef(summary(RDD_lm_sep))) - Estimate Std. Error t value Pr(>|t|) -(Intercept) 6.7962504 0.0079252 857.552 < 2.2e-16 *** -D 2.4109453 0.0112070 215.129 < 2.2e-16 *** -x 0.2992111 0.0017938 166.802 < 2.2e-16 *** -x_right 0.1018062 0.0025548 39.849 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> printCoefmat(coef(summary(RDD_lm_same))) - Estimate Std. Error t value Pr(>|t|) -(Intercept) 6.9762180 0.0106354 655.95 < 2.2e-16 *** -D 2.4137377 0.0183016 131.89 < 2.2e-16 *** -x 0.3494005 0.0020859 167.51 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> printCoefmat(coef(summary(rdd_RDe $model[[1]]))) - Estimate Std. Error t value Pr(>|t|) -(Intercept) 6.7943125 0.0074768 908.72 < 2.2e-16 *** -Tr 2.4175554 0.0106230 227.58 < 2.2e-16 *** -Xl 0.2984534 0.0022980 129.88 < 2.2e-16 *** -Xr 0.3991880 0.0023448 170.24 < 2.2e-16 *** ---- -Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 -> -> -> ## few checks: -> plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5) -> plotPlacebo(RDD_np_sep) -> -> plotSensi(RDD_np_same, from=5, to=20, by=0.5) -> plotPlacebo(RDD_np_same) -> -> a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5) -> plotPlacebo(RDD_lm_sep) -> -> plotSensi(RDD_lm_same, from=5, to=20, by=0.5) -> plotPlacebo(RDD_lm_same) -> -> #### Other MCs: -> set.seed(123) -> head(gen_MC_IK()) - x y -1 -0.5604223 0.0192401 -2 -0.4325322 0.2071696 -3 0.4824464 0.8091620 -4 -0.3013330 0.4993961 -5 -0.2740911 0.4570206 -6 -0.1112708 0.3558237 -> -> set.seed(123) -> head(gen_MC_IK(output="RDDdata")) - x y -1 -0.5604223 0.0192401 -2 -0.4325322 0.2071696 -3 0.4824464 0.8091620 -4 -0.3013330 0.4993961 -5 -0.2740911 0.4570206 -6 -0.1112708 0.3558237 -> -> set.seed(123) -> head(gen_MC_IK(version=2)) - x y -1 -0.5604223 0.775848845 -2 -0.4325322 0.486922823 -3 0.4824464 1.011047103 -4 -0.3013330 0.416130145 -5 -0.2740911 0.317010484 -6 -0.1112708 -0.009950054 -> -> set.seed(123) -> head(gen_MC_IK(version=3)) - x y -1 -0.5604223 -3.6512588 -2 -0.4325322 -1.5947076 -3 0.4824464 0.8091620 -4 -0.3013330 -0.2635494 -5 -0.2740911 -0.1648652 -6 -0.1112708 0.2298459 -> -> set.seed(123) -> head(gen_MC_IK(version=4)) - x y -1 -0.5604223 -2.709039228 -2 -0.4325322 -1.033455253 -3 0.4824464 1.507425459 -4 -0.3013330 0.008855458 -5 -0.2740911 0.060512581 -6 -0.1112708 0.266989475 -> -> proc.time() -utilisateur système écoulé - 1.23 0.07 1.46 diff --git a/RDDtools/vignettes/RDD_refs.bib b/RDDtools/vignettes/RDD_refs.bib deleted file mode 100644 index 448cca5..0000000 --- a/RDDtools/vignettes/RDD_refs.bib +++ /dev/null @@ -1,143 +0,0 @@ -% This file was created with JabRef 2.7b. -% Encoding: UTF-8 - -@TECHREPORT{CalonicoCattaneoEtAl2012, - author = {Sebastian Calonico and Matias D. Cattaneo and Rocio Titiunik}, - title = {Robust Nonparametric Bias-Corrected Inference in the Regression Discontinuity - Design}, - year = {2012}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{ChengFanEtAl1997, - author = {Cheng, M.-Y. and Fan, J. and Marron, J. S.}, - title = {On Automatic Boundary Corrections}, - journal = {Annals of Statistics}, - year = {1997}, - volume = {25}, - pages = {1691-1708}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@BOOK{FanGijbels1996, - title = {Local Polynomial Modeling and its Implications}, - publisher = {Boca Raton: Chapman and Hall/CRC, Monographs on Statistics and Applied - Probability no. 66}, - year = {1996}, - author = {Fan, J. and Gijbels, I.}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{FanGijbels1992, - author = {Fan, J. and Gijbels, I.}, - title = {Variable Bandwidth and Local Linear Regression Smoothers}, - journal = {Annals of Statistics}, - year = {1992}, - volume = {20}, - pages = {2008-2036}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{ImbensKalyanaraman2012, - author = {Guido Imbens And Karthik Kalyanaraman}, - title = {Optimal Bandwidth Choice for the Regression Discontinuity Estimator}, - journal = {Review of Economic Studies}, - year = {2012}, - volume = {79}, - pages = {933-959}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{ImbensLemieux2008, - author = {Imbens, Guido W. and Lemieux, Thomas}, - title = {Regression discontinuity designs: A guide to practice}, - journal = {Journal of Econometrics}, - year = {2008}, - volume = {142}, - pages = {615-635}, - number = {2}, - month = {February}, - abstract = {In Regression Discontinuity (RD) designs for evaluating causal effects - of interventions, assignment to a treatment is determined at least - partly by the value of an observed covariate lying on either side - of a fixed threshold. These designs were first introduced in the - evaluation literature by Thistlewaite and Campbell (1960). With the - exception of a few unpublished theoretical papers, these methods - did not attract much attention in the economics literature until - recently. Starting in the late 1990s, there has been a large number - of studies in economics applying and extending RD methods. In this - paper we review some of the practical and theoretical issues involved - in the implementation of RD methods.

(This abstract was borrowed - from another version of this item.)}, - owner = {matifou}, - timestamp = {2014.05.21}, - url = {http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html} -} - -@ARTICLE{Lee2008, - author = {David S. Lee}, - title = {Randomized experiments from non-random selection in U.S. House elections}, - journal = {Journal of Econometrics}, - year = {2008}, - volume = {142}, - pages = {675-697}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{LeeLemieux2010, - author = {Lee, David S. and Thomas Lemieux}, - title = {Regression Discontinuity Designs in Economics}, - journal = {Journal of Economic Literature}, - year = {2010}, - volume = {48(2)}, - pages = {281-355}, - owner = {mat}, - timestamp = {2012.11.19} -} - -@ARTICLE{McCrary2008, - author = {McCrary, Justin}, - title = {Manipulation of the Running Variable in the Regression Discontinuity - Design: A Density Test}, - journal = {Journal of Econometrics}, - year = {2008}, - volume = {142}, - pages = {698-714}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@TECHREPORT{Porter2003, - author = {Porter, Jack}, - title = {Estimation in the Regression Discontinuity Model}, - institution = {University of Wisconsin, Madison, Department of Economics}, - year = {2003}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@ARTICLE{RuppertSheatherEtAl1995, - author = {Ruppert, D. and Sheather, S. J. and Wand, M. P.}, - title = {An effective bandwidth selector for local least squares regression}, - journal = {Journal of the American Statistical Association}, - year = {1995}, - volume = {90}, - pages = {1257-1270}, - owner = {mat}, - timestamp = {2013.04.17} -} - -@comment{jabref-meta: selector_publisher:} - -@comment{jabref-meta: selector_author:} - -@comment{jabref-meta: selector_journal:} - -@comment{jabref-meta: selector_keywords:} - diff --git a/RDDtools/vignettes/RDDtools.lyx b/RDDtools/vignettes/RDDtools.lyx deleted file mode 100644 index 0f0cfce..0000000 --- a/RDDtools/vignettes/RDDtools.lyx +++ /dev/null @@ -1,2394 +0,0 @@ -#LyX 2.1 created this file. For more info see http://www.lyx.org/ -\lyxformat 474 -\begin_document -\begin_header -\textclass jss -\begin_preamble - -\usepackage{amsmath} -\usepackage{nameref} - -%the following commands are used only for articles and codesnippets - -\author{Matthieu Stigler\\Affiliation IHEID} -\title{\pkg{RDDtools}: an overview } - -% the same as above, without any formatting -\Plainauthor{Matthieu Stigler} -\Plaintitle{\pkg{RDDtools}: a toolbox to practice } -%if necessary, provide a short title -\Shorttitle{\pkg{RDDtools}: a toolbox to practice } - -\Abstract{\pkg{RDDtools} is a R package for sharp regression discontinuity design (RDD). It offers various estimators, tests and graphical procedures following the guidelines of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. This note illustrate how to use the package, using the well-known dataset of \citet{Lee2008}. - - -NOTE THAT this is a preliminary note, on a preliminary package still under development. Changes of the function names, arguments and output are to be expected, as well as possible mistakes and inconsistencies. Please report any mistakes or suggestion to \email{Matthieu.Stigler@iheid.ch}} -%at least one keyword is needed -\Keywords{Regression discontinuity design, non-parametric analysis, \pkg{RDDtools}, \proglang{R}} -%the same as above, without any formatting -\Plainkeywords{Regression discontinuity design, non-parametric analysis,RDDtools, R} - -%the following commands are used only for book or software reviews - -%\Reviewer{Some Author\\University of Somewhere} -%\Plainreviewer{Some Author} - - -%without any formatting -%\Plaintitle{LyX and R: Secrets of the LyX Master} -%\Shorttitle{LyX and R} - - - -%The address of at least one author should be given in the following format -\Address{ - Matthieu Stigler\\ - Centre for Finance and development\\ - IHEID\\ - Geneva\\ - E-mail: \email{Matthieu.Stigler@iheid.ch} -} -%you can add a telephone and fax number before the e-mail in the format -%Telephone: +12/3/4567-89 -%Fax: +12/3/4567-89 - -%if you use Sweave, include the following line (with % symbols): -%% need no \usepackage{Sweave.sty} - -%% Arg min operator: -\DeclareMathOperator*{\argmi}{arg\,min} -\newcommand{\argmin}[1]{\underset{#1}{\argmi}} - -\DeclareMathOperator*{\Ker}{\mathcal{K}} -\end_preamble -\options nojss -\use_default_options false -\begin_modules -knitr -\end_modules -\maintain_unincluded_children false -\language english -\language_package default -\inputencoding auto -\fontencoding global -\font_roman default -\font_sans default -\font_typewriter default -\font_math auto -\font_default_family default -\use_non_tex_fonts false -\font_sc false -\font_osf false -\font_sf_scale 100 -\font_tt_scale 100 -\graphics default -\default_output_format default -\output_sync 0 -\bibtex_command default -\index_command default -\paperfontsize default -\spacing single -\use_hyperref false -\papersize default -\use_geometry false -\use_package amsmath 0 -\use_package amssymb 2 -\use_package cancel 0 -\use_package esint 0 -\use_package mathdots 1 -\use_package mathtools 0 -\use_package mhchem 1 -\use_package stackrel 0 -\use_package stmaryrd 0 -\use_package undertilde 0 -\cite_engine natbib -\cite_engine_type authoryear -\biblio_style plainnat -\use_bibtopic false -\use_indices false -\paperorientation portrait -\suppress_date false -\justification true -\use_refstyle 0 -\index Index -\shortcut idx -\color #008000 -\end_index -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation indent -\paragraph_indentation default -\quotes_language english -\papercolumns 1 -\papersides 1 -\paperpagestyle default -\tracking_changes false -\output_changes false -\html_math_output 0 -\html_css_as_file 0 -\html_be_strict false -\end_header - -\begin_body - -\begin_layout Standard -\begin_inset CommandInset toc -LatexCommand tableofcontents - -\end_inset - - -\end_layout - -\begin_layout Section -Introduction -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -addcontentsline{toc}{section}{Introduction} -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Introduction to RDD -\end_layout - -\begin_layout Standard -\begin_inset Note Note -status open - -\begin_layout Plain Layout -The Regression Discontinuity Design (RDD) method is a method for impact - evaluation in situations where attribution of the programme cannot be assumed - to be random, yet is done based on a known selection rule. - Examples of such situations are scholarships attributed based on a score - (the seminal example due to -\begin_inset CommandInset citation -LatexCommand citealp -key "ThistlewaiteCampbell1960" - -\end_inset - -), a maximum number of children in a classroom -\begin_inset CommandInset citation -LatexCommand citep -key "AngristLavy1999" - -\end_inset - -, majority rules for election -\begin_inset CommandInset citation -LatexCommand citep -key "Lee2008" - -\end_inset - - or the choice of an HIV training programme targetting small schools -\begin_inset CommandInset citation -LatexCommand citep -key "ArcandWouabe2010" - -\end_inset - -. - The underlying idea is that, although -\end_layout - -\begin_layout Plain Layout -to exploit the discontinuies in the programme attribution introduced by - the rule to assume that around the discontinuity point. - -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Introduction to RDDtools -\end_layout - -\begin_layout Standard -The R package -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{RDDtools} -\end_layout - -\end_inset - - aims at offering a complete a toolbox for regression discontinuity design, - following the step-by-step recommendations of -\begin_inset CommandInset citation -LatexCommand citet -key "ImbensLemieux2008" - -\end_inset - - and -\begin_inset CommandInset citation -LatexCommand citet -key "LeeLemieux2010" - -\end_inset - -. - Summarising the approaches advocated in the two papers, a RDD analysis - comprises of following steps: -\end_layout - -\begin_layout Enumerate -Graphical representation of the data -\end_layout - -\begin_layout Enumerate -Estimation -\end_layout - -\begin_layout Enumerate -Validity tests -\end_layout - -\begin_layout Standard -We add to this list a step that is too often forgotten, yet can be very - burdensome: data preparation. - Hence, this list is extended with the fundamental step 0, which involves - preparing the data in the right way. - -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{RDDtools} -\end_layout - -\end_inset - - offers an object-oriented way to analysis, building on the R mechanism - of S3 methods and classes. - Concretely, this implies that the user has to specify the input data only - once, and that most of the functions can be called directly on the new - object of class -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata} -\end_layout - -\end_inset - -. -\end_layout - -\begin_layout Section -Step 0: data input -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -addcontentsline{toc}{section}{Step 0: data input} -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -As first step of the analysis, the user has to specify the input data into - the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata} -\end_layout - -\end_inset - - function, which takes following arguments: -\end_layout - -\begin_layout Description -y The outcome variable -\end_layout - -\begin_layout Description -x The forcing variable -\end_layout - -\begin_layout Description -cutpoint The cutpoint/threshold (note only one cutpoint can be given) -\end_layout - -\begin_layout Description -covar Eventual covariates -\end_layout - -\begin_layout Standard -The RDDdata function returns an object of class -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata} -\end_layout - -\end_inset - -, as well as of the usual -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -proglang{R} -\end_layout - -\end_inset - - class -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{data.frame} -\end_layout - -\end_inset - -. - -\end_layout - -\begin_layout Standard -To illustrate this, we show how to use this with the benchmark dataset of - -\begin_inset CommandInset citation -LatexCommand citet -key "Lee2008" - -\end_inset - -, adding randomly generated covariates for the sake of illustration. - The dataset is shipped with the package, and is available under the name - -\emph on -Lee2008. - -\emph default -Using the R -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{head} -\end_layout - -\end_inset - - function, we look at the first rows of the dataset: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -library(RDDtools) -\end_layout - -\begin_layout Plain Layout - -data(Lee2008) -\end_layout - -\begin_layout Plain Layout - -head(Lee2008) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The data is already clean, so the only step required is to fit it into the - RDDdata function, adding however the information on the cutpoint. - For illustration purpose, we add also some random covariates as a matrix - Z: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -n_Lee <- nrow(Lee2008) -\end_layout - -\begin_layout Plain Layout - -Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), -\end_layout - -\begin_layout Plain Layout - -z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) -\end_layout - -\begin_layout Plain Layout - -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -We now have an object -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{Lee2008_rdd} -\end_layout - -\end_inset - - of class -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata} -\end_layout - -\end_inset - - (and -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{data.frame} -\end_layout - -\end_inset - -). - It has a specific -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{summary} -\end_layout - -\end_inset - - method, which gives a few summary informations about the dataset: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -summary(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Another function for -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata} -\end_layout - -\end_inset - - objects is the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plot()} -\end_layout - -\end_inset - - function, discussed in the next section. - -\end_layout - -\begin_layout Section -Step 1: Graphical representation -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -addcontentsline{toc}{section}{Step 1: Graphical representation} -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Once the dataset has been formatted with the RDDdata function, it can be - used directly for simple illustration. - Indeed, as recommended by -\begin_inset CommandInset citation -LatexCommand citet -key "LeeLemieux2010" - -\end_inset - -, it is always good to show the raw data first, if ones wishes to convince - that there is a discontinuity. - This is simply done using the standard R plot() function, which has been - customised for RDDdata objects. - The function shows a scatter plot of the outcome variable against the forcing - variable. - Following -\begin_inset CommandInset citation -LatexCommand citet -key "LeeLemieux2010" - -\end_inset - -, not all single datapoints are shown: instead, a -\begin_inset Quotes eld -\end_inset - -binned -\begin_inset Quotes erd -\end_inset - - scatterplot is shown, using non-overlapping averages: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plot(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The bandwidth for the bins (also called binwidth) can be set by the user - with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{h} -\end_layout - -\end_inset - - argument. - If this it is not provided by the user, the function uses by default the - global bandwidth of -\begin_inset CommandInset citation -LatexCommand citet -key "RuppertSheatherEtAl1995" - -\end_inset - -, implemented in the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDbw_RSW()} -\end_layout - -\end_inset - - function. - -\end_layout - -\begin_layout Standard -Another argument that might be useful for the user is the option -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{nplot} -\end_layout - -\end_inset - -, which allows to plot multiple plots with different bandwidths: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04)) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Note however that experience shows that showing multiple plots have the - effect to shrink considerably the y axis, reducing the visual impression - of discontinuity. - -\end_layout - -\begin_layout Section -Step 2: Estimation -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -addcontentsline{toc}{section}{Step 2: Estimation} -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -RDDtools offers currently two estimators: -\end_layout - -\begin_layout Itemize -the simple parametric estimator: function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_lm()} -\end_layout - -\end_inset - -. - -\end_layout - -\begin_layout Itemize -the non-parametric local-linear estimator: function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_np()} -\end_layout - -\end_inset - -. - -\end_layout - -\begin_layout Standard -These two functions share some common arguments, which are: -\end_layout - -\begin_layout Description -RDDobject: the input data as obtained with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDdata()} -\end_layout - -\end_inset - - function -\end_layout - -\begin_layout Description -bw: the bandwidth. - -\end_layout - -\begin_layout Description -covariates: this will allow to add covariates in the analysis. - Note that it is presently NOT used. - -\end_layout - -\begin_layout Standard -The bandwidth argument has a different behaviour in the parametric and non-param -etric way: while the parametric estimation can be done without bandwidth, - the non-parametric estimator is by definition based on a bandwidth. - This means that the default behaviours are different: if no bandwidth is - given for the parametric model, the model will be simply estimated withut - bandwidth, that is covering the full sample on both sides of the cutpoint. - On the other side, if no bandwidth is provided in the non-parametric case, - a bandwidth will still be computed automatically using the method advocated - by -\begin_inset CommandInset citation -LatexCommand citet -key "ImbensKalyanaraman2012" - -\end_inset - -. - -\end_layout - -\begin_layout Subsection -Parametric -\end_layout - -\begin_layout Standard -The parametric estimator simply estimates a function over the whole sample - (hence called -\emph on -pooled regression -\emph default - by -\begin_inset CommandInset citation -LatexCommand citealp -key "LeeLemieux2010" - -\end_inset - -): -\end_layout - -\begin_layout Standard -\begin_inset Formula -\begin{equation} -Y=\alpha+\tau D+\beta(X-c)+\epsilon\label{eq:ParamStandard} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Standard -where D is a dummy variable, indicating whether the observations are above - (or equal to) the cutoff point, i.e. - -\begin_inset Formula $D=I(X\geq c)$ -\end_inset - -. - The parameter of interest is -\begin_inset Formula $\tau$ -\end_inset - -, which represents the difference in intercepts -\begin_inset Formula $\alpha_{r}-\alpha_{l}$ -\end_inset - -, i.e. - the discontinuity. - Note that equation -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:ParamStandard" - -\end_inset - - imposes the slope to be equal on both sides of the cutoff point. - While such restriction should hold locally around the threshold (due to - the assumption of random assignment around the cutoff point), the parametric - regression is done by default using the whole sample, so the restriction - is unlikely to hold. - In this case, one should rather estimate: -\end_layout - -\begin_layout Standard -\begin_inset Formula -\begin{equation} -Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon\label{eq:Param2slopes} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Standard -so that -\begin_inset Formula $\beta_{1}=\beta_{l}$ -\end_inset - -, and -\begin_inset Formula $\beta_{2}=\beta_{r}-\beta_{l}$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -The two estimators are available with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_lm()} -\end_layout - -\end_inset - - function, the choice between the specifications being made through the - -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{slope=c("separate", "same")} -\end_layout - -\end_inset - - argument: -\end_layout - -\begin_layout Description -separate: the default, estimates different slopes, i.e. - equation -\begin_inset space ~ -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:Param2slopes" - -\end_inset - -. -\end_layout - -\begin_layout Description -same: Estimates a common slope, i.e. - equation -\begin_inset space ~ -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:ParamStandard" - -\end_inset - -. -\end_layout - -\begin_layout Standard -Note that the order of X has been set as 1 in both cases. - If the function shows moderate non-linearity, this can be potentially captured - by adding further power of X, leading to (for the separate slope equation:) -\end_layout - -\begin_layout Standard -\begin_inset Formula -\begin{equation} -Y=\alpha+\tau D+\beta_{1}^{1}(X-c)+\beta_{2}^{1}D(X-c)+\ldots+\beta_{1}^{p}(X-c)^{p}+\beta_{2}^{p}D(X-c)^{p}+\epsilon\label{eq:ParamSlopesPowers} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Standard -The order of the polynomial can be adjusted with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{order} -\end_layout - -\end_inset - - argument. - -\end_layout - -\begin_layout Standard -Finally, the estimator can be restricted to a (symmetric) window around - the cutoff point, as is done usually in practice. - This is done using the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{bw} -\end_layout - -\end_inset - - option. - -\end_layout - -\begin_layout Standard -In summary, the function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_lm()} -\end_layout - -\end_inset - - has three main options: -\end_layout - -\begin_layout Description -slope: Whether to use different slopes on each side of the cutoff (default) - or not. -\end_layout - -\begin_layout Description -order: Order of the polynomial in X. - Default to 1. -\end_layout - -\begin_layout Description -bw: Eventual window to estimate the data. - Default to full data. - -\end_layout - -\begin_layout Standard -We show now the different applications, still using the Lee dataset: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -reg_linear_1 <- RDDreg_lm(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -We now estimate different versions, first restricting the slope to be the - same, then changing the order, and finally using a smaller window: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -reg_linear_2 <- RDDreg_lm(Lee2008_rdd, slope="same") -\end_layout - -\begin_layout Plain Layout - -reg_linear_3 <- RDDreg_lm(Lee2008_rdd, order=3) -\end_layout - -\begin_layout Plain Layout - -reg_linear_4 <- RDDreg_lm(Lee2008_rdd, bw=0.4) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Model's output is shown with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{print()} -\end_layout - -\end_inset - - and -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{summary()} -\end_layout - -\end_inset - - function: while the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{print()} -\end_layout - -\end_inset - - function just shows few informations and the LATE estimate, the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{summary()} -\end_layout - -\end_inset - - function shows the full output of the underlying regression model: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -reg_linear_1 -\end_layout - -\begin_layout Plain Layout - -summary(reg_linear_1) -\end_layout - -\begin_layout Plain Layout - -reg_linear_2 -\end_layout - -\begin_layout Plain Layout - -reg_linear_3 -\end_layout - -\begin_layout Plain Layout - -reg_linear_4 -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Finally, a -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plot()} -\end_layout - -\end_inset - - function adds the estimated curve to the binned plot. - Here we show the difference between the model estimated with polynomial - of order 1 and order 3: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -par(mfrow=c(2,1)) -\end_layout - -\begin_layout Plain Layout - -plot(reg_linear_1) -\end_layout - -\begin_layout Plain Layout - -plot(reg_linear_3) -\end_layout - -\begin_layout Plain Layout - -par(mfrow=c(1,1)) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Non-parametric -\end_layout - -\begin_layout Standard -Although the parametric estimator is often used in practice, another estimator - has important appeal, in this context where one is interested in estimating - a regression just around a cutoff. - In this case, non-parametric estimators such as the local-linear kernel - regression of -\begin_inset CommandInset citation -LatexCommand citet -key "FanGijbels1992,FanGijbels1996" - -\end_inset - -, which aim at estimating a regression locally at each point, have interesting - features, as advocated by -\begin_inset CommandInset citation -LatexCommand citet -key "Porter2003" - -\end_inset - -. - A local linear regression amounts to do a simple weighted linear regression, - where the weights are given by a kernel function. - Formally, the local-linear estimator (LLE) is given by its estimating equation: -\end_layout - -\begin_layout Standard -\begin_inset Note Note -status open - -\begin_layout Plain Layout - -\backslash -hat{ -\backslash -alpha(c)}, -\backslash -hat{ -\backslash -beta(c)}, -\backslash -hat{ -\backslash -tau(c)} = -\backslash -argmin{ -\backslash -alpha, -\backslash -beta, -\backslash -tau} -\backslash -sum_{i=1}^n -\backslash -left(Y_i - -\backslash -alpha - -\backslash -tau D - -\backslash -beta (X_i-c) -\backslash -right )^2 K( -\backslash -frac{X_i-c}{h}) -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard - -\family roman -\series medium -\shape up -\size normal -\emph off -\bar no -\strikeout off -\uuline off -\uwave off -\noun off -\color none -\begin_inset Formula -\begin{equation} -\hat{\alpha}(c),\hat{\beta}(c),\hat{\tau}(c)=\argmin{\alpha,\beta,\tau}\sum_{i=1}^{n}\left(Y_{i}-\alpha-\tau D-\beta(X_{i}-c)\right)^{2}\mathcal{K}\left(\frac{X_{i}-c}{h}\right)\label{eq:LLEform} -\end{equation} - -\end_inset - - -\end_layout - -\begin_layout Standard -where -\begin_inset Formula $\mathcal{K}(\cdot)$ -\end_inset - - is a kernel function attributing weights to each point according to their - distance to the point c. - Note that the parameters -\begin_inset Formula $\alpha$ -\end_inset - -, -\begin_inset Formula $\beta$ -\end_inset - - and -\begin_inset Formula $\tau$ -\end_inset - - are written as of function of -\begin_inset Formula $c$ -\end_inset - - to emphasize the fact that these are -\emph on -local -\emph default - estimate, unlike in the parametric rate. - The kernel used in RDDtools here is the triangular kernel (also called - -\emph on -edge -\emph default - function sometimes): -\begin_inset Formula $K(x)=I(|x|\leq1)(1-|x|)$ -\end_inset - -. - This choice, which departs from the the suggestion of -\begin_inset CommandInset citation -LatexCommand citet -key "LeeLemieux2010" - -\end_inset - -, is driven by the fact that the triangular kernel was shown to be optimal - when one estimates a parameter at a boundary, which is precisely our case - here -\begin_inset CommandInset citation -LatexCommand citep -key "ChengFanEtAl1997" - -\end_inset - -. - Unlike the package -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{rdd} -\end_layout - -\end_inset - -, we do not offer other kernels in -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{RDDtools} -\end_layout - -\end_inset - -, since the kernel selected is optimal, and changing the kernel is found - to have little impact compared to changing the bandwidths. -\end_layout - -\begin_layout Standard -Note that using the LLE estimator reduces to do a weighted OLS (WOLS) at - each point -\begin_inset Foot -status open - -\begin_layout Plain Layout -See -\begin_inset CommandInset citation -LatexCommand citep -after "equ. 3.4, page 58" -key "FanGijbels1996" - -\end_inset - -. - -\end_layout - -\end_inset - -, which allows to use the usual regression function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{lm()} -\end_layout - -\end_inset - - in R, specifying the weights as given by the kernel. - However, although this is a WOLS, the variance of the LLE is not the same - as that of the WOLS, unless one is ready to assume that the bandwidth used - is the true -\emph on -bandwidth -\emph default - -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -A second option is use a smaller bandwidth, in which case standard inference - can be applied. - This has however the drawback of using a sub-optimal bandwidth, with a - slower rate of convergence. - -\end_layout - -\end_inset - -. - However, most, if not all, papers in the literature do use the standard - WOLS inference, eventually adjusted for heteroskedasticity. - This is also done currently in the RDDtools package, although we intend - to do this following the work of -\begin_inset CommandInset citation -LatexCommand citet -key "CalonicoCattaneoEtAl2012" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -Another question arises is the choice of the bandwidth, which is a crucial - question since this choice has a huge impact on the estimation. - Typically, decreasing the bandwidth will reduce the bias of the estimator, - but increase its variance. - One way of choosing the bandwidth is then to try to minimise the mean-squared - error (MSE) of the estimator, which allows to trade-off bias and variance. - This approach is pursued by -\begin_inset CommandInset citation -LatexCommand citet -key "ImbensKalyanaraman2012" - -\end_inset - -, and is available in -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{RDDtools} -\end_layout - -\end_inset - - with the function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDbw_IK()} -\end_layout - -\end_inset - -. - This function takes simply a RDDdata object as input, and returns the optimal - value according to the MSE criterion. - -\end_layout - -\begin_layout Standard -As an illustration, we use now the non-parametric estimator for the Lee - dataset, estimating first the bandwidth and then the discontinuity with - -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_np()} -\end_layout - -\end_inset - -: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -bw_IK <- RDDbw_IK(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -bw_IK -\end_layout - -\begin_layout Plain Layout - -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_IK) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The output, of class -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_np} -\end_layout - -\end_inset - -, has the usual -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{print()} -\end_layout - -\end_inset - -, -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{summary()} -\end_layout - -\end_inset - - and -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plot()} -\end_layout - -\end_inset - - functions: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -reg_nonpara -\end_layout - -\begin_layout Plain Layout - -summary(reg_nonpara) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plot()} -\end_layout - -\end_inset - - function shows the point estimates -\begin_inset Foot -status collapsed - -\begin_layout Plain Layout -Note that the estimates are obtained with the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{locpoly()} -\end_layout - -\end_inset - - function from package -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{KernSmooth} -\end_layout - -\end_inset - -. - This has however the disadvantage that it is not the same kernel used as - in the previously, since the locpoly function uses a gaussian kernel, while - we use a triangular one. - Since this is only for visual purpose, the difference should however not - be perceptible. - Furthermore, using the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{locpoly()} -\end_layout - -\end_inset - - function has the advantage that the algorithm is way faster, since the - authors did implement a fast binned implementation, see -\begin_inset CommandInset citation -LatexCommand citet -after "section 3.6" -key "FanGijbels1996" - -\end_inset - -. - -\end_layout - -\end_inset - - over a grid defined within the bandwidth range, i.e. - the parameter -\begin_inset Formula $\alpha(x)$ -\end_inset - - from equation -\begin_inset space ~ -\end_inset - - -\begin_inset CommandInset ref -LatexCommand ref -reference "eq:LLEform" - -\end_inset - - such as -\begin_inset Formula $\alpha(x)\quad$ -\end_inset - - -\begin_inset Formula $\forall$ -\end_inset - - -\begin_inset Formula $[x-bw;x+bw]$ -\end_inset - -. - This should not be confused with the line drawn in the parametric plots, - which show the curve -\begin_inset Formula $y=f(x)=\hat{\alpha}+\hat{\beta}(x-c)+\hat{\tau}D$ -\end_inset - -. - -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plot(reg_nonpara) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Assessing the sensibility of the estimator -\end_layout - -\begin_layout Standard -Both the parametric and non-parametric estimators are dependent on the choice - of extra-parameters such as the polynomial order, or the bandwidth. - It is however known that this choice can have a big impact, especially - in the case of the bandwidth choice for the non-parametric case. - A simple way to assess the sensitivity of the results is to plot the value - of the estimate against multiple bandwidths. - This is the purpose of the function -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plotSensi()} -\end_layout - -\end_inset - -, which work both on -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_lm()} -\end_layout - -\end_inset - - as well as -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_np()} -\end_layout - -\end_inset - -. - In the former case, the function will assess the sensitivity against the - polynomial order (eventually the bandwidth if it was specified), while - in the latter case against the bandwidth. - -\end_layout - -\begin_layout Standard -We illustrate this on the previous non-parametric estimator: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plotSensi(reg_nonpara, device="ggplot") -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -and we illustrate it also on the parametric estimator where a bandwidth - was specified: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plotSensi(reg_linear_4, device="ggplot") -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Section -Step 3: Validity tests -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -addcontentsline{toc}{section}{Step 3: Validity tests} -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Once the discontinuity estimated and its sensitivity against the bandwidth - choice assessed, the last step in the analysis is to proceed to a few validity - tests. - -\end_layout - -\begin_layout Subsection -Placebo tests -\end_layout - -\begin_layout Standard -A way to convince its readers that the discontinuity one has found is a - true one is to show that it is not the a spurious result one could have - obtained at a random cutoff. - Hence, as advocated by -\begin_inset CommandInset citation -LatexCommand citet -key "ImbensLemieux2008" - -\end_inset - -, one can run placebo tests, where one estimates a discontinuity but at - a different point than the true cutoff. - This is available through the -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{plotPlacebo()} -\end_layout - -\end_inset - - function, which works on -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_lm} -\end_layout - -\end_inset - - or -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -code{RDDreg_np} -\end_layout - -\end_inset - - objects. - An important question is on which point this should be tested. - The fact is that the sample should not contain the cutoff point (so that - the presence of a discontinuity at its point does not impact the estimates - at other points), and be far away from that cutoff (as well as from the - min and max of the whole distribution) so that it contains a fair amount - of points at both sides for estimation. - The default is then to run for points on the left within the first and - last quartiles of the left sample, and the same on the right. -\end_layout - -\begin_layout Standard -We illustrate this on the non-parametric estimator: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -plotPlacebo(reg_nonpara, device="ggplot") -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Subsection -Forcing variable -\end_layout - -\begin_layout Standard -One of the cases where the assumptions underlying the RDD analysis might - be incorrect is when participants are allowed to manipulate the variable - that lead to treatment, i.e. - are able to affect whether they are treated or not. - This question is usually answered factually, looking at the context of - the experiment. - One can however also test whether the forcing variable itself shows a trace - of manipulation, which would result into a discontinuity of its density, - as suggested by -\begin_inset CommandInset citation -LatexCommand citet -key "McCrary2008" - -\end_inset - -. - -\end_layout - -\begin_layout Standard -The test was implemented by D Dimmery in package -\begin_inset ERT -status open - -\begin_layout Plain Layout - - -\backslash -pkg{rdd} -\end_layout - -\end_inset - -, and is simply wrapped by the function dens_test(), so that it works directly - on a RDDdata object: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -dens_test(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -The test automatically returns a plot, showing the density estimates at - the left and right of the cutoff, together with the confidence intervals - of these estimates. - One rejects the null hypothesis of no discontinuity if visually the confidence - intervals do not overlap. - -\end_layout - -\begin_layout Subsection -Baseline Covariates -\end_layout - -\begin_layout Standard -Another crucial assumption in RDD is that treatment is randomly distributed - around the cutoff, so that individuals around are similar. - This can be easily tested, as is done in the Randomised Control Trial (RCT) - case, by running test for balanced covariates. - Two kinds of tests have been implemented, allowing to test equality in - means (t-test) or in distribution (Kolmogorov-Smirnov). - As this is a typical case of multiple testing, both functions offers the - possibility to adjust the p-values with various procedures such as the - Bonferoni, Holmes or the more recent Benjamini-Hochberg procedures. - -\end_layout - -\begin_layout Standard -We run here the equality in means test: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -covarTest_mean(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -as well as the equality in distribution test: -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -<<>>= -\end_layout - -\begin_layout Plain Layout - -covarTest_dis(Lee2008_rdd) -\end_layout - -\begin_layout Plain Layout - -@ -\end_layout - -\end_inset - - -\end_layout - -\begin_layout Standard -Since the covariates were generated randomly with a single parameter, we - would expect that no equality test is rejected. - -\end_layout - -\begin_layout Section -Conclusion -\end_layout - -\begin_layout Standard -\begin_inset CommandInset bibtex -LatexCommand bibtex -bibfiles "RDD_refs" -options "bibtotoc,econometrica" - -\end_inset - - -\end_layout - -\begin_layout Standard -\begin_inset ERT -status open - -\begin_layout Plain Layout - -% -\backslash -addcontentsline{toc}{section}{ -\backslash -refname} -\end_layout - -\begin_layout Plain Layout - -% -\backslash -bibliography{./RDDrefs} -\end_layout - -\begin_layout Plain Layout - -% -\backslash -bibliography{/home/mat/Dropbox/Documents/Ordi/Bibtex/GeneralBiblio,/home/mat/Dro -pbox/Documents/Ordi/Bibtex/biblioFAO_mat} -\end_layout - -\end_inset - - -\end_layout - -\end_body -\end_document diff --git a/RDDtools/vignettes/RDDtools.pdf b/RDDtools/vignettes/RDDtools.pdf deleted file mode 100644 index c52136a..0000000 Binary files a/RDDtools/vignettes/RDDtools.pdf and /dev/null differ diff --git a/RDDtools/vignettes/RDDtools.tex b/RDDtools/vignettes/RDDtools.tex deleted file mode 100644 index 6a7a261..0000000 --- a/RDDtools/vignettes/RDDtools.tex +++ /dev/null @@ -1,887 +0,0 @@ -%% LyX 2.1.0 created this file. For more info, see http://www.lyx.org/. -%% Do not edit unless you really know what you are doing. -\documentclass[english,nojss]{jss}\usepackage[]{graphicx}\usepackage[]{color} -%% maxwidth is the original width if it is less than linewidth -%% otherwise use linewidth (to make sure the graphics do not exceed the margin) -\makeatletter -\def\maxwidth{ % - \ifdim\Gin@nat@width>\linewidth - \linewidth - \else - \Gin@nat@width - \fi -} -\makeatother - -\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} -\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% -\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% -\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% -\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% -\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% -\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% -\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% -\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% - -\usepackage{framed} -\makeatletter -\newenvironment{kframe}{% - \def\at@end@of@kframe{}% - \ifinner\ifhmode% - \def\at@end@of@kframe{\end{minipage}}% - \begin{minipage}{\columnwidth}% - \fi\fi% - \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep - \colorbox{shadecolor}{##1}\hskip-\fboxsep - % There is no \\@totalrightmargin, so: - \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% - \MakeFramed {\advance\hsize-\width - \@totalleftmargin\z@ \linewidth\hsize - \@setminipage}}% - {\par\unskip\endMakeFramed% - \at@end@of@kframe} -\makeatother - -\definecolor{shadecolor}{rgb}{.97, .97, .97} -\definecolor{messagecolor}{rgb}{0, 0, 0} -\definecolor{warningcolor}{rgb}{1, 0, 1} -\definecolor{errorcolor}{rgb}{1, 0, 0} -\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX - -\usepackage{alltt} -\usepackage[T1]{fontenc} -\usepackage[latin9]{inputenc} -\usepackage{amssymb} -\usepackage[authoryear]{natbib} - -\makeatletter -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands. - %\usepackage{Sweave} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands. - -\usepackage{amsmath} -\usepackage{nameref} - -%the following commands are used only for articles and codesnippets - -\author{Matthieu Stigler\\Affiliation IHEID} -\title{\pkg{RDDtools}: an overview } - -% the same as above, without any formatting -\Plainauthor{Matthieu Stigler} -\Plaintitle{\pkg{RDDtools}: a toolbox to practice } -%if necessary, provide a short title -\Shorttitle{\pkg{RDDtools}: a toolbox to practice } - -\Abstract{\pkg{RDDtools} is a R package for sharp regression discontinuity design (RDD). It offers various estimators, tests and graphical procedures following the guidelines of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. This note illustrate how to use the package, using the well-known dataset of \citet{Lee2008}. - - -NOTE THAT this is a preliminary note, on a preliminary package still under development. Changes of the function names, arguments and output are to be expected, as well as possible mistakes and inconsistencies. Please report any mistakes or suggestion to \email{Matthieu.Stigler@iheid.ch}} -%at least one keyword is needed -\Keywords{Regression discontinuity design, non-parametric analysis, \pkg{RDDtools}, \proglang{R}} -%the same as above, without any formatting -\Plainkeywords{Regression discontinuity design, non-parametric analysis,RDDtools, R} - -%the following commands are used only for book or software reviews - -%\Reviewer{Some Author\\University of Somewhere} -%\Plainreviewer{Some Author} - -%the following commands are used only for book reviews -%\Booktitle{LyX and \proglang{R}: Secrets of the LyX Master} -%\Bookauthor{Book Author} -%\Pubyear{2008} -%\ISBN{0-12345-678-9} -%\Pages{500} - -%the following command is used only for software reviews -%\Softwaretitle{\proglang{gretl 1.7.4}} - -%the following commands are used only for book or software reviews -%\Publisher{LyX Publishing Inc.} -%\Pubaddress{LyX City} -%\Price{USD 59.95 (P), USD 99.95 (H)} -%\URL{http://www.lyx.org/} - -%without any formatting -%\Plaintitle{LyX and R: Secrets of the LyX Master} -%\Shorttitle{LyX and R} - -%the following commands are used for articles, codesnippets, book reviews and software reviews - -%publication information -%do not use these commands before the article has been accepted -%\Volume{00} -%\Issue{0} -%\Month{Month} -%\Year{2000} -%\Submitdate{2000-00-00} -%\Acceptdate{2000-00-00} - -%The address of at least one author should be given in the following format -\Address{ - Matthieu Stigler\\ - Centre for Finance and development\\ - IHEID\\ - Geneva\\ - E-mail: \email{Matthieu.Stigler@iheid.ch} -} -%you can add a telephone and fax number before the e-mail in the format -%Telephone: +12/3/4567-89 -%Fax: +12/3/4567-89 - -%if you use Sweave, include the following line (with % symbols): -%% need no \usepackage{Sweave.sty} - -%% Arg min operator: -\DeclareMathOperator*{\argmi}{arg\,min} -\newcommand{\argmin}[1]{\underset{#1}{\argmi}} - -\DeclareMathOperator*{\Ker}{\mathcal{K}} - -\makeatother - -\usepackage{babel} -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -\begin{document} -\tableofcontents{} - - -\section{Introduction} - -\addcontentsline{toc}{section}{Introduction} - - -\subsection{Introduction to RDD} - - - - -\subsection{Introduction to RDDtools} - -The R package \pkg{RDDtools} aims at offering a complete a toolbox -for regression discontinuity design, following the step-by-step recommendations -of \citet{ImbensLemieux2008} and \citet{LeeLemieux2010}. Summarising -the approaches advocated in the two papers, a RDD analysis comprises -of following steps: -\begin{enumerate} -\item Graphical representation of the data -\item Estimation -\item Validity tests -\end{enumerate} -We add to this list a step that is too often forgotten, yet can be -very burdensome: data preparation. Hence, this list is extended with -the fundamental step 0, which involves preparing the data in the right -way. - -\pkg{RDDtools} offers an object-oriented way to analysis, building -on the R mechanism of S3 methods and classes. Concretely, this implies -that the user has to specify the input data only once, and that most -of the functions can be called directly on the new object of class -\code{RDDdata}. - - -\section{Step 0: data input} - -\addcontentsline{toc}{section}{Step 0: data input} - -As first step of the analysis, the user has to specify the input data -into the \code{RDDdata} function, which takes following arguments: -\begin{description} -\item [{y}] The outcome variable -\item [{x}] The forcing variable -\item [{cutpoint}] The cutpoint/threshold (note only one cutpoint can be -given) -\item [{covar}] Eventual covariates -\end{description} -The RDDdata function returns an object of class \code{RDDdata}, as -well as of the usual \proglang{R} class \code{data.frame}. - -To illustrate this, we show how to use this with the benchmark dataset -of \citet{Lee2008}, adding randomly generated covariates for the -sake of illustration. The dataset is shipped with the package, and -is available under the name \emph{Lee2008. }Using the R \code{head} -function, we look at the first rows of the dataset: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{library}\hlstd{(RDDtools)} -\hlkwd{data}\hlstd{(Lee2008)} -\hlkwd{head}\hlstd{(Lee2008)} -\end{alltt} -\begin{verbatim} -## x y -## 1 0.1049 0.5810 -## 2 0.1393 0.4611 -## 3 -0.0736 0.5434 -## 4 0.0868 0.5846 -## 5 0.3994 0.5803 -## 6 0.1681 0.6244 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -The data is already clean, so the only step required is to fit it -into the RDDdata function, adding however the information on the cutpoint. -For illustration purpose, we add also some random covariates as a -matrix Z: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{n_Lee} \hlkwb{<-} \hlkwd{nrow}\hlstd{(Lee2008)} -\hlstd{Z} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{z1} \hlstd{=} \hlkwd{rnorm}\hlstd{(n_Lee),} \hlkwc{z2} \hlstd{=} \hlkwd{rnorm}\hlstd{(n_Lee,} \hlkwc{mean} \hlstd{=} \hlnum{20}\hlstd{,} \hlkwc{sd} \hlstd{=} \hlnum{2}\hlstd{),} \hlkwc{z3} \hlstd{=} \hlkwd{sample}\hlstd{(letters[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{],} - \hlkwc{size} \hlstd{= n_Lee,} \hlkwc{replace} \hlstd{=} \hlnum{TRUE}\hlstd{))} -\hlstd{Lee2008_rdd} \hlkwb{<-} \hlkwd{RDDdata}\hlstd{(}\hlkwc{y} \hlstd{= Lee2008}\hlopt{$}\hlstd{y,} \hlkwc{x} \hlstd{= Lee2008}\hlopt{$}\hlstd{x,} \hlkwc{covar} \hlstd{= Z,} \hlkwc{cutpoint} \hlstd{=} \hlnum{0}\hlstd{)} -\end{alltt} -\end{kframe} -\end{knitrout} - - -We now have an object \code{Lee2008_rdd} of class \code{RDDdata} -(and \code{data.frame}). It has a specific \code{summary} method, -which gives a few summary informations about the dataset: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{summary}\hlstd{(Lee2008_rdd)} -\end{alltt} -\begin{verbatim} -## ### RDDdata object ### -## -## Cutpoint: 0 -## Sample size: -## -Full : 6558 -## -Left : 2740 -## -Right: 3818 -## Covariates: yes -\end{verbatim} -\end{kframe} -\end{knitrout} - - -Another function for \code{RDDdata} objects is the \code{plot()} -function, discussed in the next section. - - -\section{Step 1: Graphical representation} - -\addcontentsline{toc}{section}{Step 1: Graphical representation} - -Once the dataset has been formatted with the RDDdata function, it -can be used directly for simple illustration. Indeed, as recommended -by \citet{LeeLemieux2010}, it is always good to show the raw data -first, if ones wishes to convince that there is a discontinuity. This -is simply done using the standard R plot() function, which has been -customised for RDDdata objects. The function shows a scatter plot -of the outcome variable against the forcing variable. Following \citet{LeeLemieux2010}, -not all single datapoints are shown: instead, a ``binned'' scatterplot -is shown, using non-overlapping averages: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(Lee2008_rdd)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-4} - -\end{knitrout} - - -The bandwidth for the bins (also called binwidth) can be set by the -user with the \code{h} argument. If this it is not provided by the -user, the function uses by default the global bandwidth of \citet{RuppertSheatherEtAl1995}, -implemented in the \code{RDDbw_RSW()} function. - -Another argument that might be useful for the user is the option \code{nplot}, -which allows to plot multiple plots with different bandwidths: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(Lee2008_rdd,} \hlkwc{nplot} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{h} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0.02}\hlstd{,} \hlnum{0.03}\hlstd{,} \hlnum{0.04}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-5} - -\end{knitrout} - - -Note however that experience shows that showing multiple plots have -the effect to shrink considerably the y axis, reducing the visual -impression of discontinuity. - - -\section{Step 2: Estimation} - -\addcontentsline{toc}{section}{Step 2: Estimation} - -RDDtools offers currently two estimators: -\begin{itemize} -\item the simple parametric estimator: function \code{RDDreg_lm()}. -\item the non-parametric local-linear estimator: function \code{RDDreg_np()}. -\end{itemize} -These two functions share some common arguments, which are: -\begin{description} -\item [{RDDobject:}] the input data as obtained with the \code{RDDdata()} -function -\item [{bw:}] the bandwidth. -\item [{covariates:}] this will allow to add covariates in the analysis. -Note that it is presently NOT used. -\end{description} -The bandwidth argument has a different behaviour in the parametric -and non-parametric way: while the parametric estimation can be done -without bandwidth, the non-parametric estimator is by definition based -on a bandwidth. This means that the default behaviours are different: -if no bandwidth is given for the parametric model, the model will -be simply estimated withut bandwidth, that is covering the full sample -on both sides of the cutpoint. On the other side, if no bandwidth -is provided in the non-parametric case, a bandwidth will still be -computed automatically using the method advocated by \citet{ImbensKalyanaraman2012}. - - -\subsection{Parametric} - -The parametric estimator simply estimates a function over the whole -sample (hence called \emph{pooled regression} by \citealp{LeeLemieux2010}): - -\begin{equation} -Y=\alpha+\tau D+\beta(X-c)+\epsilon\label{eq:ParamStandard} -\end{equation} - - -where D is a dummy variable, indicating whether the observations are -above (or equal to) the cutoff point, i.e. $D=I(X\geq c)$. The parameter -of interest is $\tau$, which represents the difference in intercepts -$\alpha_{r}-\alpha_{l}$, i.e. the discontinuity. Note that equation -\ref{eq:ParamStandard} imposes the slope to be equal on both sides -of the cutoff point. While such restriction should hold locally around -the threshold (due to the assumption of random assignment around the -cutoff point), the parametric regression is done by default using -the whole sample, so the restriction is unlikely to hold. In this -case, one should rather estimate: - -\begin{equation} -Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon\label{eq:Param2slopes} -\end{equation} - - -so that $\beta_{1}=\beta_{l}$, and $\beta_{2}=\beta_{r}-\beta_{l}$. - -The two estimators are available with the \code{RDDreg_lm()} function, -the choice between the specifications being made through the \code{slope=c("separate", "same")} -argument: -\begin{description} -\item [{separate:}] the default, estimates different slopes, i.e. equation~\ref{eq:Param2slopes}. -\item [{same:}] Estimates a common slope, i.e. equation~\ref{eq:ParamStandard}. -\end{description} -Note that the order of X has been set as 1 in both cases. If the function -shows moderate non-linearity, this can be potentially captured by -adding further power of X, leading to (for the separate slope equation:) - -\begin{equation} -Y=\alpha+\tau D+\beta_{1}^{1}(X-c)+\beta_{2}^{1}D(X-c)+\ldots+\beta_{1}^{p}(X-c)^{p}+\beta_{2}^{p}D(X-c)^{p}+\epsilon\label{eq:ParamSlopesPowers} -\end{equation} - - -The order of the polynomial can be adjusted with the \code{order} -argument. - -Finally, the estimator can be restricted to a (symmetric) window around -the cutoff point, as is done usually in practice. This is done using -the \code{bw} option. - -In summary, the function \code{RDDreg_lm()} has three main options: -\begin{description} -\item [{slope:}] Whether to use different slopes on each side of the cutoff -(default) or not. -\item [{order:}] Order of the polynomial in X. Default to 1. -\item [{bw:}] Eventual window to estimate the data. Default to full data. -\end{description} -We show now the different applications, still using the Lee dataset: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{reg_linear_1} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd)} -\end{alltt} -\end{kframe} -\end{knitrout} - - -We now estimate different versions, first restricting the slope to -be the same, then changing the order, and finally using a smaller -window: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{reg_linear_2} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{slope} \hlstd{=} \hlstr{"same"}\hlstd{)} -\hlstd{reg_linear_3} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{order} \hlstd{=} \hlnum{3}\hlstd{)} -\hlstd{reg_linear_4} \hlkwb{<-} \hlkwd{RDDreg_lm}\hlstd{(Lee2008_rdd,} \hlkwc{bw} \hlstd{=} \hlnum{0.4}\hlstd{)} -\end{alltt} -\end{kframe} -\end{knitrout} - - -Model's output is shown with the \code{print()} and \code{summary()} -function: while the \code{print()} function just shows few informations -and the LATE estimate, the \code{summary()} function shows the full -output of the underlying regression model: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{reg_linear_1} -\end{alltt} -\begin{verbatim} -## ### RDD regression: parametric ### -## Polynomial order: 1 -## Slopes: separate -## Number of obs: 6558 (left: 2740, right: 3818) -## -## Coefficient: -## Estimate Std. Error t value Pr(>|t|) -## D 0.11823 0.00568 20.8 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -\end{verbatim} -\begin{alltt} -\hlkwd{summary}\hlstd{(reg_linear_1)} -\end{alltt} -\begin{verbatim} -## -## Call: -## lm(formula = y ~ ., data = dat_step1, weights = weights) -## -## Residuals: -## Min 1Q Median 3Q Max -## -0.8941 -0.0619 0.0023 0.0713 0.8640 -## -## Coefficients: -## Estimate Std. Error t value Pr(>|t|) -## (Intercept) 0.43295 0.00428 101.25 < 2e-16 *** -## D 0.11823 0.00568 20.82 < 2e-16 *** -## x 0.29691 0.01155 25.71 < 2e-16 *** -## x_right 0.04598 0.01350 3.41 0.00066 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -## -## Residual standard error: 0.138 on 6554 degrees of freedom -## Multiple R-squared: 0.671, Adjusted R-squared: 0.671 -## F-statistic: 4.45e+03 on 3 and 6554 DF, p-value: <2e-16 -\end{verbatim} -\begin{alltt} -\hlstd{reg_linear_2} -\end{alltt} -\begin{verbatim} -## ### RDD regression: parametric ### -## Polynomial order: 1 -## Slopes: same -## Number of obs: 6558 (left: 2740, right: 3818) -## -## Coefficient: -## Estimate Std. Error t value Pr(>|t|) -## D 0.11373 0.00553 20.6 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -\end{verbatim} -\begin{alltt} -\hlstd{reg_linear_3} -\end{alltt} -\begin{verbatim} -## ### RDD regression: parametric ### -## Polynomial order: 3 -## Slopes: separate -## Number of obs: 6558 (left: 2740, right: 3818) -## -## Coefficient: -## Estimate Std. Error t value Pr(>|t|) -## D 0.1115 0.0107 10.5 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -\end{verbatim} -\begin{alltt} -\hlstd{reg_linear_4} -\end{alltt} -\begin{verbatim} -## ### RDD regression: parametric ### -## Polynomial order: 1 -## Slopes: separate -## Bandwidth: 0.4 -## Number of obs: 4169 (left: 2043, right: 2126) -## -## Coefficient: -## Estimate Std. Error t value Pr(>|t|) -## D 0.08863 0.00727 12.2 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -Finally, a \code{plot()} function adds the estimated curve to the -binned plot. Here we show the difference between the model estimated -with polynomial of order 1 and order 3: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,} \hlnum{1}\hlstd{))} -\hlkwd{plot}\hlstd{(reg_linear_1)} -\hlkwd{plot}\hlstd{(reg_linear_3)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-9} -\begin{kframe}\begin{alltt} -\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,} \hlnum{1}\hlstd{))} -\end{alltt} -\end{kframe} -\end{knitrout} - - - -\subsection{Non-parametric} - -Although the parametric estimator is often used in practice, another -estimator has important appeal, in this context where one is interested -in estimating a regression just around a cutoff. In this case, non-parametric -estimators such as the local-linear kernel regression of \citet{FanGijbels1992,FanGijbels1996}, -which aim at estimating a regression locally at each point, have interesting -features, as advocated by \citet{Porter2003}. A local linear regression -amounts to do a simple weighted linear regression, where the weights -are given by a kernel function. Formally, the local-linear estimator -(LLE) is given by its estimating equation: - - - -\begin{equation} -\hat{\alpha}(c),\hat{\beta}(c),\hat{\tau}(c)=\argmin{\alpha,\beta,\tau}\sum_{i=1}^{n}\left(Y_{i}-\alpha-\tau D-\beta(X_{i}-c)\right)^{2}\mathcal{K}\left(\frac{X_{i}-c}{h}\right)\label{eq:LLEform} -\end{equation} - - -where $\mathcal{K}(\cdot)$ is a kernel function attributing weights -to each point according to their distance to the point c. Note that -the parameters $\alpha$, $\beta$ and $\tau$ are written as of function -of $c$ to emphasize the fact that these are \emph{local} estimate, -unlike in the parametric rate. The kernel used in RDDtools here is -the triangular kernel (also called \emph{edge} function sometimes): -$K(x)=I(|x|\leq1)(1-|x|)$. This choice, which departs from the the -suggestion of \citet{LeeLemieux2010}, is driven by the fact that -the triangular kernel was shown to be optimal when one estimates a -parameter at a boundary, which is precisely our case here \citep{ChengFanEtAl1997}. -Unlike the package \pkg{rdd}, we do not offer other kernels in \pkg{RDDtools}, -since the kernel selected is optimal, and changing the kernel is found -to have little impact compared to changing the bandwidths. - -Note that using the LLE estimator reduces to do a weighted OLS (WOLS) -at each point% -\footnote{See \citep[equ. 3.4, page 58]{FanGijbels1996}. % -}, which allows to use the usual regression function \code{lm()} in -R, specifying the weights as given by the kernel. However, although -this is a WOLS, the variance of the LLE is not the same as that of -the WOLS, unless one is ready to assume that the bandwidth used is -the true \emph{bandwidth}% -\footnote{A second option is use a smaller bandwidth, in which case standard -inference can be applied. This has however the drawback of using a -sub-optimal bandwidth, with a slower rate of convergence. % -}. However, most, if not all, papers in the literature do use the standard -WOLS inference, eventually adjusted for heteroskedasticity. This is -also done currently in the RDDtools package, although we intend to -do this following the work of \citet{CalonicoCattaneoEtAl2012}. - -Another question arises is the choice of the bandwidth, which is a -crucial question since this choice has a huge impact on the estimation. -Typically, decreasing the bandwidth will reduce the bias of the estimator, -but increase its variance. One way of choosing the bandwidth is then -to try to minimise the mean-squared error (MSE) of the estimator, -which allows to trade-off bias and variance. This approach is pursued -by \citet{ImbensKalyanaraman2012}, and is available in \pkg{RDDtools} -with the function \code{RDDbw_IK()}. This function takes simply a -RDDdata object as input, and returns the optimal value according to -the MSE criterion. - -As an illustration, we use now the non-parametric estimator for the -Lee dataset, estimating first the bandwidth and then the discontinuity -with \code{RDDreg_np()}: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{bw_IK} \hlkwb{<-} \hlkwd{RDDbw_IK}\hlstd{(Lee2008_rdd)} -\hlstd{bw_IK} -\end{alltt} -\begin{verbatim} -## h_opt -## 0.2939 -\end{verbatim} -\begin{alltt} -\hlstd{reg_nonpara} \hlkwb{<-} \hlkwd{RDDreg_np}\hlstd{(}\hlkwc{RDDobject} \hlstd{= Lee2008_rdd,} \hlkwc{bw} \hlstd{= bw_IK)} -\end{alltt} -\end{kframe} -\end{knitrout} - - -The output, of class \code{RDDreg_np}, has the usual \code{print()}, -\code{summary()} and \code{plot()} functions: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{reg_nonpara} -\end{alltt} -\begin{verbatim} -## ### RDD regression: nonparametric local linear### -## Bandwidth: 0.2939 -## Number of obs: 3200 (left: 1594, right: 1606) -## -## Coefficient: -## Estimate Std. Error z value Pr(>|z|) -## D 0.07992 0.00946 8.44 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -\end{verbatim} -\begin{alltt} -\hlkwd{summary}\hlstd{(reg_nonpara)} -\end{alltt} -\begin{verbatim} -## ### RDD regression: nonparametric local linear### -## Bandwidth: 0.2939 -## Number of obs: 3200 (left: 1594, right: 1606) -## -## Weighted Residuals: -## Min 1Q Median 3Q Max -## -0.9775 -0.0672 -0.0050 0.0450 0.9376 -## -## Coefficient: -## Estimate Std. Error z value Pr(>|z|) -## D 0.07992 0.00946 8.44 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -## -## Local R squared: 0.356 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -The \code{plot()} function shows the point estimates% -\footnote{Note that the estimates are obtained with the \code{locpoly()} function -from package \pkg{KernSmooth}. This has however the disadvantage -that it is not the same kernel used as in the previously, since the -locpoly function uses a gaussian kernel, while we use a triangular -one. Since this is only for visual purpose, the difference should -however not be perceptible. Furthermore, using the \code{locpoly()} -function has the advantage that the algorithm is way faster, since -the authors did implement a fast binned implementation, see \citet[section 3.6]{FanGijbels1996}. % -} over a grid defined within the bandwidth range, i.e. the parameter -$\alpha(x)$ from equation~\ref{eq:LLEform} such as $\alpha(x)\quad$$\forall$ -$[x-bw;x+bw]$. This should not be confused with the line drawn in -the parametric plots, which show the curve $y=f(x)=\hat{\alpha}+\hat{\beta}(x-c)+\hat{\tau}D$. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(reg_nonpara)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-12} - -\end{knitrout} - - - -\subsection{Assessing the sensibility of the estimator} - -Both the parametric and non-parametric estimators are dependent on -the choice of extra-parameters such as the polynomial order, or the -bandwidth. It is however known that this choice can have a big impact, -especially in the case of the bandwidth choice for the non-parametric -case. A simple way to assess the sensitivity of the results is to -plot the value of the estimate against multiple bandwidths. This is -the purpose of the function \code{plotSensi()}, which work both on -\code{RDDreg_lm()} as well as \code{RDDreg_np()}. In the former -case, the function will assess the sensitivity against the polynomial -order (eventually the bandwidth if it was specified), while in the -latter case against the bandwidth. - -We illustrate this on the previous non-parametric estimator: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plotSensi}\hlstd{(reg_nonpara,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-13} - -\end{knitrout} - - -and we illustrate it also on the parametric estimator where a bandwidth -was specified: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plotSensi}\hlstd{(reg_linear_4,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-14} - -\end{knitrout} - - - -\section{Step 3: Validity tests} - -\addcontentsline{toc}{section}{Step 3: Validity tests} - -Once the discontinuity estimated and its sensitivity against the bandwidth -choice assessed, the last step in the analysis is to proceed to a -few validity tests. - - -\subsection{Placebo tests} - -A way to convince its readers that the discontinuity one has found -is a true one is to show that it is not the a spurious result one -could have obtained at a random cutoff. Hence, as advocated by \citet{ImbensLemieux2008}, -one can run placebo tests, where one estimates a discontinuity but -at a different point than the true cutoff. This is available through -the \code{plotPlacebo()} function, which works on \code{RDDreg_lm} -or \code{RDDreg_np} objects. An important question is on which point -this should be tested. The fact is that the sample should not contain -the cutoff point (so that the presence of a discontinuity at its point -does not impact the estimates at other points), and be far away from -that cutoff (as well as from the min and max of the whole distribution) -so that it contains a fair amount of points at both sides for estimation. -The default is then to run for points on the left within the first -and last quartiles of the left sample, and the same on the right. - -We illustrate this on the non-parametric estimator: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plotPlacebo}\hlstd{(reg_nonpara,} \hlkwc{device} \hlstd{=} \hlstr{"ggplot"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-15} - -\end{knitrout} - - - -\subsection{Forcing variable} - -One of the cases where the assumptions underlying the RDD analysis -might be incorrect is when participants are allowed to manipulate -the variable that lead to treatment, i.e. are able to affect whether -they are treated or not. This question is usually answered factually, -looking at the context of the experiment. One can however also test -whether the forcing variable itself shows a trace of manipulation, -which would result into a discontinuity of its density, as suggested -by \citet{McCrary2008}. - -The test was implemented by D Dimmery in package \pkg{rdd}, and is -simply wrapped by the function dens\_test(), so that it works directly -on a RDDdata object: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{dens_test}\hlstd{(Lee2008_rdd)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/unnamed-chunk-16} -\begin{kframe}\begin{verbatim} -## -## McCrary Test for no discontinuity of density around cutpoint -## -## data: Lee2008_rdd -## z-val = 1.295, p-value = 0.1952 -## alternative hypothesis: Density is discontinuous around cutpoint -## sample estimates: -## Discontinuity -## 0.1035 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -The test automatically returns a plot, showing the density estimates -at the left and right of the cutoff, together with the confidence -intervals of these estimates. One rejects the null hypothesis of no -discontinuity if visually the confidence intervals do not overlap. - - -\subsection{Baseline Covariates} - -Another crucial assumption in RDD is that treatment is randomly distributed -around the cutoff, so that individuals around are similar. This can -be easily tested, as is done in the Randomised Control Trial (RCT) -case, by running test for balanced covariates. Two kinds of tests -have been implemented, allowing to test equality in means (t-test) -or in distribution (Kolmogorov-Smirnov). As this is a typical case -of multiple testing, both functions offers the possibility to adjust -the p-values with various procedures such as the Bonferoni, Holmes -or the more recent Benjamini-Hochberg procedures. - -We run here the equality in means test: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{covarTest_mean}\hlstd{(Lee2008_rdd)} -\end{alltt} -\begin{verbatim} -## mean of x mean of y Difference statistic p.value -## z1 0.03658 0.01154 -0.02504 1.019 0.3082 -## z2 20.02 20 -0.02255 0.4549 0.6492 -## z3 2.008 2.009 0.001503 -0.07364 0.9413 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -as well as the equality in distribution test: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{covarTest_dis}\hlstd{(Lee2008_rdd)} -\end{alltt} - - -{\ttfamily\noindent\color{warningcolor}{\#\# Warning: p-value will be approximate in the presence of ties}}\begin{verbatim} -## statistic p.value -## z1 0.02406 0.3145 -## z2 0.0157 0.8263 -## z3 0.004626 1 -\end{verbatim} -\end{kframe} -\end{knitrout} - - -Since the covariates were generated randomly with a single parameter, -we would expect that no equality test is rejected. - - -\section{Conclusion} - -\bibliographystyle{econometrica} -\addcontentsline{toc}{section}{\refname}\bibliography{RDD_refs} - - -%\addcontentsline{toc}{section}{\refname} -%\bibliography{./RDDrefs} -%\bibliography{/home/mat/Dropbox/Documents/Ordi/Bibtex/GeneralBiblio,/home/mat/Dropbox/Documents/Ordi/Bibtex/biblioFAO_mat} -\end{document} diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index 3981194..0000000 --- a/README.Rmd +++ /dev/null @@ -1,173 +0,0 @@ -RDDtools: an R package for Regression Discontinuity Design -======================================================== - -**RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing. - - -Installing **RDDtools** ------------------------ - -This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **devtools**: - -```{r eval=FALSE} -library(devtools) -install_github(repo="RDDtools", username="MatthieuStigler", subdir="RDDtools") -``` - -Note however the latest version of RDDtools only works with R 3.0, and that you might need to install [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows. - - -Documentation ------------------------ -The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github. - -RDDtools: main features ------------------------ - - -+ Simple visualisation of the data using binned-plot: **plot()** - -+ Bandwidth selection: - + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()** - + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()** -+ Estimation: - + RDD parametric estimation: **RDDreg_lm()** This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008]. - + RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation). - + RDD generalised estimation: allows to use custom estimating functions to get the RDD coefficient. Could allow for example a probit RDD, or quantile regression. -+ Post-Estimation tools: - + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ). - + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]). -+ Regression sensitivity analysis: - + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()** - + *Placebo plot* using different cutpoints: **plotPlacebo()** -+ Design sensitivity analysis: - + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**. - + Test of equal means of covariates: **covarTest_mean()** - + Test of equal density of covariates: **covarTest_dens()** -+ Datasets - + Contains the seminal dataset of [Lee 2008]: **Lee2008** - + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()** - -Using RDDtools: a quick example ------------------------ -**RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *RDDdata* object, on which different anaylsis tools can be applied. - -### Data preparation and visualisation -Load the package, and load the built-in dataset from [Lee 2008]: - -```{r options, echo=FALSE} -opts_chunk$set(warning= FALSE, message=FALSE, fig.align="center", fig.path='figuresREADME/') -``` - - -```{r} -library(RDDtools) -data(Lee2008) -``` - -Declare the data to be a *RDDdata* object: - -```{r} -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -``` - - -You can now directly summarise and visualise this data: - -```{r dataPlot} -summary(Lee2008_rdd) -plot(Lee2008_rdd) -``` - -### Estimation - -#### Parametric - -Estimate parametrically, by fitting a 4th order polynomial: -```{r reg_para} -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) -reg_para - -plot(reg_para) -``` - - -#### Non-parametric -As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth: -```{r RegPlot} -bw_ik <- RDDbw_IK(Lee2008_rdd) -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik) -print(reg_nonpara) -plot(x=reg_nonpara) - -``` - -### Regression Sensitivity tests: - -One can easily check the sensitivity of the estimate to different bandwidths: -```{r SensiPlot} -plotSensi(reg_nonpara, from=0.05, to=1, by=0.1) -``` - -Or run the Placebo test, estimating the RDD effect based on fake cutpoints: -```{r placeboPlot} -plotPlacebo(reg_nonpara) -``` - -### Design Sensitivity tests: - -Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available: - -+ Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()** -+ Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available: - -#### Discontinuity comes from manipulation: McCrary test - -use simply the function **dens_test()**, on either the raw data, or the regression output: -```{r DensPlot} -dens_test(reg_nonpara) -``` - -#### Discontinuity comes from covariates: covariates balance tests - -Two tests available: -+ equal means of covariates: **covarTest_mean()** -+ equal density of covariates: **covarTest_dens()** - - -We need here to simulate some data, given that the Lee (2008) dataset contains no covariates. -We here simulate three variables, with the second having a different mean on the left and the right. - -```{r} -set.seed(123) -n_Lee <- nrow(Lee2008) -Z <- data.frame(z1 = rnorm(n_Lee, sd=2), - z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), - z3 = sample(letters, size = n_Lee, replace = TRUE)) -Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) -``` - - -Run the tests: -```{r} -## test for equality of means around cutoff: -covarTest_mean(Lee2008_rdd_Z, bw=0.3) - -## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: -covarTest_dis(Lee2008_rdd_Z, bw=0.3) -``` - -Tests correctly reject equality of the second, and correctly do not reject equality for the first and third. - - [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959" - - [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697" - - [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635" - - [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427" - - [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270." - - - \ No newline at end of file diff --git a/README.md b/README.md index 81662c0..37b57a6 100644 --- a/README.md +++ b/README.md @@ -1,278 +1,70 @@ -RDDtools: an R package for Regression Discontinuity Design -======================================================== +rddtools +======== -**RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing. +[![License](https://img.shields.io/badge/license-GPLv3-brightgreen.svg?style=flat)](https://www.gnu.org/licenses/gpl-3.0.html) +[![CRAN Version](https://www.r-pkg.org/badges/version/rddtools)](https://cran.r-project.org/package=rddtools) +[![R build status](https://github.com/bquast/rddtools/workflows/R-CMD-check/badge.svg)](https://github.com/bquast/rddtools/actions?workflow=R-CMD-check) +[![Total RStudio Cloud Downloads](https://cranlogs.r-pkg.org/badges/grand-total/rddtools?color=brightgreen)](https://cran.r-project.org/package=rddtools) +[![RStudio Cloud Downloads](https://cranlogs.r-pkg.org/badges/rddtools?color=brightgreen)](https://cran.r-project.org/package=rddtools) +**rddtools** is an R package designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing. -Installing **RDDtools** + +Installing **rddtools** ----------------------- This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **devtools**: - ```r -library(devtools) -install_github(repo = "RDDtools", username = "MatthieuStigler", subdir = "RDDtools") +if (!require('remotes')) install.packages('remotes') +remotes::install_github('bquast/rddtools') ``` - -Note however the latest version of RDDtools only works with R 3.0, and that you might need to install [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows. +Note however the latest version of rddtools only works with R 3.0, and that you might need to install [Rtools](https://cran.r-project.org/bin/windows/Rtools/) if on Windows. Documentation ----------------------- -The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github. +The (preliminary) documentation is available in the help files directly, as well as in the *vignettes*. The vignettes can be accessed from R. -RDDtools: main features ------------------------ +```r +vignette('rddtools') +``` +rddtools: main features +----------------------- -+ Simple visualisation of the data using binned-plot: **plot()** ++ Simple visualisation of the data using binned-plot: `plot()` + Bandwidth selection: - + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()** - + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()** + + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: `rdd_bw_ik()` + + MSE global bandwidth procedure of [Ruppert et al 1995]: `rdd_bw_rsw()` + Estimation: - + RDD parametric estimation: **RDDreg_lm()** This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008]. - + RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation). + + RDD parametric estimation: `rdd_reg_lm()` This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008]. + + RDD local non-parametric estimation: `rdd_reg_np()`. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation). + RDD generalised estimation: allows to use custom estimating functions to get the RDD coefficient. Could allow for example a probit RDD, or quantile regression. + Post-Estimation tools: - + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ). - + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]). + + Various tools, to obtain predictions at given covariate values ( `rdd_pred()` ), or to convert to other classes, to lm ( **as.lm()** ), or to the package `np` ( `as.npreg()` ). + + Function to do inference with clustered data: `clusterInf()` either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]). + Regression sensitivity analysis: - + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()** - + *Placebo plot* using different cutpoints: **plotPlacebo()** + + Plot the sensitivity of the coefficient with respect to the bandwith: `plotSensi()` + + *Placebo plot* using different cutpoints: `plotPlacebo()` + Design sensitivity analysis: - + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**. - + Test of equal means of covariates: **covarTest_mean()** - + Test of equal density of covariates: **covarTest_dens()** + + McCrary test of manipulation of the forcing variable: wrapper `dens_test()` to the function `DCdensity()` from package `rdd`. + + Test of equal means of covariates: `covarTest_mean()` + + Test of equal density of covariates: `covarTest_dens()` + Datasets - + Contains the seminal dataset of [Lee 2008]: **Lee2008** - + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()** + + Contains the seminal dataset of [Lee 2008]: `house` + + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: `gen_mc_ik()` -Using RDDtools: a quick example +References ----------------------- -**RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *RDDdata* object, on which different anaylsis tools can be applied. - -### Data preparation and visualisation -Load the package, and load the built-in dataset from [Lee 2008]: - - - - - - -```r -library(RDDtools) -data(Lee2008) -``` - - -Declare the data to be a *RDDdata* object: - - -```r -Lee2008_rdd <- RDDdata(y = Lee2008$y, x = Lee2008$x, cutpoint = 0) -``` - - - -You can now directly summarise and visualise this data: - - -```r -summary(Lee2008_rdd) -``` - -``` -## ### RDDdata object ### -## -## Cutpoint: 0 -## Sample size: -## -Full : 6558 -## -Left : 2740 -## -Right: 3818 -## Covariates: no -``` - -```r -plot(Lee2008_rdd) -``` - -plot of chunk dataPlot - - -### Estimation - -#### Parametric - -Estimate parametrically, by fitting a 4th order polynomial: - -```r -reg_para <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 4) -reg_para -``` - -``` -## ### RDD regression: parametric ### -## Polynomial order: 4 -## Slopes: separate -## Number of obs: 6558 (left: 2740, right: 3818) -## -## Coefficient: -## Estimate Std. Error t value Pr(>|t|) -## D 0.0766 0.0132 5.79 7.6e-09 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -``` - -```r - -plot(reg_para) -``` - -plot of chunk reg_para - - - -#### Non-parametric -As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth: - -```r -bw_ik <- RDDbw_IK(Lee2008_rdd) -reg_nonpara <- RDDreg_np(RDDobject = Lee2008_rdd, bw = bw_ik) -print(reg_nonpara) -``` - -``` -## ### RDD regression: nonparametric local linear### -## Bandwidth: 0.2939 -## Number of obs: 3200 (left: 1594, right: 1606) -## -## Coefficient: -## Estimate Std. Error z value Pr(>|z|) -## D 0.07992 0.00946 8.44 <2e-16 *** -## --- -## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -``` - -```r -plot(x = reg_nonpara) -``` - -plot of chunk RegPlot - - -### Regression Sensitivity tests: - -One can easily check the sensitivity of the estimate to different bandwidths: - -```r -plotSensi(reg_nonpara, from = 0.05, to = 1, by = 0.1) -``` - -plot of chunk SensiPlot - - -Or run the Placebo test, estimating the RDD effect based on fake cutpoints: - -```r -plotPlacebo(reg_nonpara) -``` - -plot of chunk placeboPlot - - -### Design Sensitivity tests: - -Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available: - -+ Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()** -+ Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available: - -#### Discontinuity comes from manipulation: McCrary test - -use simply the function **dens_test()**, on either the raw data, or the regression output: - -```r -dens_test(reg_nonpara) -``` - -plot of chunk DensPlot - -``` -## -## McCrary Test for no discontinuity of density around cutpoint -## -## data: reg_nonpara -## z-val = 1.295, p-value = 0.1952 -## alternative hypothesis: Density is discontinuous around cutpoint -## sample estimates: -## Discontinuity -## 0.1035 -``` - - -#### Discontinuity comes from covariates: covariates balance tests - -Two tests available: -+ equal means of covariates: **covarTest_mean()** -+ equal density of covariates: **covarTest_dens()** - - -We need here to simulate some data, given that the Lee (2008) dataset contains no covariates. -We here simulate three variables, with the second having a different mean on the left and the right. - - -```r -set.seed(123) -n_Lee <- nrow(Lee2008) -Z <- data.frame(z1 = rnorm(n_Lee, sd = 2), z2 = rnorm(n_Lee, mean = ifelse(Lee2008 < - 0, 5, 8)), z3 = sample(letters, size = n_Lee, replace = TRUE)) -Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) -``` - - - -Run the tests: - -```r -## test for equality of means around cutoff: -covarTest_mean(Lee2008_rdd_Z, bw = 0.3) -``` - -``` -## mean of x mean of y Difference statistic p.value -## z1 0.004268 0.02186 0.01759 -0.2539 0.7996 -## z2 5.006 7.985 2.979 -84.85 0 -## z3 13.19 13.44 0.2465 -0.941 0.3468 -``` - -```r - -## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: -covarTest_dis(Lee2008_rdd_Z, bw = 0.3) -``` - -``` -## statistic p.value -## z1 0.03482 0.2727 -## z2 0.8648 0 -## z3 0.03009 0.4474 -``` - - -Tests correctly reject equality of the second, and correctly do not reject equality for the first and third. - - [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959" + [Imbens and Kalyanaraman 2012]: https://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959" - [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697" + [Lee 2008]: https://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697" - [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635" + [Imbens and Lemieux 2008]: https://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635" - [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427" - - [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270." - - + [Cameron et al. 2008]: https://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427" + [Ruppert et al 1995]: https://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270." diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..2ea1eb2 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,38 @@ +# Test environments + +- local Windows 10 install, R 4.2.0 +- local Linux (Arch) install, R 4.2.0 +- local macOS install, R 4.2.0 +- GitHub Actions + - Windows Server, R release + - MacOS, R release + - MacOS, R dev + - Ubuntu, R release +- win-builder + - devel + - release + + +# R CMD check + +R CMD check succeeded + +── R CMD check results ─────────────────────────────────────────────────────────────────────── rddtools 1.8.0 ──── +Duration: 1m 36.3s + +0 errors ✔ | 0 warnings ✔ | 0 notes ✔ + +R CMD check succeeded + + +# win builder + +There is a note about a possibly invalid URL, the URL works (JSTOR stable). + +Found the following (possibly) invalid URLs: + URL: https://www.jstor.org/stable/2291516 + From: README.md + Status: 403 + Message: Forbidden + +This is a valid URL that works fine. \ No newline at end of file diff --git a/RDDtools/data/STAR_MHE.rda b/data/STAR_MHE.rda similarity index 100% rename from RDDtools/data/STAR_MHE.rda rename to data/STAR_MHE.rda diff --git a/data/house.rda b/data/house.rda new file mode 100644 index 0000000..6a157b9 Binary files /dev/null and b/data/house.rda differ diff --git a/data/indh.rda b/data/indh.rda new file mode 100644 index 0000000..54949c1 Binary files /dev/null and b/data/indh.rda differ diff --git a/figuresREADME/DensPlot.png b/figuresREADME/DensPlot.png deleted file mode 100644 index ef7498d..0000000 Binary files a/figuresREADME/DensPlot.png and /dev/null differ diff --git a/figuresREADME/RegPlot.png b/figuresREADME/RegPlot.png deleted file mode 100644 index ba49f78..0000000 Binary files a/figuresREADME/RegPlot.png and /dev/null differ diff --git a/figuresREADME/SensiPlot.png b/figuresREADME/SensiPlot.png deleted file mode 100644 index b709e71..0000000 Binary files a/figuresREADME/SensiPlot.png and /dev/null differ diff --git a/figuresREADME/dataPlot.png b/figuresREADME/dataPlot.png deleted file mode 100644 index ec610f7..0000000 Binary files a/figuresREADME/dataPlot.png and /dev/null differ diff --git a/figuresREADME/placeboPlot.png b/figuresREADME/placeboPlot.png deleted file mode 100644 index 2a028ac..0000000 Binary files a/figuresREADME/placeboPlot.png and /dev/null differ diff --git a/figuresREADME/reg_para.png b/figuresREADME/reg_para.png deleted file mode 100644 index 7e78537..0000000 Binary files a/figuresREADME/reg_para.png and /dev/null differ diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..87f48da --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,16 @@ +bibentry(bibtype = "TechReport", + title = "rddtools: A toolbox for regression discontinuity in R", + author = c(person("Matthieu", "Stigler"), + person("Bastiaan", "Quast") ), + institution = "The Graduate Institute", + address = "Maison de la paix, Geneva, Switzerland", + year = "2016", + url = "https://bastiaanquast.com/rddtools/", + textVersion = "Stigler, M. and B. Quast, B (2016). rddtools: A toolbox for regression discontinuity in R. ", + + + mheader = "To cite rddtools in publications please use:", + + mfooter = "We have invested a lot of time and effort in creating rddtools, please cite it when using it for data analysis. See also 'citation()' for citing R." + +) diff --git a/inst/ChangeLog b/inst/ChangeLog new file mode 100644 index 0000000..a5052e6 --- /dev/null +++ b/inst/ChangeLog @@ -0,0 +1,6 @@ +Version 0.5.0: Matthieu Stigler (2018-01-29) + -new: plotBin allows for separate bin on each side + -new: wrapper for CCT plots + -fix issues with new output from rdtable + -add test file + -bw_ik work on regression output objects diff --git a/inst/devtools_internal_tests.R b/inst/devtools_internal_tests.R new file mode 100644 index 0000000..d8a5cc4 --- /dev/null +++ b/inst/devtools_internal_tests.R @@ -0,0 +1 @@ +devtools::check_rhub(email="Matthieu.Stigler@gmail.com", interactive=FALSE) diff --git a/RDDtools/man/STAR_MHE.Rd b/man/STAR_MHE.Rd similarity index 66% rename from RDDtools/man/STAR_MHE.Rd rename to man/STAR_MHE.Rd index 36a1a38..cf50faf 100644 --- a/RDDtools/man/STAR_MHE.Rd +++ b/man/STAR_MHE.Rd @@ -1,9 +1,11 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rddtools.R \docType{data} \name{STAR_MHE} \alias{STAR_MHE} \title{Transformation of the STAR dataset as used in Angrist and Pischke (2008)} -\format{A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, +\format{ +A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, all other are created by Angrist and Pischke STAT code. \describe{ \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})} @@ -11,41 +13,37 @@ all other are created by Angrist and Pischke STAT code. \item{classid}{The id of the class (computed by A & P)} \item{cs}{Class size (computed by A & P)} \item{female, nwhite}{Various covariates (computed by A & P)} -}} +} +} \source{ Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website -\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012. -} -\usage{ -STAR_MHE } \description{ Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) } \details{ -). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. -The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. -The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website -(\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta. +). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. +The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. +The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website, on the webstar.dta. } \examples{ data(STAR_MHE) # Compute the group means: -STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean) +STAR_MHE_means <- aggregate(STAR_MHE[, c('classid', 'pscore', 'cs')], + by=list(STAR_MHE$classid), mean) # Regression of means, with weighted average: reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs) coef(summary(reg_krug_gls))[2,2] } \references{ -Krueger, A. (1999) "Experimental Estimates Of Education Production Functions," +Krueger, A. (1999) 'Experimental Estimates Of Education Production Functions,' \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May. -Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, +Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, Princeton University press } \seealso{ \code{\link[AER]{STAR}} for the original dataset. } - diff --git a/man/as.lm.Rd b/man/as.lm.Rd new file mode 100644 index 0000000..ec2b19d --- /dev/null +++ b/man/as.lm.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rdd_data_methods.R +\name{as.lm} +\alias{as.lm} +\title{Convert a rdd object to lm} +\usage{ +as.lm(x) +} +\arguments{ +\item{x}{An object to convert to lm} +} +\value{ +An object of class \code{lm} +} +\description{ +Convert a rdd object to lm +} +\examples{ +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +reg_para <- rdd_reg_lm(rdd_object=house_rdd) +reg_para_lm <- as.lm(reg_para) +reg_para_lm +plot(reg_para_lm, which=4) +} +\seealso{ +\code{\link{as.npreg}} which converts \code{rdd_reg} objects into \code{npreg} from package \code{np}. +} diff --git a/man/as.npregbw.Rd b/man/as.npregbw.Rd new file mode 100644 index 0000000..bb60c79 --- /dev/null +++ b/man/as.npregbw.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as.npreg.R +\name{as.npregbw} +\alias{as.npregbw} +\alias{as.npreg} +\title{Convert an rdd_reg object to a \code{npreg} object} +\usage{ +as.npregbw(x, ...) + +as.npreg(x, ...) +} +\arguments{ +\item{x}{Object of class \code{rdd_reg} created by \code{\link{rdd_reg_np}} or \code{\link{rdd_reg_lm}}} + +\item{\ldots}{Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}}} +} +\value{ +An object of class \code{npreg} or \code{npregbw} +} +\description{ +Convert an rdd_object to a non-parametric regression \code{npreg} from package \code{np} +} +\details{ +This function converts an rdd_reg object into an \code{npreg} object from package \code{np} +Note that the output won't be the same, since \code{npreg} does not offer a triangular kernel, but a Gaussian or Epanechinkov one. +Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while rdd_reg +proceeds as if the kernel was univariate. A simple solution to make the multivariate kernel similar to the univariate one +is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. +} +\examples{ +# Estimate ususal rdd_reg: + data(house) + house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) + reg_nonpara <- rdd_reg_np(rdd_object=house_rdd) + +## Convert to npreg: + reg_nonpara_np <- as.npreg(reg_nonpara) + reg_nonpara_np + rdd_coef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) + +## Compare with result obtained with a Gaussian kernel: + bw_lm <- dnorm(house_rdd$x, sd=rddtools:::getBW(reg_nonpara)) + reg_nonpara_gaus <- rdd_reg_lm(rdd_object=house_rdd, w=bw_lm) + all.equal(rdd_coef(reg_nonpara_gaus),rdd_coef(reg_nonpara_np)) +} +\seealso{ +\code{\link{as.lm}} which converts \code{rdd_reg} objects into \code{lm}. +} diff --git a/RDDtools/man/clusterInf.Rd b/man/clusterInf.Rd similarity index 72% rename from RDDtools/man/clusterInf.Rd rename to man/clusterInf.Rd index afe70c7..7a730e2 100644 --- a/RDDtools/man/clusterInf.Rd +++ b/man/clusterInf.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clusterInf.R \name{clusterInf} \alias{clusterInf} \title{Post-inference for clustered data} @@ -6,7 +7,7 @@ clusterInf(object, clusterVar, vcov. = NULL, type = c("df-adj", "HC"), ...) } \arguments{ -\item{object}{Object of class lm, from which RDDreg also inherits.} +\item{object}{Object of class lm, from which rdd_reg also inherits.} \item{clusterVar}{The variable containing the cluster attributions.} @@ -24,23 +25,22 @@ Correct standard-errors to account for clustered data, doing either a degrees of possibly on the range specified by bandwidth } \examples{ -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +reg_para <- rdd_reg_lm(rdd_object=house_rdd) # here we just generate randomly a cluster variable: -nlet <- sort(c(outer(letters, letters, paste, sep=""))) -clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) +nlet <- sort(c(outer(letters, letters, paste, sep=''))) +clusRandom <- sample(nlet[1:60], size=nrow(house_rdd), replace=TRUE) # now do post-inference: clusterInf(reg_para, clusterVar=clusRandom) -clusterInf(reg_para, clusterVar=clusRandom, type="HC") +clusterInf(reg_para, clusterVar=clusRandom, type='HC') } \references{ -Wooldridge (2003) Cluster-sample methods in applied econometrics. +Wooldridge (2003) Cluster-sample methods in applied econometrics. \emph{AmericanEconomic Review}, 93, p. 133-138 } \seealso{ \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf} } - diff --git a/RDDtools/man/covarTest_dis.Rd b/man/covarTest_dis.Rd similarity index 61% rename from RDDtools/man/covarTest_dis.Rd rename to man/covarTest_dis.Rd index 4d97d44..4de36e2 100644 --- a/RDDtools/man/covarTest_dis.Rd +++ b/man/covarTest_dis.Rd @@ -1,31 +1,40 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/covarTests.R \name{covarTest_dis} \alias{covarTest_dis} -\alias{covarTest_dis.RDDdata} -\alias{covarTest_dis.RDDreg} +\alias{covarTest_dis.rdd_data} +\alias{covarTest_dis.rdd_reg} \title{Testing for balanced covariates: equality of distribution} \usage{ -covarTest_dis(object, bw, exact = NULL, p.adjust = c("none", "holm", "BH", - "BY", "hochberg", "hommel", "bonferroni")) +covarTest_dis( + object, + bw, + exact = NULL, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) -\method{covarTest_dis}{RDDdata}(object, bw = NULL, exact = FALSE, - p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", - "bonferroni")) +\method{covarTest_dis}{rdd_data}( + object, + bw = NULL, + exact = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) -\method{covarTest_dis}{RDDreg}(object, bw = NULL, exact = FALSE, - p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", - "bonferroni")) +\method{covarTest_dis}{rdd_reg}( + object, + bw = NULL, + exact = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) } \arguments{ -\item{object}{object of class RDDdata} +\item{object}{object of class rdd_data} \item{bw}{a bandwidth} \item{exact}{Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed.} \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function} - -\item{\ldots}{currently not used} } \value{ A data frame with, for each covariate, the K-S statistic and its p-value. @@ -34,29 +43,28 @@ A data frame with, for each covariate, the K-S statistic and its p-value. Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold } \examples{ -data(Lee2008) +data(house) ## Add randomly generated covariates set.seed(123) -n_Lee <- nrow(Lee2008) -Z <- data.frame(z1 = rnorm(n_Lee, sd=2), - z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), +n_Lee <- nrow(house) +Z <- data.frame(z1 = rnorm(n_Lee, sd=2), + z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)), z3 = sample(letters, size = n_Lee, replace = TRUE)) -Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) +house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0) ## Kolmogorov-Smirnov test of equality in distribution: -covarTest_dis(Lee2008_rdd_Z, bw=0.3) +covarTest_dis(house_rdd_Z, bw=0.3) ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff: -covarTest_mean(Lee2008_rdd_Z, bw=0.3) +covarTest_mean(house_rdd_Z, bw=0.3) ## covarTest_dis works also on regression outputs (bw will be taken from the model) -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) +reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z) covarTest_dis(reg_nonpara) } -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} \seealso{ \code{\link{covarTest_mean}} for the t-test of equality of means } - +\author{ +Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> +} diff --git a/RDDtools/man/covarTest_mean.Rd b/man/covarTest_mean.Rd similarity index 58% rename from RDDtools/man/covarTest_mean.Rd rename to man/covarTest_mean.Rd index 983b84a..84dcba4 100644 --- a/RDDtools/man/covarTest_mean.Rd +++ b/man/covarTest_mean.Rd @@ -1,24 +1,37 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/covarTests.R \name{covarTest_mean} \alias{covarTest_mean} -\alias{covarTest_mean.RDDdata} -\alias{covarTest_mean.RDDreg} +\alias{covarTest_mean.rdd_data} +\alias{covarTest_mean.rdd_reg} \title{Testing for balanced covariates: equality of means with t-test} \usage{ -covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE, - p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", - "bonferroni")) +covarTest_mean( + object, + bw = NULL, + paired = FALSE, + var.equal = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) -\method{covarTest_mean}{RDDdata}(object, bw = NULL, paired = FALSE, - var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg", - "hommel", "bonferroni")) +\method{covarTest_mean}{rdd_data}( + object, + bw = NULL, + paired = FALSE, + var.equal = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) -\method{covarTest_mean}{RDDreg}(object, bw = NULL, paired = FALSE, - var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg", - "hommel", "bonferroni")) +\method{covarTest_mean}{rdd_reg}( + object, + bw = NULL, + paired = FALSE, + var.equal = FALSE, + p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", "bonferroni") +) } \arguments{ -\item{object}{object of class RDDdata} +\item{object}{object of class rdd_data} \item{bw}{a bandwidth} @@ -27,8 +40,6 @@ covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE, \item{var.equal}{Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal} \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function} - -\item{\ldots}{currently not used} } \value{ A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. @@ -37,30 +48,29 @@ A data frame with, for each covariate, the mean on each size, the difference, t- Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold } \examples{ -data(Lee2008) +data(house) ## Add randomly generated covariates set.seed(123) -n_Lee <- nrow(Lee2008) -Z <- data.frame(z1 = rnorm(n_Lee, sd=2), - z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), +n_Lee <- nrow(house) +Z <- data.frame(z1 = rnorm(n_Lee, sd=2), + z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)), z3 = sample(letters, size = n_Lee, replace = TRUE)) -Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) +house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0) ## test for equality of means around cutoff: -covarTest_mean(Lee2008_rdd_Z, bw=0.3) +covarTest_mean(house_rdd_Z, bw=0.3) ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: -covarTest_dis(Lee2008_rdd_Z, bw=0.3) +covarTest_dis(house_rdd_Z, bw=0.3) ## covarTest_mean works also on regression outputs (bw will be taken from the model) -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) +reg_nonpara <- rdd_reg_np(rdd_object=house_rdd_Z) covarTest_mean(reg_nonpara) } -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> -} \seealso{ \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution } - +\author{ +Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> +} diff --git a/RDDtools/man/dens_test.Rd b/man/dens_test.Rd similarity index 62% rename from RDDtools/man/dens_test.Rd rename to man/dens_test.Rd index 027ccc8..4537fb5 100644 --- a/RDDtools/man/dens_test.Rd +++ b/man/dens_test.Rd @@ -1,12 +1,13 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dens_test.R \name{dens_test} \alias{dens_test} \title{Run the McCracy test for manipulation of the forcing variable} \usage{ -dens_test(RDDobject, bin = NULL, bw = NULL, plot = TRUE, ...) +dens_test(rdd_object, bin = NULL, bw = NULL, plot = TRUE, ...) } \arguments{ -\item{RDDobject}{object of class RDDdata} +\item{rdd_object}{object of class rdd_data} \item{bin}{Argument of the \code{\link{DCdensity}} function, the binwidth} @@ -17,12 +18,10 @@ dens_test(RDDobject, bin = NULL, bw = NULL, plot = TRUE, ...) \item{\ldots}{Further arguments passed to \code{\link[rdd]{DCdensity}}.} } \description{ -Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{RDDobject}. +Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{rdd_object}. } \examples{ -library(RDDtools) -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -dens_test(Lee2008_rdd) +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +dens_test(house_rdd) } - diff --git a/RDDtools/man/gen_MC_IK.Rd b/man/gen_mc_ik.Rd similarity index 55% rename from RDDtools/man/gen_MC_IK.Rd rename to man/gen_mc_ik.Rd index 74af276..70826f4 100644 --- a/RDDtools/man/gen_MC_IK.Rd +++ b/man/gen_mc_ik.Rd @@ -1,10 +1,16 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{gen_MC_IK} -\alias{gen_MC_IK} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gen_mc_ik.R +\name{gen_mc_ik} +\alias{gen_mc_ik} \title{Generate Monte Carlo simulations of Imbens and Kalyanaraman} \usage{ -gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame", - "RDDdata"), size) +gen_mc_ik( + n = 200, + version = 1, + sd = 0.1295, + output = c("data.frame", "rdd_data"), + size +) } \arguments{ \item{n}{The size of sampel to generate} @@ -13,9 +19,9 @@ gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame", \item{sd}{The standard deviation of the error term.} -\item{output}{Whether to return a data-frame, or already a RDDdata} +\item{output}{Whether to return a data-frame, or already a rdd_data} -\item{size}{The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1} +\item{size}{The size of the effect, this depends on the specific version, defaults are as in ik: 0.04, NULL, 0.1, 0.1} } \value{ An data frame with x and y variables. @@ -24,19 +30,19 @@ An data frame with x and y variables. Generate the simulations reported in Imbens and Kalyanaraman (2012) } \examples{ -MC1_dat <- gen_MC_IK() -MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) +mc1_dat <- gen_mc_ik() +MC1_rdd <- rdd_data(y=mc1_dat$y, x=mc1_dat$x, cutpoint=0) ## Use np regression: -reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd) +reg_nonpara <- rdd_reg_np(rdd_object=MC1_rdd) reg_nonpara # Represent the curves: plotCu <- function(version=1, xlim=c(-0.1,0.1)){ - res <- gen_MC_IK(sd=0.0000001, n=1000, version=version) + res <- gen_mc_ik(sd=0.0000001, n=1000, version=version) res <- res[order(res$x),] - ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y")) - plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version)) + ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), 'y')) + plot(res, type='l', xlim=xlim, ylim=ylim, main=paste('DGP', version)) abline(v=0) xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),] points(xCut, col=2) @@ -48,7 +54,3 @@ plotCu(version=3) plotCu(version=4) layout(matrix(1)) } -\references{ -TODO -} - diff --git a/man/house.Rd b/man/house.Rd new file mode 100644 index 0000000..cdbe867 --- /dev/null +++ b/man/house.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rddtools.R +\docType{data} +\name{house} +\alias{house} +\title{Dataset used in Lee (2008)} +\format{ +A data frame with 6558 observations and two variables: +\describe{ +\item{x}{Vote at election t-1} +\item{y}{Vote at election t} +} +} +\source{ +Guido Imbens webpage: \url{https://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} +} +\description{ +Randomized experiments from non-random selection in U.S. House elections + +Dataset described used in Imbens and Kalyamaran (2012), and probably the same dataset used in Lee (2008), +} +\examples{ +data(house) +rdd_house <- rdd_data(x=x, y=y, data=house, cutpoint=0) +summary(rdd_house) +plot(rdd_house) +} +\references{ +Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,' +Review of Economic Studies (2012) 79, 933-959 + +Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, +\emph{Journal of Econometrics}, 142, 675-697 +} diff --git a/man/indh.Rd b/man/indh.Rd new file mode 100644 index 0000000..5b568a6 --- /dev/null +++ b/man/indh.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rddtools.R +\docType{data} +\name{indh} +\alias{indh} +\title{INDH data set} +\format{ +A data frame with two variables with 720 observations each +} +\description{ +Data from the Initiative Nationale du Development Humaine, collected as the part of the SNSF project "Development Aid and Social Dynamics" +} +\examples{ +# load the data +data(indh) + +# construct rdd_data frame +rdd_dat_indh <- rdd_data(y=choice_pg, x=poverty, data=indh, cutpoint=30) + +# inspect data frame +summary(rdd_dat_indh) + +# perform non-parametric regression +( reg_np_indh <- rdd_reg_np(rdd_dat_indh) ) +} +\references{ +Arcand, Rieger, and Nguyen (2015) 'Development Aid and Social Dyanmics Data Set' +} diff --git a/RDDtools/man/plot.RDDdata.Rd b/man/plot.rdd_data.Rd similarity index 63% rename from RDDtools/man/plot.RDDdata.Rd rename to man/plot.rdd_data.Rd index a684d24..155a8cd 100644 --- a/RDDtools/man/plot.RDDdata.Rd +++ b/man/plot.rdd_data.Rd @@ -1,13 +1,22 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{plot.RDDdata} -\alias{plot.RDDdata} -\title{Plot RDDdata} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rdd_data_methods.R +\name{plot.rdd_data} +\alias{plot.rdd_data} +\title{Plot rdd_data} \usage{ -\method{plot}{RDDdata}(x, h, nbins = NULL, xlim = range(object$x, na.rm = - TRUE), cex = 0.7, nplot = 1, device = c("base", "ggplot"), ...) +\method{plot}{rdd_data}( + x, + h = NULL, + nbins = NULL, + xlim = range(object$x, na.rm = TRUE), + cex = 0.7, + nplot = 1, + device = c("base", "ggplot"), + ... +) } \arguments{ -\item{x}{Object of class RDDdata} +\item{x}{Object of class rdd_data} \item{h}{The binwidth parameter (note this differs from the bandwidth parameter!)} @@ -32,27 +41,26 @@ Binned plot of the forcing and outcome variable \details{ Produces a simple binned plot averaging values within each interval. The length of the intervals is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth -argument, that gives half of the length of the kernel window. -When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}. +argument, that gives half of the length of the kernel window. +When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{rdd_bw_rsw}}. } \examples{ -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -plot(Lee2008_rdd) +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +plot(house_rdd) ## Specify manually the bandwidth: -plot(Lee2008_rdd, h=0.2) +plot(house_rdd, h=0.2) ## Show three plots with different bandwidth: -plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) +plot(house_rdd, h=c(0.2,0.3,0.4), nplot=3) ## Specify instead of the bandwidth, the final number of bins: -plot(Lee2008_rdd, nbins=22) +plot(house_rdd, nbins=22) ## If the specified number of bins is odd, the larger number is given to side with largest range -plot(Lee2008_rdd, nbins=21) +plot(house_rdd, nbins=21) } \author{ Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> } - diff --git a/RDDtools/man/plotBin.Rd b/man/plotBin.Rd similarity index 53% rename from RDDtools/man/plotBin.Rd rename to man/plotBin.Rd index 24ae2c1..7f911c6 100644 --- a/RDDtools/man/plotBin.Rd +++ b/man/plotBin.Rd @@ -1,11 +1,24 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotBin.R \name{plotBin} \alias{plotBin} \title{Bin plotting} \usage{ -plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE, - type = c("value", "number"), xlim = range(x, na.rm = TRUE), cex = 0.9, - main = NULL, xlab, ylab, ...) +plotBin( + x, + y, + h = NULL, + nbins = NULL, + cutpoint = 0, + plot = TRUE, + type = c("value", "number"), + xlim = range(x, na.rm = TRUE), + cex = 0.9, + main = NULL, + xlab, + ylab, + ... +) } \arguments{ \item{x}{Forcing variable} @@ -14,13 +27,15 @@ plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE, \item{h}{the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})} +\item{nbins}{number of Bins} + \item{cutpoint}{Cutpoint} \item{plot}{Logical. Whether to plot or only returned silently} \item{type}{Whether returns the y averages, or the x frequencies} -\item{xlim,cex,main,xlab,ylab}{Usual parameters passed to plot(), see \code{\link{par}}} +\item{xlim, cex, main, xlab, ylab}{Usual parameters passed to plot(), see \code{\link{par}}} \item{\ldots}{further arguments passed to plot.} } @@ -28,13 +43,8 @@ plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE, Returns silently values } \description{ -Do a "scatterplot bin smoothing" -} -\author{ -Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> +Do a 'scatterplot bin smoothing' } \references{ McCrary, Justin. } -\keyword{internal} - diff --git a/RDDtools/man/plotPlacebo.Rd b/man/plotPlacebo.Rd similarity index 66% rename from RDDtools/man/plotPlacebo.Rd rename to man/plotPlacebo.Rd index f1f58ca..140db51 100644 --- a/RDDtools/man/plotPlacebo.Rd +++ b/man/plotPlacebo.Rd @@ -1,40 +1,58 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/placebo.R \name{plotPlacebo} -\alias{computePlacebo} \alias{plotPlacebo} -\alias{plotPlacebo.RDDreg} +\alias{plotPlacebo.rdd_reg} \alias{plotPlaceboDens} -\alias{plotPlaceboDens.RDDreg} +\alias{plotPlaceboDens.rdd_reg} +\alias{computePlacebo} \title{Draw a (density) plot of placebo tests} \usage{ plotPlacebo(object, device = c("ggplot", "base"), ...) -\method{plotPlacebo}{RDDreg}(object, device = c("ggplot", "base"), - from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, - vcov. = NULL, plot = TRUE, output = c("data", "ggplot"), ...) +\method{plotPlacebo}{rdd_reg}( + object, + device = c("ggplot", "base"), + from = 0.25, + to = 0.75, + by = 0.1, + level = 0.95, + same_bw = FALSE, + vcov. = NULL, + plot = TRUE, + output = c("data", "ggplot"), + ... +) plotPlaceboDens(object, device = c("ggplot", "base"), ...) -\method{plotPlaceboDens}{RDDreg}(object, device = c("ggplot", "base"), - from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, - vcov. = NULL, ...) - -computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95, - same_bw = FALSE, vcov. = NULL) +\method{plotPlaceboDens}{rdd_reg}( + object, + device = c("ggplot", "base"), + from = 0.25, + to = 0.75, + by = 0.1, + level = 0.95, + same_bw = FALSE, + vcov. = NULL, + ... +) + +computePlacebo( + object, + from = 0.25, + to = 0.75, + by = 0.1, + level = 0.95, + same_bw = FALSE, + vcov. = NULL +) } \arguments{ \item{object}{the output of an RDD regression} \item{device}{Whether to draw a base or a ggplot graph.} -\item{\ldots}{Further arguments passed to specific methods.} - -\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.} - -\item{plot}{Whether to actually plot the data.} - -\item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} - \item{from}{Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint} \item{to}{Ending point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint} @@ -44,6 +62,14 @@ computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95, \item{level}{Level of the confidence interval shown} \item{same_bw}{Whether to re-estimate the bandwidth at each point} + +\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.} + +\item{plot}{Whether to actually plot the data.} + +\item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} + +\item{\ldots}{Further arguments passed to specific methods.} } \value{ A data frame containing the cutpoints, their corresponding estimates and confidence intervals. @@ -52,13 +78,13 @@ A data frame containing the cutpoints, their corresponding estimates and confide Draw a plot of placebo tests, estimating the impact on fake cutpoints } \examples{ -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) -reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +reg_nonpara <- rdd_reg_np(rdd_object=house_rdd) plotPlacebo(reg_nonpara) # Use with another vcov function; cluster case -reg_nonpara_lminf <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") +reg_nonpara_lminf <- rdd_reg_np(rdd_object=house_rdd, inference='lm') # need to be a function applied to updated object! vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x) plotPlacebo(reg_nonpara_lminf, vcov. = vc) @@ -66,4 +92,3 @@ plotPlacebo(reg_nonpara_lminf, vcov. = vc) \author{ Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> } - diff --git a/man/plotSensi.Rd b/man/plotSensi.Rd new file mode 100644 index 0000000..da793f1 --- /dev/null +++ b/man/plotSensi.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotSensi.R +\name{plotSensi} +\alias{plotSensi} +\alias{plotSensi.rdd_reg_np} +\alias{plotSensi.rdd_reg_lm} +\title{Plot the sensitivity to the bandwidth} +\usage{ +plotSensi( + rdd_regobject, + from, + to, + by = 0.01, + level = 0.95, + output = c("data", "ggplot"), + plot = TRUE, + ... +) + +\method{plotSensi}{rdd_reg_np}( + rdd_regobject, + from, + to, + by = 0.05, + level = 0.95, + output = c("data", "ggplot"), + plot = TRUE, + device = c("ggplot", "base"), + vcov. = NULL, + ... +) + +\method{plotSensi}{rdd_reg_lm}( + rdd_regobject, + from, + to, + by = 0.05, + level = 0.95, + output = c("data", "ggplot"), + plot = TRUE, + order, + type = c("colour", "facet"), + ... +) +} +\arguments{ +\item{rdd_regobject}{object of a RDD regression, from either \code{\link{rdd_reg_lm}} or \code{\link{rdd_reg_np}}} + +\item{from}{First bandwidth point. Default value is max(1e-3, bw-0.1)} + +\item{to}{Last bandwidth point. Default value is bw+0.1} + +\item{by}{Increments in the \code{from} \code{to} sequence} + +\item{level}{Level of the confidence interval} + +\item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} + +\item{plot}{Whether to actually plot the data.} + +\item{device}{Whether to draw a base or a ggplot graph.} + +\item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}} + +\item{order}{For parametric models (from \code{\link{rdd_reg_lm}}), the order of the polynomial.} + +\item{type}{For parametric models (from \code{\link{rdd_reg_lm}}) whether different orders are represented as different colour or as different facets.} + +\item{\ldots}{Further arguments passed to specific methods} +} +\value{ +A data frame containing the bandwidths and corresponding estimates and confidence intervals. +} +\description{ +Draw a plot showing the LATE estimates depending on multiple bandwidths +} +\examples{ +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) + +#Non-parametric estimate +bw_ik <- rdd_bw_ik(house_rdd) +reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik) +plotSensi(reg_nonpara) +plotSensi(reg_nonpara, device='base') + +#Parametric estimate: +reg_para_ik <- rdd_reg_lm(rdd_object=house_rdd, order=4, bw=bw_ik) +plotSensi(reg_para_ik) +plotSensi(reg_para_ik, type='facet') +} +\author{ +Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> +} diff --git a/man/rdd_bw_cct_estim.Rd b/man/rdd_bw_cct_estim.Rd new file mode 100644 index 0000000..1e22ad1 --- /dev/null +++ b/man/rdd_bw_cct_estim.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bw_cct_estim.R +\name{rdd_bw_cct_estim} +\alias{rdd_bw_cct_estim} +\title{Bandwidth selection for Regression Discontinuity estimators, CTT 2014} +\usage{ +rdd_bw_cct_estim( + rdd_object, + method = c("mserd", "msetwo", "msesum", "msecomb1", "msecomb2", "cerrd", "certwo", + "cersum", "cercomb1"), + kernel = c("Triangular", "Uniform", "Epanechnikov"), + ... +) +} +\arguments{ +\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}} + +\item{method}{The type of method used. See} + +\item{kernel}{The type of kernel used: either \code{Triangular}, \code{Uniform} or \code{Epanechnikov}.} + +\item{\ldots}{further arguments passed to \code{\link[rdrobust]{rdbwselect}}.} +} +\value{ +See documentation of \code{\link[rdrobust]{rdbwselect}} +} +\description{ +Simple wrapper of the Calonico-Cattaneo-Titiunik (2014) bandwidth selection procedures +for RDD estimators \code{\link[rdrobust]{rdbwselect}}. +} +\examples{ +data(house) +rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +rdd_bw_cct_estim(rd) + +} +\references{ +Calonico, S., M. D. Cattaneo, and R. Titiunik. 2014a. Robust Nonparametric Confidence Intervals for Regression-Discontinuity Designs. Econometrica 82(6): 2295-2326. +\url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}. +} +\seealso{ +\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +} +\author{ +Original code written by Calonico, Cattaneo, Farrell and Titiuni, see \code{\link[rdrobust]{rdbwselect}} +} diff --git a/man/rdd_bw_cct_plot.Rd b/man/rdd_bw_cct_plot.Rd new file mode 100644 index 0000000..5eff1df --- /dev/null +++ b/man/rdd_bw_cct_plot.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bw_cct_plot.R +\name{rdd_bw_cct_plot} +\alias{rdd_bw_cct_plot} +\title{Bandwidth selection for Regression Discontinuity visualisation, CTT 2015} +\usage{ +rdd_bw_cct_plot( + rdd_object, + method = c("esmv", "es", "espr", "esmvpr", "qs", "qspr", "qsmv", "qsmvpr"), + ... +) +} +\arguments{ +\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}} + +\item{method}{The type of method used. See \code{\link[rdrobust]{rdplot}}. +Default is \code{esmv}, the variance mimicking evenly-spaced method.} + +\item{\ldots}{further arguments passed to \code{\link[rdrobust]{rdplot}}.} +} +\value{ +See documentation of \code{\link[rdrobust]{rdplot}} +} +\description{ +Simple wrapper of the Calonico-Cattaneo-Titiunik (2015) bandwidth selection procedures +for RDD visualisation \code{\link[rdrobust]{rdplot}}. +} +\examples{ +data(house) +rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +rdd_bw_cct_plot(rd) + +} +\references{ +Calonico, S., M. D. Cattaneo, and R. Titiunik. 2015a. Optimal Data-Driven Regression Discontinuity Plots. Journal of the American Statistical Association 110(512): 1753-1769. +\url{https://www.tandfonline.com/doi/abs/10.1080/01621459.2015.1017578}. +} +\seealso{ +\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +} +\author{ +Original code written by Calonico, Cattaneo, Farrell and Titiuni, see \code{\link[rdrobust]{rdplot}} +} diff --git a/man/rdd_bw_ik.Rd b/man/rdd_bw_ik.Rd new file mode 100644 index 0000000..3847201 --- /dev/null +++ b/man/rdd_bw_ik.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bw_ik.R +\name{rdd_bw_ik} +\alias{rdd_bw_ik} +\title{Imbens-Kalyanaraman Optimal Bandwidth Calculation} +\usage{ +rdd_bw_ik(rdd_object, kernel = c("Triangular", "Uniform", "Normal")) +} +\arguments{ +\item{rdd_object}{of class rdd_data created by \code{\link{rdd_data}}} + +\item{kernel}{The type of kernel used: either \code{triangular} or \code{uniform}.} +} +\value{ +The optimal bandwidth +} +\description{ +Imbens-Kalyanaraman optimal bandwidth +for local linear regression in Regression discontinuity designs. +} +\examples{ +data(house) +rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +rdd_bw_ik(rd) +} +\references{ +Imbens, Guido and Karthik Kalyanaraman. (2012) 'Optimal Bandwidth Choice for the regression discontinuity estimator,' +Review of Economic Studies (2012) 79, 933-959 +} +\seealso{ +\code{\link{rdd_bw_rsw}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) +} +\author{ +Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> +} diff --git a/RDDtools/man/RDDbw_RSW.Rd b/man/rdd_bw_rsw.Rd similarity index 52% rename from RDDtools/man/RDDbw_RSW.Rd rename to man/rdd_bw_rsw.Rd index 671c63f..feb71f5 100644 --- a/RDDtools/man/RDDbw_RSW.Rd +++ b/man/rdd_bw_rsw.Rd @@ -1,32 +1,32 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{RDDbw_RSW} -\alias{RDDbw_RSW} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bw_rot.R +\name{rdd_bw_rsw} +\alias{rdd_bw_rsw} \title{Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth}} \usage{ -RDDbw_RSW(object, type = c("global", "sided")) +rdd_bw_rsw(object, type = c("global", "sided")) } \arguments{ -\item{object}{object of class RDDdata created by \code{\link{RDDdata}}} +\item{object}{object of class rdd_data created by \code{\link{rdd_data}}} -\item{type}{Whether to choose a global bandwidth for the whole function (\code{global}) +\item{type}{Whether to choose a global bandwidth for the whole function (\code{global}) or for each side (\code{sided})} } \value{ One (or two for \code{sided}) bandwidth value. } \description{ -Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) +Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) either to the whole function, or to the functions below and above the cutpoint. } \examples{ -data(Lee2008) -rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) -RDDbw_RSW(rd) +data(house) +rd<- rdd_data(x=house$x, y=house$y, cutpoint=0) +rdd_bw_rsw(rd) } \references{ See \code{\link[KernSmooth]{dpill}} } \seealso{ -\code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) +\code{\link{rdd_bw_ik}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) } - diff --git a/RDDtools/man/RDDcoef.Rd b/man/rdd_coef.Rd similarity index 57% rename from RDDtools/man/RDDcoef.Rd rename to man/rdd_coef.Rd index ec712a3..e7a0e9c 100644 --- a/RDDtools/man/RDDcoef.Rd +++ b/man/rdd_coef.Rd @@ -1,15 +1,16 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{RDDcoef} -\alias{RDDcoef} -\alias{RDDcoef.RDDreg_np} -\alias{RDDcoef.default} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rdd_coef.R +\name{rdd_coef} +\alias{rdd_coef} +\alias{rdd_coef.default} +\alias{rdd_coef.rdd_reg_np} \title{RDD coefficient} \usage{ -RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...) +rdd_coef(object, allInfo = FALSE, allCo = FALSE, ...) -\method{RDDcoef}{default}(object, allInfo = FALSE, allCo = FALSE, ...) +\method{rdd_coef}{default}(object, allInfo = FALSE, allCo = FALSE, ...) -\method{RDDcoef}{RDDreg_np}(object, allInfo = FALSE, allCo = FALSE, ...) +\method{rdd_coef}{rdd_reg_np}(object, allInfo = FALSE, allCo = FALSE, ...) } \arguments{ \item{object}{A RDD regression object} @@ -21,10 +22,9 @@ RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...) \item{\ldots}{Further arguments passed to/from specific methods} } \value{ -Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, +Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, its standard value, t test and p-value and } \description{ Function to access the RDD coefficient in the various regressions } - diff --git a/man/rdd_data.Rd b/man/rdd_data.Rd new file mode 100644 index 0000000..03b1855 --- /dev/null +++ b/man/rdd_data.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rdd_data.R +\name{rdd_data} +\alias{rdd_data} +\title{Construct rdd_data} +\usage{ +rdd_data(y, x, covar, cutpoint, z, labels, data) +} +\arguments{ +\item{y}{Output} + +\item{x}{Forcing variable} + +\item{covar}{Exogeneous variables} + +\item{cutpoint}{Cutpoint} + +\item{z}{Assignment variable for the fuzzy case. Should be 0/1 or TRUE/FALSE variable.} + +\item{labels}{Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently.} + +\item{data}{A data-frame for the \code{x} and \code{y} variables. If this is provided, +the column names can be entered directly for argument \code{x}, \code{y} and \code{covar}. +For \code{covar}, should be a character vector.} +} +\value{ +Object of class \code{rdd_data}, inheriting from \code{data.frame} +} +\description{ +Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. +} +\details{ +Arguments \code{x}, \code{y} (and eventually \code{covar}) can be either given as: +\itemize{ +\item vectors (eventually data-frame for \code{covar}) +\item quote/character when \code{data} is also provided. For multiple \code{covar}, use a vector of characters +} +} +\examples{ +data(house) +rd <- rdd_data(x=house$x, y=house$y, cutpoint=0) +rd2 <- rdd_data(x=x, y=y, data=house, cutpoint=0) + +# The print() function is the same as the print.data.frame: +rd + +# The summary() and plot() function are specific to rdd_data +summary(rd) +plot(rd) + +# for the fuzzy case, you need to specify the assignment variable z: +rd_dat_fakefuzzy <- rdd_data(x=house$x, y=house$y, + z=ifelse(house$x>0+rnorm(nrow(house), sd=0.05),1,0), + cutpoint=0) +summary(rd_dat_fakefuzzy) +} +\author{ +Matthieu Stigler \email{Matthieu.Stigler@gmail.com} +} diff --git a/RDDtools/man/RDDgenreg.Rd b/man/rdd_gen_reg.Rd similarity index 64% rename from RDDtools/man/RDDgenreg.Rd rename to man/rdd_gen_reg.Rd index 6c3750e..c396f77 100644 --- a/RDDtools/man/RDDgenreg.Rd +++ b/man/rdd_gen_reg.Rd @@ -1,14 +1,26 @@ -% Generated by roxygen2 (4.0.1): do not edit by hand -\name{RDDgenreg} -\alias{RDDgenreg} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reg_gen.R +\name{rdd_gen_reg} +\alias{rdd_gen_reg} \title{General polynomial estimator of the regression discontinuity} \usage{ -RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL, - slope = c("separate", "same"), covar.opt = list(strategy = c("include", - "residual"), slope = c("same", "separate"), bw = NULL), weights, ...) +rdd_gen_reg( + rdd_object, + fun = glm, + covariates = NULL, + order = 1, + bw = NULL, + slope = c("separate", "same"), + covar.opt = list(strategy = c("include", "residual"), slope = c("same", "separate"), bw + = NULL), + weights, + ... +) } \arguments{ -\item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}} +\item{rdd_object}{Object of class rdd_data created by \code{\link{rdd_data}}} + +\item{fun}{The function to estimate the parameters} \item{covariates}{Formula to include covariates} @@ -16,46 +28,44 @@ RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL, \item{bw}{A bandwidth to specify the subset on which the kernel weighted regression is estimated} -\item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}} - \item{slope}{Whether slopes should be different on left or right (separate), or the same.} \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).} -\item{fun}{The function to estimate the parameters} +\item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}} \item{\ldots}{Further arguments passed to fun. See the example.} } \value{ -An object of class RDDreg_lm and class lm, with specific print and plot methods +An object of class rdd_reg_lm and class lm, with specific print and plot methods } \description{ Compute RDD estimate allowing a locally kernel weighted version of any estimation function possibly on the range specified by bandwidth } \details{ -This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. +This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. It is assumed that the custom funciton has following behaviour: \enumerate{ \item A formula interface, together with a \code{data} argument \item A \code{weight} argument \item A coef(summary(x)) returning a data-frame containing a column Estimate } -Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}} +Note that for the last requirement, this can be accomodated by writing a specific \code{\link{rdd_coef}} function for the class of the object returned by \code{fun}. } \examples{ ## Step 0: prepare data -data(Lee2008) -Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) +data(house) +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) ## Estimate a local probit: -Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y + %\VignetteIndexEntry{Morocco} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, echo = FALSE, message = FALSE} +knitr::opts_chunk$set(collapse = T, comment = "#>") +``` + +we use the data from the Initiative Nationale du Development Humaine (INDH) a development project in Morocco. +The data is included with the `rddtools` package under the name `indh`. + +We start by loading the package and the dataset. + +```{r, message=FALSE} +library(rddtools) +data("indh") +``` + +Now that we have loading the data we can briefly inspect the structure of the data + +```{r} +str(indh) +``` + +The `indh` object is a `data.frame` containing 720 observations (representing individuals) of two variables: + +- `choice_pg` +- `poverty` + +The variable of interest is `choice_pg`, which represent the decision to contibute to a public good or not. +The observations are individuals choosing to contribute or not, these individuals are clustered by the variable `poverty` which is the municiple structure at which funding was distributed as part of the INDH project. +The forcing variable is `poverty` which represents the number of households in a commune living below the poverty threshold. +As part of the INDH, commune with a proportion of household below the poverty threshhold greater than 30% were allowed to distribute the funding using a **Community Driven Development** scheme. +The cutoff point for our analysis is therefore `30`. + +We can now transform the `data.frame` to a special `rdd_data` `data.frame` using the `rdd_data()` function. + +```{r} +rdd_dat_indh <- rdd_data(y=choice_pg, + x=poverty, + data=indh, + cutpoint=30 ) +``` + +The structure is similar but contains some additional information. + +```{r} +str(rdd_dat_indh) +``` + +In order to best understand our data, we start with an exploratory data analysis using tables... + +```{r} +summary(rdd_dat_indh) +``` + +...and plots. + +```{r} +plot(rdd_dat_indh[1:715,]) +``` + +We can now continue with a standard Regression Discontinuity Design (RDD) estimation. + +```{r} +(reg_para <- rdd_reg_lm(rdd_dat_indh, order=4)) +``` + +In addition to the parametric estimation, we can also perform a non-parametric estimation. + +```{r} +bw_ik <- rdd_bw_ik(rdd_dat_indh) +(reg_nonpara <- rdd_reg_np(rdd_object=rdd_dat_indh, bw=bw_ik)) +``` + +Sensitity tests. + +```{r} +plotSensi(reg_nonpara, from=0.05, to=1, by=0.1) +``` diff --git a/vignettes/rddtools.Rmd b/vignettes/rddtools.Rmd new file mode 100644 index 0000000..02c1126 --- /dev/null +++ b/vignettes/rddtools.Rmd @@ -0,0 +1,109 @@ +--- +title: "rddtools" +author: "Matthieu Stigler" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{rddtools} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + +```{r, echo = FALSE, message = FALSE} +knitr::opts_chunk$set(collapse = T, comment = "#>") +``` + +**RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *rdd_data* object, on which different anaylsis tools can be applied. + +# Data Preparation and Visualisation +Load the package, and load the built-in dataset from [Lee 2008]: + +```{r} +library(rddtools) +data(house) +``` + +Declare the data to be a *rdd_data* object: + +```{r} +house_rdd <- rdd_data(y=house$y, x=house$x, cutpoint=0) +``` + + +You can now directly summarise and visualise this data: + +```{r dataPlot} +summary(house_rdd) +plot(house_rdd) +``` + + +# Parametric Estimation + +Estimate parametrically, by fitting a 4th order polynomial. + +```{r reg_para} +reg_para <- rdd_reg_lm(rdd_object=house_rdd, order=4) +reg_para + +plot(reg_para) +``` + + +# Non-parametric Estimation + +Run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth. + +```{r RegPlot} +bw_ik <- rdd_bw_ik(house_rdd) +reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik) +print(reg_nonpara) +``` + +# Regression Sensitivity tests: + +One can easily check the sensitivity of the estimate to different bandwidths: +```{r SensiPlot} +plotSensi(reg_nonpara, from=0.05, to=1, by=0.1) +``` + +Or run the Placebo test, estimating the RDD effect based on fake cutpoints: +```{r placeboPlot} +plotPlacebo(reg_nonpara) +``` + +# Design Sensitivity tests: + +Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available: + ++ Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()** ++ Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available: + +## Discontinuity comes from manipulation: McCrary test + +use simply the function **dens_test()**, on either the raw data, or the regression output: +```{r DensPlot} +dens_test(reg_nonpara) +``` + +## Discontinuity comes from covariates: covariates balance tests + +Two tests available: ++ equal means of covariates: **covarTest_mean()** ++ equal density of covariates: **covarTest_dens()** + + +We need here to simulate some data, given that the Lee (2008) dataset contains no covariates. +We here simulate three variables, with the second having a different mean on the left and the right. + +```{r} +set.seed(123) +n_Lee <- nrow(house) +Z <- data.frame(z1 = rnorm(n_Lee, sd=2), + z2 = rnorm(n_Lee, mean = ifelse(house<0, 5, 8)), + z3 = sample(letters, size = n_Lee, replace = TRUE)) +house_rdd_Z <- rdd_data(y = house$y, x = house$x, covar = Z, cutpoint = 0) +``` + +Tests correctly reject equality of the second, and correctly do not reject equality for the first and third.