diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index f4d1d22..a95f2f6 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -23,9 +23,14 @@ /Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-fastshap-vs-other-pkgs.R="F70E1DED" /Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-genOMat.R="54612D12" /Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-shapviz.R="29D8DEE8" +/Users/bgreenwell/Dropbox/devel/fastshap/man/bin.Rd="869889FE" +/Users/bgreenwell/Dropbox/devel/fastshap/revdep/cran.md="FA0CBEF5" +/Users/bgreenwell/Dropbox/devel/fastshap/revdep/failures.md="54C2BBDB" +/Users/bgreenwell/Dropbox/devel/fastshap/revdep/problems.md="EC41BDD2" /Users/bgreenwell/Dropbox/devel/fastshap/slowtests/fastshap-genOMat.cpp="99FEC81E" /Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-benchmark.R="29ADFB84" /Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-parallel.R="7B058F98" +/Users/bgreenwell/Dropbox/devel/fastshap/src/Makevars="2B6CF773" /Users/bgreenwell/Dropbox/devel/fastshap/src/Makevars.win="0A0149DE" /Users/bgreenwell/Dropbox/devel/fastshap/src/RcppExports.cpp="394C142D" /Users/bgreenwell/Dropbox/devel/fastshap/vignettes/fastshap.Rmd="536A2979" diff --git a/DESCRIPTION b/DESCRIPTION index 784ab37..1066bbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: fastshap Type: Package Title: Fast Approximate Shapley Values -Version: 0.1.0 +Version: 0.1.1 Authors@R: person("Brandon", "Greenwell", email = "greenwell.brandon@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8120-0084")) Description: Computes fast (relative to other implementations) approximate @@ -23,6 +23,7 @@ Enhances: Suggests: AmesHousing, covr, + earth, knitr, ranger, rmarkdown, diff --git a/NEWS.md b/NEWS.md index 1daf5d4..64f311f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# fastshap 0.1.1 + +## Changed + +* This NEWS file now follows the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) format. + +## Fixed + +* Removed an unnecessary `.Rd` file to satisfy CRAN policies. +* Fixed a couple of outdated URLs. +* Added [earth](https://CRAN.R-project.org/package=earth) to the list of suggested packages since it's referenced a couple of times in the package documentation. + # fastshap 0.1.0 ## Breaking changes @@ -70,9 +82,9 @@ features. ## New features * The default method of `explain()` gained a new logical argument called `adjust`. When `adjust = TRUE` (and `nsim > 1`), the algorithm will adjust the sum of the estimated Shapley values to satisfy the *efficiency property*; that is, to equal the difference between the model's prediction for that sample and the average prediction over all the training data. This option is experimental and we follow the same approach as in -[shap](https://github.com/slundberg/shap) [(#6)](https://github.com/bgreenwell/fastshap/issues/6). +[shap](https://github.com/shap/shap) [(#6)](https://github.com/bgreenwell/fastshap/issues/6). -* New (experimental) function for constructing [force plots](https://github.com/slundberg/shap) [(#7)](https://github.com/bgreenwell/fastshap/issues/7) to help visualize prediction explanations. The function is also a generic which means additional methods can be added. +* New (experimental) function for constructing [force plots](https://github.com/shap/shap) [(#7)](https://github.com/bgreenwell/fastshap/issues/7) to help visualize prediction explanations. The function is also a generic which means additional methods can be added. * Function `explain()` became a generic and gained a new logical argument, `exact`, for computing exact Shapley contributions for linear models (Linear SHAP, which assumes independent features) and boosted decision trees (Tree SHAP). Currently, only `"lm"`, `"glm"`, and `"xgb.Booster"` objects are supported [(#2)](https://github.com/bgreenwell/fastshap/issues/2)[(#3)](https://github.com/bgreenwell/fastshap/issues/3). diff --git a/R/explain.R b/R/explain.R index 9c2af21..577dd5a 100644 --- a/R/explain.R +++ b/R/explain.R @@ -6,7 +6,7 @@ #' @importFrom foreach foreach %do% %dopar% #' @importFrom stats var explain_column <- function(object, X, column, pred_wrapper, newdata = NULL) { - + # Check types if (!is.null(newdata) && !identical(class(X), class(newdata))) { stop("Arguments `X` and `newdata` do not inherit from the same class: ", @@ -191,7 +191,7 @@ explain_column <- function(object, X, column, pred_wrapper, newdata = NULL) { #' [stats::lm()] or [stats::glm()] object) assumes that the #' input features are independent. Also, setting `adjust = TRUE` is #' experimental and we follow the same approach as in -#' [shap](https://github.com/slundberg/shap). +#' [shap](https://github.com/shap/shap). #' #' @references #' Strumbelj, E., and Igor K. (2014). Explaining prediction models and diff --git a/R/gen_friedman.R b/R/gen_friedman.R index a6f951c..77df673 100644 --- a/R/gen_friedman.R +++ b/R/gen_friedman.R @@ -7,6 +7,8 @@ #' @param Integer specifying the number of bins to split `x` into. #' #' @keywords internal +#' +#' @noRd bin <- function(x, n_bins) { quantiles <- stats::quantile(x, probs = seq(from = 0, to = 1, length = n_bins + 1)) diff --git a/R/print.R b/R/print.R deleted file mode 100644 index baa5504..0000000 --- a/R/print.R +++ /dev/null @@ -1,10 +0,0 @@ -#' #' @keywords internal -#' #' -#' #' @export -#' print.explain <- function(x, ...) { -#' if (is.matrix(x)) { -#' x <- data.matrix(as.data.frame(x)) -#' } -#' print(x) -#' invisible(x) -#' } diff --git a/man/bin.Rd b/man/bin.Rd deleted file mode 100644 index c46807b..0000000 --- a/man/bin.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gen_friedman.R -\name{bin} -\alias{bin} -\title{Bin a numeric vector} -\usage{ -bin(x, n_bins) -} -\arguments{ -\item{x}{A numeric vector.} - -\item{Integer}{specifying the number of bins to split \code{x} into.} -} -\description{ -Function to bin a numeric vector -} -\keyword{internal} diff --git a/man/explain.Rd b/man/explain.Rd index 7772b1f..c38d898 100644 --- a/man/explain.Rd +++ b/man/explain.Rd @@ -176,7 +176,7 @@ Setting \code{exact = TRUE} with a linear model (i.e., an \code{\link[stats:lm]{stats::lm()}} or \code{\link[stats:glm]{stats::glm()}} object) assumes that the input features are independent. Also, setting \code{adjust = TRUE} is experimental and we follow the same approach as in -\href{https://github.com/slundberg/shap}{shap}. +\href{https://github.com/shap/shap}{shap}. } \examples{ # diff --git a/slowtests/slowtests-boston_cache/html/py-rf_19cfdea6266670ce27001b32d9083ac3.rdb b/slowtests/slowtests-boston_cache/html/py-rf_19cfdea6266670ce27001b32d9083ac3.rdb new file mode 100644 index 0000000..e69de29 diff --git a/slowtests/slowtests-boston_cache/html/py-setup_c681e02caf3f3488bd640b91fddbf054.rdb b/slowtests/slowtests-boston_cache/html/py-setup_c681e02caf3f3488bd640b91fddbf054.rdb new file mode 100644 index 0000000..e69de29 diff --git a/slowtests/slowtests-boston_cache/html/py-shap_d04b6d6c1ff57939ff5765fc79ef4037.rdb b/slowtests/slowtests-boston_cache/html/py-shap_d04b6d6c1ff57939ff5765fc79ef4037.rdb new file mode 100644 index 0000000..e69de29 diff --git a/slowtests/slowtests-boston_cache/html/r-plot-results_f963b9beb39bc73e027e71f2a3ea4228.rdb b/slowtests/slowtests-boston_cache/html/r-plot-results_f963b9beb39bc73e027e71f2a3ea4228.rdb new file mode 100644 index 0000000..e69de29 diff --git a/src/Makevars b/src/Makevars index dd3fb6d..caea957 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,4 +1,3 @@ -CXX_STD = CXX11 ## PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) ## PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/vignettes/fastshap.Rmd b/vignettes/fastshap.Rmd index bec0489..ad8fa17 100644 --- a/vignettes/fastshap.Rmd +++ b/vignettes/fastshap.Rmd @@ -153,7 +153,7 @@ set.seed(2129) # for reproducibility ## [1] "explain" "matrix" "array" ``` -Note that the MC approach used by [fastshap](https://cran.r-project.org/package=fastshap) (and other packages) will not produce Shapley-based feature contributions that satisfy the [efficiency property](https://christophm.github.io/interpretable-ml-book/shapley.html#the-shapley-value-in-detail); that is, they won't add up to the difference between the corresponding prediction and baseline (i.e., average training prediction). However, borrowing a trick from the popular Python [shap](https://github.com/slundberg/shap) library, we can use a regression-based adjustment to correct the sum. To do this, simply set `adjust = TRUE` in the call to `explain()`^[Note that `nsim` has to be larger than one whenever setting `adjust = TRUE`.]: +Note that the MC approach used by [fastshap](https://cran.r-project.org/package=fastshap) (and other packages) will not produce Shapley-based feature contributions that satisfy the [efficiency property](https://christophm.github.io/interpretable-ml-book/shapley.html#the-shapley-value-in-detail); that is, they won't add up to the difference between the corresponding prediction and baseline (i.e., average training prediction). However, borrowing a trick from the popular Python [shap](https://github.com/shap/shap) library, we can use a regression-based adjustment to correct the sum. To do this, simply set `adjust = TRUE` in the call to `explain()`^[Note that `nsim` has to be larger than one whenever setting `adjust = TRUE`.]: ```r diff --git a/vignettes/fastshap.Rmd.orig b/vignettes/fastshap.Rmd.orig index eb5d77b..21bba30 100644 --- a/vignettes/fastshap.Rmd.orig +++ b/vignettes/fastshap.Rmd.orig @@ -97,7 +97,7 @@ set.seed(2129) # for reproducibility nsim = 1000)) ``` -Note that the MC approach used by [fastshap](https://cran.r-project.org/package=fastshap) (and other packages) will not produce Shapley-based feature contributions that satisfy the [efficiency property](https://christophm.github.io/interpretable-ml-book/shapley.html#the-shapley-value-in-detail); that is, they won't add up to the difference between the corresponding prediction and baseline (i.e., average training prediction). However, borrowing a trick from the popular Python [shap](https://github.com/slundberg/shap) library, we can use a regression-based adjustment to correct the sum. To do this, simply set `adjust = TRUE` in the call to `explain()`^[Note that `nsim` has to be larger than one whenever setting `adjust = TRUE`.]: +Note that the MC approach used by [fastshap](https://cran.r-project.org/package=fastshap) (and other packages) will not produce Shapley-based feature contributions that satisfy the [efficiency property](https://christophm.github.io/interpretable-ml-book/shapley.html#the-shapley-value-in-detail); that is, they won't add up to the difference between the corresponding prediction and baseline (i.e., average training prediction). However, borrowing a trick from the popular Python [shap](https://github.com/shap/shap) library, we can use a regression-based adjustment to correct the sum. To do this, simply set `adjust = TRUE` in the call to `explain()`^[Note that `nsim` has to be larger than one whenever setting `adjust = TRUE`.]: ```{r titanic-explain-jack-adjust} set.seed(2133) # for reproducibility