diff --git a/DESCRIPTION b/DESCRIPTION index 6baa79c9..31f789bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,13 @@ Package: auditor -Title: Model audit - verification, validation, and error analysis +Title: Model Audit - Verification, Validation, and Error Analysis Version: 0.2.0 Authors@R: c( person("Alicja", "Gosiewska", , "alicjagosiewska@gmail.com", role = c("aut", "cre")), person("Przemyslaw", "Biecek", role = c("aut", "ths")) ) -Description: The 'auditor' package provides an easy to use unified interface for creating validation - plots for any model. This visualizations allow to asses and compare the goodness of fit, performance, - and similarity of models. The auditor help statisticians, data scientists, and researchers can avoid - repetitive work consisting of writing code needed to create residuals plots. +Description: Provides an easy to use unified interface for creating validation plots for any model. + The 'auditor' helps to avoid repetitive work consisting of writing code needed to create residual plots. + These visualizations allow to assess and compare the goodness of fit, performance, and similarity of models. 
Depends: R (>= 3.0.0) License: GPL Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 63284e8e..114016bd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# version 0.2.0 +# version 0.2.0 - released on CRAN ## 07/05.2019 - new plot functions: `plotLift()`, `plotCumulativeGain()`, `plotTwoSidedECDF()`, `plotModelCorrelation()`, `plotResidualDensity()`, `plotModelPCA()`, plotPrediction()`, plotModelRanking()` @@ -10,6 +10,7 @@ - `variable = NULL` parameter in `scoreDW()`, `scoreRuns()`, plotAutocorrelation()`, `plotACF()` causes the residuals to be not sorted by any variable - densities in `plotResidualDensity()` may be now separated by variable values - for function `score()` parameter `score` is renamed into `type` +- new examples # version 0.1.1.0000 ## 09/03/2018 diff --git a/R/audit.R b/R/audit.R index a9694e91..6a622eb3 100644 --- a/R/audit.R +++ b/R/audit.R @@ -18,7 +18,7 @@ #' \item \code{model} the audited model, #' \item \code{fitted.values} fitted values from model, #' \item \code{data} data used for fitting the model, -#' \item \code{y} vecor with values of predicted variable used for fittng the model, +#' \item \code{y} vector with values of predicted variable used for fitting the model, #' \item \code{predict.function} function that were used for model predictions, #' \item \code{residual.function} function that were used for calculating model residuals, #' \item \code{residuals} diff --git a/R/plotACF.R b/R/plotACF.R index 31f74f08..d4b380b3 100644 --- a/R/plotACF.R +++ b/R/plotACF.R @@ -8,6 +8,19 @@ #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' @param alpha Confidence level of the interval. 
#' +#' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotACF(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotACF(lm_au, rf_au) +#' +#' #' @import ggplot2 #' @importFrom stats qnorm acf #' diff --git a/R/plotAutocorrelation.R b/R/plotAutocorrelation.R index e8a2478a..91f628f1 100644 --- a/R/plotAutocorrelation.R +++ b/R/plotAutocorrelation.R @@ -7,6 +7,12 @@ #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' @param score Logical, if TRUE values of \link{scoreDW} and \link{scoreRuns} will be added to plot. #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotAutocorrelation(lm_au) +#' #' @import ggplot2 #' #' @export diff --git a/R/plotCooksDistance.R b/R/plotCooksDistance.R index a45ca7f9..04af998c 100644 --- a/R/plotCooksDistance.R +++ b/R/plotCooksDistance.R @@ -16,6 +16,14 @@ #' #' For model classes other than lm and glm the distances are computed directly from the definition. 
#' +#' +#' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotCooksDistance(lm_au) +#' #' @import ggplot2 #' #' @export diff --git a/R/plotCumulativeGain.R b/R/plotCumulativeGain.R index a62f8878..74e19d52 100644 --- a/R/plotCumulativeGain.R +++ b/R/plotCumulativeGain.R @@ -1,6 +1,6 @@ #' @title Cumulative Gain Chart #' -#' @description Cumulative Gain Chartis is a plot of the rate of positive prediction against true positive rate for the different thresholds. +#' @description Cumulative Gain Chart is a plot of the rate of positive prediction against true positive rate for the different thresholds. #' It is useful for measuring and comparing the accuracy of the classificators. #' @param object An object of class ModelAudit. #' @param ... Other modelAudit objects to be plotted together. @@ -9,6 +9,15 @@ #' #' @seealso \code{\link{plot.modelAudit}} #' +#' @examples +#' library(mlbench) +#' data("PimaIndiansDiabetes") +#' Pima <- PimaIndiansDiabetes +#' Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +#' glm_model <- glm(diabetes~., family=binomial, data=Pima) +#' glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +#' plotCumulativeGain(glm_au) +#' #' @import ggplot2 #' @importFrom ROCR performance prediction #' diff --git a/R/plotHalfNormal.R b/R/plotHalfNormal.R index 25baa993..1fc6be71 100644 --- a/R/plotHalfNormal.R +++ b/R/plotHalfNormal.R @@ -8,7 +8,7 @@ #' #' @param object ModelAudit object, fitted model object or numeric vector. #' @param score If TRUE score based on probability density function is displayed on the plot. -#' @param quant.scale if TRUE values on avis are on quantile scale. +#' @param quant.scale if TRUE values on axis are on quantile scale. #' @param main Title of plot. #' @param xlab The text for the x axis. #' @param ylab The text for the y axis. 
diff --git a/R/plotLift.R b/R/plotLift.R index a86823d9..0944fec9 100644 --- a/R/plotLift.R +++ b/R/plotLift.R @@ -11,6 +11,15 @@ #' #' @return ggplot object #' +#' @examples +#' library(mlbench) +#' data("PimaIndiansDiabetes") +#' Pima <- PimaIndiansDiabetes +#' Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +#' glm_model <- glm(diabetes~., family=binomial, data=Pima) +#' glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +#' plotLIFT(glm_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotModelCorrelation.R b/R/plotModelCorrelation.R index 0f00204a..d3ff473e 100644 --- a/R/plotModelCorrelation.R +++ b/R/plotModelCorrelation.R @@ -8,6 +8,15 @@ #' #' @return ggplot object #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotModelCorrelation(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotModelPCA.R b/R/plotModelPCA.R index a00d692f..7bcf2aa0 100644 --- a/R/plotModelPCA.R +++ b/R/plotModelPCA.R @@ -5,11 +5,20 @@ #' #' @param object An object of class ModelAudit, #' @param ... Other modelAudit objects to be plotted together. -#' @param scale A logical value indicating whether the models residuals should be scaled bfore the analysis. +#' @param scale A logical value indicating whether the models residuals should be scaled before the analysis. #' @param invisible A text specifying the elements to be hidden on the plot. Default value is "none". Allowed values are "model", "observ". 
#' #' @return ggplot object #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotModelPCA(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotModelRanking.R b/R/plotModelRanking.R index 3c492a0d..5c83aca2 100644 --- a/R/plotModelRanking.R +++ b/R/plotModelRanking.R @@ -9,6 +9,15 @@ #' #' @return ggplot object #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotModelRanking(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotPrediction.R b/R/plotPrediction.R index de04e17d..444c197b 100644 --- a/R/plotPrediction.R +++ b/R/plotPrediction.R @@ -7,6 +7,17 @@ #' @param ... Other modelAudit objects to be plotted together. #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). 
#' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotPrediction(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotPrediction(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotREC.R b/R/plotREC.R index b3a49188..0f47eef3 100644 --- a/R/plotREC.R +++ b/R/plotREC.R @@ -19,17 +19,15 @@ #' @seealso \code{\link{plot.modelAudit}, \link{plotROC}, \link{plotRROC}} #' #' @examples -#' library(auditor) -#' library(randomForest) #' library(car) -#' model_lm <- lm(prestige ~ education + women + income, data = Prestige) -#' audit_lm <- audit(model_lm) -#' -#' plotREC(audit_lm) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotREC(lm_au) #' -#' model_rf <- randomForest(prestige ~ education + women + income, data = Prestige) -#' audit_rf <- audit(model_rf) -#' plotREC(audit_lm, audit_rf) +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotREC(lm_au, rf_au) #' #' #' @export diff --git a/R/plotROC.R b/R/plotROC.R index d5f5d520..3c2a6948 100644 --- a/R/plotROC.R +++ b/R/plotROC.R @@ -14,17 +14,13 @@ #' @import plotROC #' #' @examples -#' library(auditor) #' library(mlbench) #' data("PimaIndiansDiabetes") -#' -#' model.glm <- glm(diabetes~., family=binomial, data=PimaIndiansDiabetes) -#' au.glm <- audit(model.glm, label="class glm") -#' plotROC(au.glm) -#' -#' model.glm.press <- glm(diabetes~pressure, family=binomial, data=PimaIndiansDiabetes) -#' au.glm.press <- audit(model.glm.press) -#' plotROC(au.glm, au.glm.press) +#' Pima <- 
PimaIndiansDiabetes +#' Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +#' glm_model <- glm(diabetes~., family=binomial, data=Pima) +#' glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +#' plotROC(glm_au) #' #' @export diff --git a/R/plotRROC.R b/R/plotRROC.R index 7da0aaf6..fe1cabc8 100644 --- a/R/plotRROC.R +++ b/R/plotRROC.R @@ -9,11 +9,11 @@ #' #' @return ggplot object #' -#' @details For RROC curves we use a shift, which is an equvalent to the threshold for ROC curves. +#' @details For RROC curves we use a shift, which is an equivalent to the threshold for ROC curves. #' For each observation we calculate new prediction: \eqn{\hat{y}'=\hat{y}+s} where s is the shift. #' Therefore, there are different error values for each shift: \eqn{e_i = \hat{y_i}' - y_i} #' -#' Over-estimation is caluclates as: \eqn{OVER= \sum(e_i|e_i>0)}. +#' Over-estimation is calculated as: \eqn{OVER= \sum(e_i|e_i>0)}. #' #' Under-estimation is calculated as: \eqn{UNDER = \sum(e_i|e_i<0)}. #' @@ -27,17 +27,15 @@ #' #' #' @examples -#' library(auditor) -#' library(randomForest) #' library(car) -#' model_lm <- lm(prestige ~ education + women + income, data = Prestige) -#' audit_lm <- audit(model_lm) -#' -#' plotRROC(audit_lm) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotRROC(lm_au) #' -#' model_rf <- randomForest(prestige ~ education + women + income, data = Prestige) -#' audit_rf <- audit(model_rf) -#' plotRROC(audit_lm, audit_rf) +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotRROC(lm_au, rf_au) #' #' @import ggplot2 #' diff --git a/R/plotResidual.R b/R/plotResidual.R index 534d0f04..1dfdec98 100644 --- a/R/plotResidual.R +++ b/R/plotResidual.R @@ -6,6 +6,17 @@ #' @param variable Name of model variable to order residuals. 
If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' @param ... Other modelAudit objects to be plotted together. #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotResidual(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotResidual(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotResidualDensity.R b/R/plotResidualDensity.R index 184ad226..55cc9a06 100644 --- a/R/plotResidualDensity.R +++ b/R/plotResidualDensity.R @@ -8,6 +8,17 @@ #' #' @return ggplot object #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotResidualDensity(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotResidualDensity(lm_au, rf_au) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plotScaleLocation.R b/R/plotScaleLocation.R index 7bc50536..b85c3d56 100644 --- a/R/plotScaleLocation.R +++ b/R/plotScaleLocation.R @@ -8,6 +8,13 @@ #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. 
If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' @param score A logical value. If TRUE value of \link{scoreGQ} will be added. #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotScaleLocation(lm_au) +#' +#' #' @import ggplot2 #' @importFrom stats median #' diff --git a/R/plotTwoSidedECDF.R b/R/plotTwoSidedECDF.R index cda5cc3d..cc92acc8 100644 --- a/R/plotTwoSidedECDF.R +++ b/R/plotTwoSidedECDF.R @@ -11,6 +11,17 @@ #' #' @return ggplot object #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plotTwoSidedECDF(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plotTwoSidedECDF(lm_au, rf_au, y.reversed = TRUE) +#' #' @seealso \code{\link{plot.modelAudit}} #' #' @import ggplot2 diff --git a/R/plot_ModelAudit.R b/R/plot_ModelAudit.R index 1c5f4c8d..15a8491a 100644 --- a/R/plot_ModelAudit.R +++ b/R/plot_ModelAudit.R @@ -3,17 +3,29 @@ #' @description This function provides several diagnostic plots for regression and classification models. #' #' @param x object of class modelAudit -#' @param ... other arguments dependent on the type of plot or additionam objects of class modelAudit +#' @param ... other arguments dependent on the type of plot or additional objects of class modelAudit #' @param type the type of plot. 
Possible values: 'ACF', 'Autocorrelation', 'CumulativeGain', 'CooksDistance', 'HalfNormal', 'Residuals', 'LIFT', -#' ModelPCA', 'ModelCorreltion', 'Prediction', 'REC', 'ResidualDensity', 'Residual', 'ROC', 'RROC', -#' ScaleLocation', 'TwoSidedECDF' (for detailed description see functions in seealso section). +#' 'ModelPCA', 'ModelRanking', 'ModelCorrelation', 'Prediction', 'REC', 'ResidualDensity', 'Residual', 'ROC', 'RROC', +#' 'ScaleLocation', 'TwoSidedECDF' (for detailed description see functions in see also section). #' @param ask logical; if TRUE, the user is asked before each plot, see \code{\link[graphics]{par}(ask=)}. #' #' @seealso \code{\link{plotACF}, \link{plotAutocorrelation}, \link{plotCumulativeGain}, \link{plotCooksDistance}, -#' \link{plotHalfNormal}, \link{plotResidual}, \link{plotLIFT}, \link{plotModelPCA}, \link{plotModelCorrelation}, +#' \link{plotHalfNormal}, \link{plotResidual}, \link{plotLIFT}, \link{plotModelPCA}, \link{plotModelRanking}, \link{plotModelCorrelation}, #' \link{plotPrediction}, \link{plotREC}, \link{plotResidualDensity}, \link{plotResidual}, \link{plotROC}, #' \link{plotRROC}, \link{plotScaleLocation}, \link{plotTwoSidedECDF}} #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' plot(lm_au) +#' +#' library(randomForest) +#' rf_model <- randomForest(prestige~education + women + income, data = Prestige) +#' rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +#' plot(lm_au, rf_au, type = "ModelRanking") +#' +#' #' @importFrom grDevices devAskNewPage #' @importFrom graphics plot #' @@ -26,7 +38,7 @@ plot.modelAudit <- function(x, ..., type="Residual", ask = TRUE){ object <- x plotNames <- c('ACF', 'Autocorrelation', 'CumulativeGain', 'CooksDistance', 'HalfNormal', 'Residual', 'LIFT', - 'ModelPCA', 'ModelCorrelation', 'Prediction', 'REC', 'ResidualDensity', 'Residuals', 'ROC', 'RROC', + 
'ModelPCA', 'ModelRanking', 'ModelCorrelation', 'Prediction', 'REC', 'ResidualDensity', 'Residuals', 'ROC', 'RROC', 'ScaleLocation', 'TwoSidedECDF') if(!all(type %in% plotNames)){ @@ -61,6 +73,7 @@ plotTypePlot <- function(x, ..., type){ HalfNormal = { return(plotHalfNormal(x, ...)) }, LIFT = {return(plotLIFT(x, ...))}, ModelPCA = {return(plotModelPCA(x, ...))}, + ModelRanking = {return(plotModelRanking(x, ...))}, ModelCorrelation = {return(plotModelCorrelation(x, ...))}, Prediction = {return(plotPrediction(x, ...))}, REC = { return(plotREC(x, ...)) }, diff --git a/R/score.R b/R/score.R index ba715d98..335d5773 100644 --- a/R/score.R +++ b/R/score.R @@ -5,13 +5,19 @@ #' #' @param object Object An object of class modelAudit. #' @param type The type of score to be calculated. Possible values: 'Cook', 'DW', 'GQ', 'HalfNormal', 'MAE', 'MSE', 'REC', 'RMSE', 'ROC', 'RROC', 'Runs' -#' (for detailed description see functions in seealso section). +#' (for detailed description see functions in see also section). #' @param ... Other arguments dependent on the type of score. #' #' @seealso \code{\link{scoreCooksDistance}, \link{scoreDW}, \link{scoreGQ}, \link{scoreHalfNormal}, \link{scoreMAE}, \link{scoreMSE}, \link{scoreREC}, \link{scoreROC}, \link{scoreRROC}, \link{scoreRuns}} #' #' @return an object of class scoreAudit, except Cooks distance, where numeric vector is returned #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' score(lm_au, type = 'Runs') +#' #' @export score <- function(object, type = 'MSE', ...){ diff --git a/R/scoreCooksDistance.R b/R/scoreCooksDistance.R index 42872098..0111301a 100644 --- a/R/scoreCooksDistance.R +++ b/R/scoreCooksDistance.R @@ -15,6 +15,13 @@ #' Models of classes other than lm and glm the distances are computed directly from the definition, #' so this may take a while. 
#' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreCooksDistance(lm_au) +#' +#' #' @importFrom stats cooks.distance update #' #' @seealso \code{\link{score}} diff --git a/R/scoreDW.R b/R/scoreDW.R index 144be8a0..2994aead 100644 --- a/R/scoreDW.R +++ b/R/scoreDW.R @@ -1,12 +1,19 @@ #' @title Durbin-Watson Score #' #' @description Score based on Durbin-Watson test statistic. -#' The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +#' The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value makes sense only #' when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. #' #' @param object object An object of class ModelAudit #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreDW(lm_au) +#' +#' #' @importFrom car durbinWatsonTest #' #' @return an object of class scoreAudit diff --git a/R/scoreGQ.R b/R/scoreGQ.R index b6b94846..1b24a68f 100644 --- a/R/scoreGQ.R +++ b/R/scoreGQ.R @@ -11,13 +11,19 @@ #' where \eqn{MSE = (RSS)/(n-p)} #' where n is the number of observations and p is the number of variables . #' -#' The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +#' The score value is helpful in comparing models. 
It is worth pointing out that results of tests like p-value makes sense only #' when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. #' \code{scoreGQ} function uses a two-sided F-test. #' #' @param object Object An object of class ModelAudit. #' @param variable Name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreGQ(lm_au) +#' #' @importFrom stats update rstandard predict pf sd #' #' @return an object of class scoreAudit diff --git a/R/scoreHalfNormal.R b/R/scoreHalfNormal.R index 894a4910..65d14ba2 100644 --- a/R/scoreHalfNormal.R +++ b/R/scoreHalfNormal.R @@ -10,6 +10,13 @@ #' @param object ModelAudit object or fitted model. #' @param ... Extra arguments passed to \link[hnp]{hnp}. 
#' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreHalfNormal(lm_au) +#' +#' #' @importFrom hnp hnp #' #' @export diff --git a/R/scoreMAE.R b/R/scoreMAE.R index 741e0777..0ddad656 100644 --- a/R/scoreMAE.R +++ b/R/scoreMAE.R @@ -6,6 +6,13 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreMAE(lm_au) +#' +#' #' @seealso \code{\link{score}} #' #' @export diff --git a/R/scoreMSE.R b/R/scoreMSE.R index 18b4dc4d..2b1b1569 100644 --- a/R/scoreMSE.R +++ b/R/scoreMSE.R @@ -6,6 +6,12 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreMSE(lm_au) +#' #' @seealso \code{\link{score}} #' #' @export diff --git a/R/scoreREC.R b/R/scoreREC.R index cd39fc25..1c8be5d4 100644 --- a/R/scoreREC.R +++ b/R/scoreREC.R @@ -7,6 +7,13 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreREC(lm_au) +#' +#' #' @seealso \code{\link{plotREC}} #' #' @references J. Bi, and K. P. Bennet, "Regression error characteristic curves," in Proc. 20th Int. Conf. Machine Learning, Washington DC, 2003, pp. 
43-50 diff --git a/R/scoreRMSE.R b/R/scoreRMSE.R index fcd9a4c1..fb74c03a 100644 --- a/R/scoreRMSE.R +++ b/R/scoreRMSE.R @@ -6,6 +6,13 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreRMSE(lm_au) +#' +#' #' @seealso \code{\link{score}} #' #' @export diff --git a/R/scoreROC.R b/R/scoreROC.R index 9e871e5b..9b097282 100644 --- a/R/scoreROC.R +++ b/R/scoreROC.R @@ -5,6 +5,15 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(mlbench) +#' data("PimaIndiansDiabetes") +#' Pima <- PimaIndiansDiabetes +#' Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +#' glm_model <- glm(diabetes~., family=binomial, data=Pima) +#' glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +#' scoreROC(glm_au) +#' #' @seealso \code{\link{plotROC}} #' #' @importFrom ROCR performance prediction diff --git a/R/scoreRROC.R b/R/scoreRROC.R index 7bff7241..1b988825 100644 --- a/R/scoreRROC.R +++ b/R/scoreRROC.R @@ -6,6 +6,13 @@ #' #' @return an object of class scoreAudit #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreRROC(lm_au) +#' +#' #' @seealso \code{\link{plotRROC}} #' #' @references Hernández-Orallo, José. 2013. ‘ROC Curves for Regression’. Pattern Recognition 46 (12): 3395–3411. diff --git a/R/scoreRuns.R b/R/scoreRuns.R index cca3a682..25506146 100644 --- a/R/scoreRuns.R +++ b/R/scoreRuns.R @@ -1,12 +1,18 @@ #' @title Runs Score #' #' @description Score based on Runs test statistic. Note that this test is not very strong. It utilizes only signs of the residuals. -#' The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +#' The score value is helpful in comparing models. 
It is worth pointing out that results of tests like p-value makes sense only #' when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. #' #' @param object object An object of class ModelAudit. #' @param variable name of model variable to order residuals. If value is NULL data order is taken. If value is "Predicted response" or "Fitted values" then data is ordered by fitted values. If value is "Observed response" the data is ordered by a vector of actual response (\code{y} parameter passed to the \code{\link{audit}} function). #' +#' @examples +#' library(car) +#' lm_model <- lm(prestige~education + women + income, data = Prestige) +#' lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +#' scoreRuns(lm_au) +#' #' @importFrom tseries runs.test #' #' @export diff --git a/docs/articles/HalfNormal.html b/docs/articles/HalfNormal.html index 091eab60..bb3baad1 100644 --- a/docs/articles/HalfNormal.html +++ b/docs/articles/HalfNormal.html @@ -73,7 +73,7 @@

The half-normal plots

Alicja Gosiewska

-

2018-05-09

+

2018-05-11

diff --git a/docs/articles/Intorduction_into_model_audit.html b/docs/articles/Intorduction_into_model_audit.html index ce2add69..653fbf5b 100644 --- a/docs/articles/Intorduction_into_model_audit.html +++ b/docs/articles/Intorduction_into_model_audit.html @@ -73,7 +73,7 @@

Introduction into model audit

Alicja Gosiewska

-

2018-05-09

+

2018-05-11

diff --git a/docs/index.html b/docs/index.html index e2a6c661..faec5123 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,7 +5,7 @@ -Model audit - verification, validation, and error analysis • auditor +Model Audit - Verification, Validation, and Error Analysis • auditor @@ -171,6 +171,8 @@

-
+

-version 0.2.0

+version 0.2.0 - released on CRAN 2018-05-11 +

07/05.2019

@@ -128,12 +129,14 @@

  • densities in plotResidualDensity() may be now separated by variable values
  • for function score() parameter score is renamed into type
  • +
  • new examples
  • -version 0.1.1.0000

    +version 0.1.1.0000 Unreleased +

    09/03/2018

    @@ -147,7 +150,8 @@

    -version 0.1.0.0000

    +version 0.1.0.0000 Unreleased +

    18/02/2018

    @@ -165,7 +169,8 @@

    -version 0.0.2.0000

    +version 0.0.2.0000 Unreleased +

    23/11/2017

    @@ -180,7 +185,8 @@

    -version 0.0.1.0000

    +version 0.0.1.0000 Unreleased +

    9/11/2017

    @@ -208,7 +214,7 @@

    Contents

    #> randomForest 4.6-14
    #> Type rfNews() to see new features/changes/bug fixes.
    model.rf <- randomForest(Species ~ ., data=iris) audit.rf <- audit(model.rf)

    diff --git a/docs/reference/plot.modelAudit-1.png b/docs/reference/plot.modelAudit-1.png new file mode 100644 index 00000000..1cc923d3 Binary files /dev/null and b/docs/reference/plot.modelAudit-1.png differ diff --git a/docs/reference/plot.modelAudit-2.png b/docs/reference/plot.modelAudit-2.png new file mode 100644 index 00000000..d91e3e9f Binary files /dev/null and b/docs/reference/plot.modelAudit-2.png differ diff --git a/docs/reference/plotACF-1.png b/docs/reference/plotACF-1.png new file mode 100644 index 00000000..a2897ebf Binary files /dev/null and b/docs/reference/plotACF-1.png differ diff --git a/docs/reference/plotACF-2.png b/docs/reference/plotACF-2.png new file mode 100644 index 00000000..a57e20a5 Binary files /dev/null and b/docs/reference/plotACF-2.png differ diff --git a/docs/reference/plotACF.html b/docs/reference/plotACF.html index 832e4494..f44f4ca4 100644 --- a/docs/reference/plotACF.html +++ b/docs/reference/plotACF.html @@ -125,12 +125,25 @@

    Ar +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotACF(lm_au)
    +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotACF(lm_au, rf_au)
    + +
    diff --git a/docs/reference/plotAutocorrelation-1.png b/docs/reference/plotAutocorrelation-1.png new file mode 100644 index 00000000..2f1724bc Binary files /dev/null and b/docs/reference/plotAutocorrelation-1.png differ diff --git a/docs/reference/plotAutocorrelation.html b/docs/reference/plotAutocorrelation.html index 404937a6..b4c5cc7c 100644 --- a/docs/reference/plotAutocorrelation.html +++ b/docs/reference/plotAutocorrelation.html @@ -121,12 +121,20 @@

    Ar +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotAutocorrelation(lm_au)
    +
    diff --git a/docs/reference/plotCooksDistance-1.png b/docs/reference/plotCooksDistance-1.png new file mode 100644 index 00000000..85923136 Binary files /dev/null and b/docs/reference/plotCooksDistance-1.png differ diff --git a/docs/reference/plotCooksDistance.html b/docs/reference/plotCooksDistance.html index 6cb26f27..9b3ffa45 100644 --- a/docs/reference/plotCooksDistance.html +++ b/docs/reference/plotCooksDistance.html @@ -130,6 +130,12 @@

    Details

    For model classes other than lm and glm the distances are computed directly from the definition.

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotCooksDistance(lm_au)
    +
    diff --git a/docs/reference/plotCumulativeGain-1.png b/docs/reference/plotCumulativeGain-1.png new file mode 100644 index 00000000..5ec7ea0e Binary files /dev/null and b/docs/reference/plotCumulativeGain-1.png differ diff --git a/docs/reference/plotCumulativeGain.html b/docs/reference/plotCumulativeGain.html index 503a9bdb..d9953a67 100644 --- a/docs/reference/plotCumulativeGain.html +++ b/docs/reference/plotCumulativeGain.html @@ -98,7 +98,7 @@

    Cumulative Gain Chart

    -

    Cumulative Gain Chartis is a plot of the rate of positive prediction against true positive rate for the different thresholds. +

    Cumulative Gain Chart is a plot of the rate of positive prediction against true positive rate for the different thresholds. It is useful for measuring and comparing the accuracy of the classificators.

    @@ -126,6 +126,15 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotCumulativeGain(glm_au)
    +
    diff --git a/docs/reference/plotHalfNormal.html b/docs/reference/plotHalfNormal.html index ac293120..44cc877d 100644 --- a/docs/reference/plotHalfNormal.html +++ b/docs/reference/plotHalfNormal.html @@ -121,7 +121,7 @@

    Ar quant.scale -

    if TRUE values on avis are on quantile scale.

    +

    if TRUE values on axis are on quantile scale.

    xlab diff --git a/docs/reference/plotLIFT-1.png b/docs/reference/plotLIFT-1.png index 75621c83..452f160c 100644 Binary files a/docs/reference/plotLIFT-1.png and b/docs/reference/plotLIFT-1.png differ diff --git a/docs/reference/plotLIFT.html b/docs/reference/plotLIFT.html index 670b9884..f01817dc 100644 --- a/docs/reference/plotLIFT.html +++ b/docs/reference/plotLIFT.html @@ -137,6 +137,15 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotLIFT(glm_au)
    +
    diff --git a/docs/reference/plotModelCorrelation-1.png b/docs/reference/plotModelCorrelation-1.png new file mode 100644 index 00000000..d48a51dd Binary files /dev/null and b/docs/reference/plotModelCorrelation-1.png differ diff --git a/docs/reference/plotModelCorrelation.html b/docs/reference/plotModelCorrelation.html index 6e706194..0627f5f2 100644 --- a/docs/reference/plotModelCorrelation.html +++ b/docs/reference/plotModelCorrelation.html @@ -129,6 +129,15 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelCorrelation(lm_au, rf_au)
    +
    diff --git a/docs/reference/plotModelPCA-1.png b/docs/reference/plotModelPCA-1.png new file mode 100644 index 00000000..883134ef Binary files /dev/null and b/docs/reference/plotModelPCA-1.png differ diff --git a/docs/reference/plotModelPCA.html b/docs/reference/plotModelPCA.html index e4f6fda1..81a2b15d 100644 --- a/docs/reference/plotModelPCA.html +++ b/docs/reference/plotModelPCA.html @@ -117,7 +117,7 @@

    Ar scale -

    A logical value indicating whether the models residuals should be scaled bfore the analysis.

    +

    A logical value indicating whether the models residuals should be scaled before the analysis.

    invisible @@ -134,6 +134,15 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelPCA(lm_au, rf_au)
    +
    diff --git a/docs/reference/plotModelRanking-1.png b/docs/reference/plotModelRanking-1.png new file mode 100644 index 00000000..1e1ea3ce Binary files /dev/null and b/docs/reference/plotModelRanking-1.png differ diff --git a/docs/reference/plotModelRanking.html b/docs/reference/plotModelRanking.html index c7c082b6..4a85ca4c 100644 --- a/docs/reference/plotModelRanking.html +++ b/docs/reference/plotModelRanking.html @@ -134,6 +134,15 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelRanking(lm_au, rf_au)
    +
    diff --git a/docs/reference/plotPrediction-1.png b/docs/reference/plotPrediction-1.png new file mode 100644 index 00000000..a0c23050 Binary files /dev/null and b/docs/reference/plotPrediction-1.png differ diff --git a/docs/reference/plotPrediction-2.png b/docs/reference/plotPrediction-2.png new file mode 100644 index 00000000..92fa0c2b Binary files /dev/null and b/docs/reference/plotPrediction-2.png differ diff --git a/docs/reference/plotPrediction.html b/docs/reference/plotPrediction.html index 89476bd7..5831d3e1 100644 --- a/docs/reference/plotPrediction.html +++ b/docs/reference/plotPrediction.html @@ -125,6 +125,16 @@

    See a

    plot.modelAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotPrediction(lm_au)
    +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotPrediction(lm_au, rf_au)
    +
    diff --git a/docs/reference/plotREC-2.png b/docs/reference/plotREC-2.png index 3ae3a998..f9cd48b8 100644 Binary files a/docs/reference/plotREC-2.png and b/docs/reference/plotREC-2.png differ diff --git a/docs/reference/plotREC.html b/docs/reference/plotREC.html index 61a66f11..ef48636a 100644 --- a/docs/reference/plotREC.html +++ b/docs/reference/plotREC.html @@ -136,16 +136,14 @@

    See a

    Examples

    -
    library(auditor) +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotREC(lm_au)
    library(randomForest) -library(car) -model_lm <- lm(prestige ~ education + women + income, data = Prestige) -audit_lm <- audit(model_lm) - -plotREC(audit_lm)
    -model_rf <- randomForest(prestige ~ education + women + income, data = Prestige) -audit_rf <- audit(model_rf) -plotREC(audit_lm, audit_rf)
    +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotREC(lm_au, rf_au)
    diff --git a/docs/reference/plotROC-1.png b/docs/reference/plotROC-1.png index 6aded5ad..31a45df1 100644 Binary files a/docs/reference/plotROC-1.png and b/docs/reference/plotROC-1.png differ diff --git a/docs/reference/plotROC.html b/docs/reference/plotROC.html index 8e1daa14..cbafab5a 100644 --- a/docs/reference/plotROC.html +++ b/docs/reference/plotROC.html @@ -127,16 +127,13 @@

    See a

    Examples

    -
    library(auditor) -library(mlbench) +
    library(mlbench) data("PimaIndiansDiabetes") - -model.glm <- glm(diabetes~., family=binomial, data=PimaIndiansDiabetes) -au.glm <- audit(model.glm, label="class glm") -plotROC(au.glm)
    -model.glm.press <- glm(diabetes~pressure, family=binomial, data=PimaIndiansDiabetes) -au.glm.press <- audit(model.glm.press) -plotROC(au.glm, au.glm.press)
    +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotROC(glm_au)
    diff --git a/docs/reference/plotResidualDensity-1.png b/docs/reference/plotResidualDensity-1.png new file mode 100644 index 00000000..58f00c06 Binary files /dev/null and b/docs/reference/plotResidualDensity-1.png differ diff --git a/docs/reference/plotResidualDensity-2.png b/docs/reference/plotResidualDensity-2.png new file mode 100644 index 00000000..b2c20a3d Binary files /dev/null and b/docs/reference/plotResidualDensity-2.png differ diff --git a/docs/reference/plotResidualDensity.html b/docs/reference/plotResidualDensity.html index 9b7b5086..3878f8c5 100644 --- a/docs/reference/plotResidualDensity.html +++ b/docs/reference/plotResidualDensity.html @@ -129,6 +129,16 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotResidualDensity(lm_au)
    +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotResidualDensity(lm_au, rf_au)
    +
    diff --git a/docs/reference/plotScaleLocation-1.png b/docs/reference/plotScaleLocation-1.png new file mode 100644 index 00000000..db1ed4bc Binary files /dev/null and b/docs/reference/plotScaleLocation-1.png differ diff --git a/docs/reference/plotScaleLocation.html b/docs/reference/plotScaleLocation.html index 1f94cf81..5bdc96a1 100644 --- a/docs/reference/plotScaleLocation.html +++ b/docs/reference/plotScaleLocation.html @@ -122,12 +122,21 @@

    Ar +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotScaleLocation(lm_au)
    + +
    diff --git a/docs/reference/plotTwoSidedECDF-1.png b/docs/reference/plotTwoSidedECDF-1.png new file mode 100644 index 00000000..e8337252 Binary files /dev/null and b/docs/reference/plotTwoSidedECDF-1.png differ diff --git a/docs/reference/plotTwoSidedECDF-2.png b/docs/reference/plotTwoSidedECDF-2.png new file mode 100644 index 00000000..f0834f43 Binary files /dev/null and b/docs/reference/plotTwoSidedECDF-2.png differ diff --git a/docs/reference/plotTwoSidedECDF.html b/docs/reference/plotTwoSidedECDF.html index b12e1f80..7612ac3b 100644 --- a/docs/reference/plotTwoSidedECDF.html +++ b/docs/reference/plotTwoSidedECDF.html @@ -142,6 +142,16 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotTwoSidedECDF(lm_au)
    +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotTwoSidedECDF(lm_au, rf_au, y.reversed = TRUE)
    +
    diff --git a/docs/reference/score.html b/docs/reference/score.html index 2ed33a12..9a4196d4 100644 --- a/docs/reference/score.html +++ b/docs/reference/score.html @@ -114,7 +114,7 @@

    Ar type

    The type of score to be calculated. Possible values: 'Cook', 'DW', 'GQ', 'HalfNormal', 'MAE', 'MSE', 'REC', 'RMSE', 'ROC', 'RROC', 'Runs' -(for detailed description see functions in seealso section).

    +(for detailed description see functions in see also section).

    ... @@ -131,6 +131,22 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +score(lm_au, type = 'Runs')
    #> $name +#> [1] "Runs" +#> +#> $score +#> [1] -0.5933797 +#> +#> $pValue +#> [1] 0.5529271 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    +
    diff --git a/docs/reference/scoreCooksDistance.html b/docs/reference/scoreCooksDistance.html index 3d10f2e8..5352d918 100644 --- a/docs/reference/scoreCooksDistance.html +++ b/docs/reference/scoreCooksDistance.html @@ -135,6 +135,80 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreCooksDistance(lm_au)
    #> gov.administrators general.managers accountants +#> 2.428267e-03 2.832697e-01 1.626121e-03 +#> purchasing.officers chemists physicists +#> 1.082658e-03 1.377992e-02 4.882702e-03 +#> biologists architects civil.engineers +#> 6.387086e-03 6.269429e-04 2.971297e-03 +#> mining.engineers surveyors draughtsmen +#> 5.231977e-06 1.622283e-02 4.172801e-03 +#> computer.programers economists psychologists +#> 9.065386e-03 3.516136e-04 2.169004e-02 +#> social.workers lawyers librarians +#> 4.736014e-03 2.362512e-03 5.954443e-04 +#> vocational.counsellors ministers university.teachers +#> 1.987192e-02 8.388665e-02 1.484911e-02 +#> primary.school.teachers secondary.school.teachers physicians +#> 1.584048e-03 2.548978e-05 6.166504e-02 +#> veterinarians osteopaths.chiropractors nurses +#> 3.858089e-02 3.102104e-02 5.185906e-02 +#> nursing.aides physio.therapsts pharmacists +#> 4.911175e-04 5.498177e-02 1.893992e-04 +#> medical.technicians commercial.artists radio.tv.announcers +#> 3.391784e-02 5.101554e-03 1.935625e-04 +#> athletes secretaries typists +#> 3.390048e-04 6.852068e-06 1.779877e-03 +#> bookkeepers tellers.cashiers computer.operators +#> 1.487824e-03 1.014634e-03 5.068692e-04 +#> shipping.clerks file.clerks receptionsts +#> 3.824718e-03 3.959903e-02 3.093447e-03 +#> mail.carriers postal.clerks telephone.operators +#> 6.563431e-04 4.892611e-04 1.462173e-03 +#> collectors claim.adjustors travel.clerks +#> 1.760987e-02 1.937443e-03 8.541219e-03 +#> office.clerks sales.supervisors commercial.travellers +#> 6.700605e-03 3.567173e-04 8.673946e-03 +#> sales.clerks newsboys service.station.attendant +#> 1.562527e-02 1.177427e-01 4.948243e-02 +#> insurance.agents real.estate.salesmen buyers +#> 1.459960e-03 9.756845e-05 9.584963e-05 +#> firefighters policemen cooks +#> 1.019968e-04 6.862129e-05 2.592845e-05 +#> bartenders funeral.directors babysitters +#> 1.659973e-02 3.232608e-03 1.266394e-02 +#> launderers janitors elevator.operators +#> 8.930962e-03 1.188528e-02 
8.503645e-03 +#> farmers farm.workers rotary.well.drillers +#> 4.899856e-02 1.148605e-02 1.561217e-03 +#> bakers slaughterers.1 slaughterers.2 +#> 8.279081e-03 4.390716e-03 9.233956e-04 +#> canners textile.weavers textile.labourers +#> 1.749033e-03 6.540165e-03 1.489118e-03 +#> tool.die.makers machinists sheet.metal.workers +#> 9.062896e-04 2.451712e-03 1.151053e-04 +#> welders auto.workers aircraft.workers +#> 5.388366e-03 9.738267e-07 2.200670e-03 +#> electronic.workers radio.tv.repairmen sewing.mach.operators +#> 4.934191e-02 4.137224e-03 1.397534e-02 +#> auto.repairmen aircraft.repairmen railway.sectionmen +#> 1.269647e-03 1.665114e-03 1.638252e-10 +#> electrical.linemen electricians construction.foremen +#> 1.183954e-04 2.873767e-03 2.019788e-02 +#> carpenters masons house.painters +#> 1.491918e-02 1.027504e-02 4.754342e-04 +#> plumbers construction.labourers pilots +#> 3.493312e-03 1.594688e-03 1.748322e-03 +#> train.engineers bus.drivers taxi.drivers +#> 9.635459e-03 1.574710e-03 5.745797e-03 +#> longshoremen typesetters bookbinders +#> 8.498365e-03 8.455003e-05 7.186315e-04
    + +
    diff --git a/docs/reference/scoreDW.html b/docs/reference/scoreDW.html index 067ea517..d8b35f06 100644 --- a/docs/reference/scoreDW.html +++ b/docs/reference/scoreDW.html @@ -99,7 +99,7 @@

    Durbin-Watson Score

    Score based on Durbin-Watson test statistic. -The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value make sense only when the test assumptions are satisfied. Otherwise test statistic may be considered as a score.

    @@ -123,6 +123,20 @@

    Value

    an object of class scoreAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreDW(lm_au)
    #> $name +#> [1] "Durbin-Watson" +#> +#> $score +#> [1] 1.686923 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    + +
    diff --git a/docs/reference/scoreGQ.html b/docs/reference/scoreGQ.html index 7cedbf58..e6d62779 100644 --- a/docs/reference/scoreGQ.html +++ b/docs/reference/scoreGQ.html @@ -106,7 +106,7 @@

    Goldfeld-Quandt Score

    \(F = (MSE_1)/(MSE_2)\) where \(MSE = (RSS)/(n-p)\) where n is the number of observations and p is the number of variables .

    -

    The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +

    The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value make sense only when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. scoreGQ function uses a two-sided F-test.

    @@ -131,6 +131,26 @@

    Value

    an object of class scoreAudit

    +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreGQ(lm_au)
    #> $name +#> [1] "Goldfeld-Quandt" +#> +#> $score +#> [1] 0.3983684 +#> +#> $parameter +#> df1 df2 +#> 45 45 +#> +#> $pValue +#> [1] 0.002540368 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    +
    diff --git a/docs/reference/scoreHalfNormal-1.png b/docs/reference/scoreHalfNormal-1.png new file mode 100644 index 00000000..fdfafc3b Binary files /dev/null and b/docs/reference/scoreHalfNormal-1.png differ diff --git a/docs/reference/scoreHalfNormal.html b/docs/reference/scoreHalfNormal.html index 46a4ff26..8e051959 100644 --- a/docs/reference/scoreHalfNormal.html +++ b/docs/reference/scoreHalfNormal.html @@ -121,12 +121,21 @@

    Ar +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotHalfNormal(lm_au)
    #> Gaussian model (lm object)
    + +
    diff --git a/docs/reference/scoreMAE.html b/docs/reference/scoreMAE.html index b5c7b7d8..ae7df408 100644 --- a/docs/reference/scoreMAE.html +++ b/docs/reference/scoreMAE.html @@ -121,6 +121,20 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreMAE(lm_au)
    #> $name +#> [1] "MAE" +#> +#> $score +#> [1] 6.166535 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    + +
    diff --git a/docs/reference/scoreMSE.html b/docs/reference/scoreMSE.html index d88ac07f..4bbe9eb8 100644 --- a/docs/reference/scoreMSE.html +++ b/docs/reference/scoreMSE.html @@ -121,6 +121,19 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreMSE(lm_au)
    #> $name +#> [1] "MSE" +#> +#> $score +#> [1] 59.15265 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    +
    diff --git a/docs/reference/scoreREC.html b/docs/reference/scoreREC.html index b6657c66..6d4367d9 100644 --- a/docs/reference/scoreREC.html +++ b/docs/reference/scoreREC.html @@ -126,6 +126,20 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreREC(lm_au)
    #> $name +#> [1] "REC" +#> +#> $score +#> [1] 6.010425 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    + +
    diff --git a/docs/reference/scoreRMSE.html b/docs/reference/scoreRMSE.html index 777d167b..1a879ac0 100644 --- a/docs/reference/scoreRMSE.html +++ b/docs/reference/scoreRMSE.html @@ -121,6 +121,20 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRMSE(lm_au)
    #> $name +#> [1] "RMSE" +#> +#> $score +#> [1] 7.691076 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    + +
    diff --git a/docs/reference/scoreROC.html b/docs/reference/scoreROC.html index b53667da..5959c84f 100644 --- a/docs/reference/scoreROC.html +++ b/docs/reference/scoreROC.html @@ -121,6 +121,24 @@

    See a +

    Examples

    +
    library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +scoreROC(glm_au)
    #> $name +#> [1] "ROC" +#> +#> $score +#> $score[[1]] +#> [1] 0.8394254 +#> +#> +#> attr(,"class") +#> [1] "scoreAudit"
    +
    diff --git a/docs/reference/scoreRROC.html b/docs/reference/scoreRROC.html index 00fe801c..a26d34c4 100644 --- a/docs/reference/scoreRROC.html +++ b/docs/reference/scoreRROC.html @@ -125,6 +125,20 @@

    See a +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRROC(lm_au)
    #> $name +#> [1] "RROC" +#> +#> $score +#> [1] 307712.1 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    + +
    diff --git a/docs/reference/scoreRuns.html b/docs/reference/scoreRuns.html index 6210426d..09a85d6f 100644 --- a/docs/reference/scoreRuns.html +++ b/docs/reference/scoreRuns.html @@ -99,7 +99,7 @@

    Runs Score

    Score based on Runs test statistic. Note that this test is not very strong. It utilizes only signs of the residuals. -The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value make sense only when the test assumptions are satisfied. Otherwise test statistic may be considered as a score.

    @@ -119,12 +119,30 @@

    Ar +

    Examples

    +
    library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRuns(lm_au)
    #> $name +#> [1] "Runs" +#> +#> $score +#> [1] -0.5933797 +#> +#> $pValue +#> [1] 0.5529271 +#> +#> attr(,"class") +#> [1] "scoreAudit"
    +
    diff --git a/man/audit.Rd b/man/audit.Rd index 74955081..1564bd31 100644 --- a/man/audit.Rd +++ b/man/audit.Rd @@ -28,7 +28,7 @@ An object of class ModelAudit, which contains: \item \code{model} the audited model, \item \code{fitted.values} fitted values from model, \item \code{data} data used for fitting the model, -\item \code{y} vecor with values of predicted variable used for fittng the model, +\item \code{y} vector with values of predicted variable used for fitting the model, \item \code{predict.function} function that were used for model predictions, \item \code{residual.function} function that were used for calculating model residuals, \item \code{residuals} diff --git a/man/plot.modelAudit.Rd b/man/plot.modelAudit.Rd index af949cbb..ce719c34 100644 --- a/man/plot.modelAudit.Rd +++ b/man/plot.modelAudit.Rd @@ -9,20 +9,33 @@ \arguments{ \item{x}{object of class modelAudit} -\item{...}{other arguments dependent on the type of plot or additionam objects of class modelAudit} +\item{...}{other arguments dependent on the type of plot or additional objects of class modelAudit} \item{type}{the type of plot. Possible values: 'ACF', 'Autocorrelation', 'CumulativeGain', 'CooksDistance', 'HalfNormal', 'Residuals', 'LIFT', -ModelPCA', 'ModelCorreltion', 'Prediction', 'REC', 'ResidualDensity', 'Residual', 'ROC', 'RROC', -ScaleLocation', 'TwoSidedECDF' (for detailed description see functions in seealso section).} +'ModelPCA', 'ModelRanking', 'ModelCorrelation', 'Prediction', 'REC', 'ResidualDensity', 'Residual', 'ROC', 'RROC', +'ScaleLocation', 'TwoSidedECDF' (for detailed description see functions in see also section).} \item{ask}{logical; if TRUE, the user is asked before each plot, see \code{\link[graphics]{par}(ask=)}.} } \description{ This function provides several diagnostic plots for regression and classification models. 
+} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plot(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plot(lm_au, rf_au, type = "ModelRanking") + + } \seealso{ \code{\link{plotACF}, \link{plotAutocorrelation}, \link{plotCumulativeGain}, \link{plotCooksDistance}, -\link{plotHalfNormal}, \link{plotResidual}, \link{plotLIFT}, \link{plotModelPCA}, \link{plotModelCorrelation}, +\link{plotHalfNormal}, \link{plotResidual}, \link{plotLIFT}, \link{plotModelPCA}, \link{plotModelRanking}, \link{plotModelCorrelation}, \link{plotPrediction}, \link{plotREC}, \link{plotResidualDensity}, \link{plotResidual}, \link{plotROC}, \link{plotRROC}, \link{plotScaleLocation}, \link{plotTwoSidedECDF}} } diff --git a/man/plotACF.Rd b/man/plotACF.Rd index 0298b94a..ad7a5523 100644 --- a/man/plotACF.Rd +++ b/man/plotACF.Rd @@ -18,3 +18,16 @@ plotACF(object, ..., variable = NULL, alpha = 0.95) \description{ Plot Autocorrelation Function of models residuals. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotACF(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotACF(lm_au, rf_au) + + +} diff --git a/man/plotAutocorrelation.Rd b/man/plotAutocorrelation.Rd index 498a1016..ef3fea50 100644 --- a/man/plotAutocorrelation.Rd +++ b/man/plotAutocorrelation.Rd @@ -16,3 +16,10 @@ plotAutocorrelation(object, variable = NULL, score = TRUE) \description{ Plot of i-th residual vs i+1-th residual. 
} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotAutocorrelation(lm_au) + +} diff --git a/man/plotCooksDistance.Rd b/man/plotCooksDistance.Rd index 0c7f865a..64b38231 100644 --- a/man/plotCooksDistance.Rd +++ b/man/plotCooksDistance.Rd @@ -26,3 +26,10 @@ It shows how much all the values in the model change when the i-th observation i For model classes other than lm and glm the distances are computed directly from the definition. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotCooksDistance(lm_au) + +} diff --git a/man/plotCumulativeGain.Rd b/man/plotCumulativeGain.Rd index b5ed7250..1d14c542 100644 --- a/man/plotCumulativeGain.Rd +++ b/man/plotCumulativeGain.Rd @@ -15,8 +15,18 @@ plotCumulativeGain(object, ...) ggplot object } \description{ -Cumulative Gain Chartis is a plot of the rate of positive prediction against true positive rate for the different thresholds. +Cumulative Gain Chart is a plot of the rate of positive prediction against true positive rate for the different thresholds. It is useful for measuring and comparing the accuracy of the classificators. 
+} +\examples{ +library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotCumulativeGain(glm_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotHalfNormal.Rd b/man/plotHalfNormal.Rd index 295bd6af..a3a1234b 100644 --- a/man/plotHalfNormal.Rd +++ b/man/plotHalfNormal.Rd @@ -12,7 +12,7 @@ plotHalfNormal(object, score = TRUE, quant.scale = FALSE, \item{score}{If TRUE score based on probability density function is displayed on the plot.} -\item{quant.scale}{if TRUE values on avis are on quantile scale.} +\item{quant.scale}{if TRUE values on axis are on quantile scale.} \item{xlab}{The text for the x axis.} diff --git a/man/plotLIFT.Rd b/man/plotLIFT.Rd index 31692f29..2d20a409 100644 --- a/man/plotLIFT.Rd +++ b/man/plotLIFT.Rd @@ -23,6 +23,16 @@ LIFT Chart shows the ratio of a model to a random guess. } \details{ Response vector provided by y argument in audit function should be an integer vector containing binary labels with values 0,1. 
+} +\examples{ +library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotLIFT(glm_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotModelCorrelation.Rd b/man/plotModelCorrelation.Rd index 49d8e951..e2b2ff2e 100644 --- a/man/plotModelCorrelation.Rd +++ b/man/plotModelCorrelation.Rd @@ -18,6 +18,16 @@ ggplot object } \description{ Matrix of plots +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelCorrelation(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotModelPCA.Rd b/man/plotModelPCA.Rd index cd8fe1ff..6c0b82a2 100644 --- a/man/plotModelPCA.Rd +++ b/man/plotModelPCA.Rd @@ -11,7 +11,7 @@ plotModelPCA(object, ..., scale = TRUE, invisible = "none") \item{...}{Other modelAudit objects to be plotted together.} -\item{scale}{A logical value indicating whether the models residuals should be scaled bfore the analysis.} +\item{scale}{A logical value indicating whether the models residuals should be scaled before the analysis.} \item{invisible}{A text specifying the elements to be hidden on the plot. Default value is "none". Allowed values are "model", "observ".} } @@ -21,6 +21,16 @@ ggplot object \description{ Principal Component Analysis of models residuals. PCA can be used to assess the similarity of the models. 
+} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelPCA(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotModelRanking.Rd b/man/plotModelRanking.Rd index 6ac68b89..b6d83509 100644 --- a/man/plotModelRanking.Rd +++ b/man/plotModelRanking.Rd @@ -21,6 +21,16 @@ ggplot object } \description{ Radar plot with model scores. Scores are scaled to [0,1], each score is inversed and divided by maximum score value. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotModelRanking(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotPrediction.Rd b/man/plotPrediction.Rd index 71f17141..3b6b4929 100644 --- a/man/plotPrediction.Rd +++ b/man/plotPrediction.Rd @@ -15,6 +15,18 @@ plotPrediction(object, ..., variable = "Observed response") } \description{ Plot of predicted response vs observed or variable Values. 
+} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotPrediction(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotPrediction(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotREC.Rd b/man/plotREC.Rd index 9cd52fa6..abd6f568 100644 --- a/man/plotREC.Rd +++ b/man/plotREC.Rd @@ -24,17 +24,15 @@ REC curve estimates the Cumulative Distribution Function (CDF) of the error Area Over the REC Curve (REC) is a biased estimate of the expected error } \examples{ -library(auditor) -library(randomForest) library(car) -model_lm <- lm(prestige ~ education + women + income, data = Prestige) -audit_lm <- audit(model_lm) - -plotREC(audit_lm) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotREC(lm_au) -model_rf <- randomForest(prestige ~ education + women + income, data = Prestige) -audit_rf <- audit(model_rf) -plotREC(audit_lm, audit_rf) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotREC(lm_au, rf_au) } diff --git a/man/plotROC.Rd b/man/plotROC.Rd index 94e3611a..f9e4123d 100644 --- a/man/plotROC.Rd +++ b/man/plotROC.Rd @@ -19,17 +19,13 @@ Receiver Operating Characterstic Curve is a plot of the true positive rate (TPR) It is useful for measuring and comparing the accuracy of the classificators. 
} \examples{ -library(auditor) library(mlbench) data("PimaIndiansDiabetes") - -model.glm <- glm(diabetes~., family=binomial, data=PimaIndiansDiabetes) -au.glm <- audit(model.glm, label="class glm") -plotROC(au.glm) - -model.glm.press <- glm(diabetes~pressure, family=binomial, data=PimaIndiansDiabetes) -au.glm.press <- audit(model.glm.press) -plotROC(au.glm, au.glm.press) +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +plotROC(glm_au) } \seealso{ diff --git a/man/plotRROC.Rd b/man/plotRROC.Rd index 96b4870b..6413d19d 100644 --- a/man/plotRROC.Rd +++ b/man/plotRROC.Rd @@ -20,11 +20,11 @@ The RROC is a plot where on the x-axis we depict total over-estimation and on th under-estimation. } \details{ -For RROC curves we use a shift, which is an equvalent to the threshold for ROC curves. +For RROC curves we use a shift, which is an equivalent to the threshold for ROC curves. For each observation we calculate new prediction: \eqn{\hat{y}'=\hat{y}+s} where s is the shift. Therefore, there are different error values for each shift: \eqn{e_i = \hat{y_i}' - y_i} -Over-estimation is caluclates as: \eqn{OVER= \sum(e_i|e_i>0)}. +Over-estimation is calculated as: \eqn{OVER= \sum(e_i|e_i>0)}. Under-estimation is calculated as: \eqn{UNDER = \sum(e_i|e_i<0)}. @@ -33,17 +33,15 @@ Under-estimation is calculated as: \eqn{UNDER = \sum(e_i|e_i<0)}. The Area Over the RROC Curve (AOC) equals to the variance of the errors multiplied by \eqn{frac{n^2}{2}}. 
} \examples{ -library(auditor) -library(randomForest) library(car) -model_lm <- lm(prestige ~ education + women + income, data = Prestige) -audit_lm <- audit(model_lm) - -plotRROC(audit_lm) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotRROC(lm_au) -model_rf <- randomForest(prestige ~ education + women + income, data = Prestige) -audit_rf <- audit(model_rf) -plotRROC(audit_lm, audit_rf) +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotRROC(lm_au, rf_au) } \references{ diff --git a/man/plotResidual.Rd b/man/plotResidual.Rd index 82e00fac..7fcc9e80 100644 --- a/man/plotResidual.Rd +++ b/man/plotResidual.Rd @@ -15,6 +15,18 @@ plotResidual(object, ..., variable = NULL) } \description{ A plot of residuals against fitted values, observed values or any variable. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotResidual(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotResidual(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotResidualDensity.Rd b/man/plotResidualDensity.Rd index 9ccb0f5d..43d6fdf0 100644 --- a/man/plotResidualDensity.Rd +++ b/man/plotResidualDensity.Rd @@ -18,6 +18,18 @@ ggplot object } \description{ Density of model residuals. 
+} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotResidualDensity(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotResidualDensity(lm_au, rf_au) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/plotScaleLocation.Rd b/man/plotScaleLocation.Rd index 84cf4de9..9f0f0545 100644 --- a/man/plotScaleLocation.Rd +++ b/man/plotScaleLocation.Rd @@ -17,3 +17,11 @@ plotScaleLocation(object, variable = NULL, score = FALSE) Variable values vs square root of the absolute value of the residuals. A vertical line corresponds to median. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotScaleLocation(lm_au) + + +} diff --git a/man/plotTwoSidedECDF.Rd b/man/plotTwoSidedECDF.Rd index 37fadbc3..ad174005 100644 --- a/man/plotTwoSidedECDF.Rd +++ b/man/plotTwoSidedECDF.Rd @@ -25,6 +25,18 @@ ggplot object } \description{ Cumulative Distribution Function for positive and negative residuals. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +plotTwoSidedECDF(lm_au) + +library(randomForest) +rf_model <- randomForest(prestige~education + women + income, data = Prestige) +rf_au <- audit(rf_model, data = Prestige, y = Prestige$prestige) +plotTwoSidedECDF(lm_au, rf_au, y.reversed = TRUE) + } \seealso{ \code{\link{plot.modelAudit}} diff --git a/man/score.Rd b/man/score.Rd index def46f0c..03691d0f 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -10,7 +10,7 @@ score(object, type = "MSE", ...) \item{object}{Object An object of class modelAudit.} \item{type}{The type of score to be calculated. 
Possible values: 'Cook', 'DW', 'GQ', 'HalfNormal', 'MAE', 'MSE', 'REC', 'RMSE', 'ROC', 'RROC', 'Runs' -(for detailed description see functions in seealso section).} +(for detailed description see functions in see also section).} \item{...}{Other arguments dependent on the type of score.} } @@ -20,6 +20,13 @@ an object of class scoreAudit, except Cooks distance, where numeric vector is re \description{ This function provides several scores for model validation and performance assessment. Scores can be also used to compare models. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +score(lm_au, type = 'Runs') + } \seealso{ \code{\link{scoreCooksDistance}, \link{scoreDW}, \link{scoreGQ}, \link{scoreHalfNormal}, \link{scoreMAE}, \link{scoreMSE}, \link{scoreREC}, \link{scoreROC}, \link{scoreRROC}, \link{scoreRuns}} diff --git a/man/scoreCooksDistance.Rd b/man/scoreCooksDistance.Rd index 8b77dad6..62672c57 100644 --- a/man/scoreCooksDistance.Rd +++ b/man/scoreCooksDistance.Rd @@ -27,6 +27,14 @@ It shows how much all the values in the model change when the i-th observation i Models of classes other than lm and glm the distances are computed directly from the definition, so this may take a while. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreCooksDistance(lm_au) + + } \seealso{ \code{\link{score}} diff --git a/man/scoreDW.Rd b/man/scoreDW.Rd index ae16be0f..d1c5cd39 100644 --- a/man/scoreDW.Rd +++ b/man/scoreDW.Rd @@ -16,6 +16,14 @@ an object of class scoreAudit } \description{ Score based on Durbin-Watson test statistic. -The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +The score value is helpful in comparing models. 
It is worth pointing out that results of tests like p-value make sense only when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreDW(lm_au) + + +} diff --git a/man/scoreGQ.Rd b/man/scoreGQ.Rd index 6f47ddab..6759e355 100644 --- a/man/scoreGQ.Rd +++ b/man/scoreGQ.Rd @@ -26,7 +26,14 @@ The test statistic is the ratio of the mean square residual errors for two group where \eqn{MSE = (RSS)/(n-p)} where n is the number of observations and p is the number of variables . -The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value make sense only when the test assumptions are satisfied. Otherwise test statistic may be considered as a score. \code{scoreGQ} function uses a two-sided F-test. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreGQ(lm_au) + +} diff --git a/man/scoreHalfNormal.Rd b/man/scoreHalfNormal.Rd index 0697560d..737f8420 100644 --- a/man/scoreHalfNormal.Rd +++ b/man/scoreHalfNormal.Rd @@ -19,3 +19,11 @@ with the distinction that each element of sum is also scaled to take values from \eqn{res_i} is a residual for i-th observation, \eqn{simres_{i,j}} is the residual of j-th simulation for i-th observation, and \eqn{n} is the number of simulations for each observation. Scores are calculated on the basis of simulated data, so they may differ between function calls.
} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreHalfNormal(lm_au) + + +} diff --git a/man/scoreMAE.Rd b/man/scoreMAE.Rd index 89c994b0..18d5b7dc 100644 --- a/man/scoreMAE.Rd +++ b/man/scoreMAE.Rd @@ -14,6 +14,14 @@ an object of class scoreAudit } \description{ Mean Absolute Error. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreMAE(lm_au) + + } \seealso{ \code{\link{score}} diff --git a/man/scoreMSE.Rd b/man/scoreMSE.Rd index e41e07c6..3c48bdd8 100644 --- a/man/scoreMSE.Rd +++ b/man/scoreMSE.Rd @@ -14,6 +14,13 @@ an object of class scoreAudit } \description{ Mean Square Error. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreMSE(lm_au) + } \seealso{ \code{\link{score}} diff --git a/man/scoreREC.Rd b/man/scoreREC.Rd index cf90b959..99193aa1 100644 --- a/man/scoreREC.Rd +++ b/man/scoreREC.Rd @@ -15,6 +15,14 @@ an object of class scoreAudit \description{ The area over the Regression Error Characteristic curve is a measure of the expected error for the regression model. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreREC(lm_au) + + } \references{ J. Bi, and K. P. Bennet, "Regression error characteristic curves," in Proc. 20th Int. Conf. Machine Learning, Washington DC, 2003, pp. 43-50 diff --git a/man/scoreRMSE.Rd b/man/scoreRMSE.Rd index ee0b49a6..4e3af8ee 100644 --- a/man/scoreRMSE.Rd +++ b/man/scoreRMSE.Rd @@ -14,6 +14,14 @@ an object of class scoreAudit } \description{ Root Mean Square Error.
+} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRMSE(lm_au) + + } \seealso{ \code{\link{score}} diff --git a/man/scoreROC.Rd b/man/scoreROC.Rd index d4f4b564..9504e3dc 100644 --- a/man/scoreROC.Rd +++ b/man/scoreROC.Rd @@ -14,6 +14,16 @@ an object of class scoreAudit } \description{ Area Under Curve (AUC) for Receiver Operating Characteristic. +} +\examples{ +library(mlbench) +data("PimaIndiansDiabetes") +Pima <- PimaIndiansDiabetes +Pima$diabetes <- ifelse(Pima$diabetes == "pos", 1, 0) +glm_model <- glm(diabetes~., family=binomial, data=Pima) +glm_au <- audit(glm_model, data = Pima, y = Pima$diabetes) +scoreROC(glm_au) + } \seealso{ \code{\link{plotROC}} diff --git a/man/scoreRROC.Rd b/man/scoreRROC.Rd index 1998f79b..c8dacd16 100644 --- a/man/scoreRROC.Rd +++ b/man/scoreRROC.Rd @@ -14,6 +14,14 @@ an object of class scoreAudit } \description{ The area over the Regression Receiver Operating Characteristic. +} +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRROC(lm_au) + + } \references{ Hernández-Orallo, José. 2013. ‘ROC Curves for Regression’. Pattern Recognition 46 (12): 3395–3411. diff --git a/man/scoreRuns.Rd b/man/scoreRuns.Rd index 5ecbd457..55420915 100644 --- a/man/scoreRuns.Rd +++ b/man/scoreRuns.Rd @@ -13,6 +13,13 @@ scoreRuns(object, variable = NULL) } \description{ Score based on Runs test statistic. Note that this test is not very strong. It utilizes only signs of the residuals. -The score value is helpful in comparing models. It is worth ponting out that results of tests like p-value makes sense only +The score value is helpful in comparing models. It is worth pointing out that results of tests like p-value make sense only
Otherwise test statistic may be considered as a score. } +\examples{ +library(car) +lm_model <- lm(prestige~education + women + income, data = Prestige) +lm_au <- audit(lm_model, data = Prestige, y = Prestige$prestige) +scoreRuns(lm_au) + +}