% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/explain.R
\name{explain}
\alias{explain}
\alias{explain.default}
\alias{explain.lm}
\alias{explain.xgb.Booster}
\title{Fast approximate Shapley values}
\usage{
explain(object, ...)

\method{explain}{default}(
  object,
  feature_names = NULL,
  X = NULL,
  nsim = 1,
  pred_wrapper = NULL,
  newdata = NULL,
  adjust = FALSE,
  ...
)

\method{explain}{lm}(
  object,
  feature_names = NULL,
  X,
  nsim = 1,
  pred_wrapper,
  newdata = NULL,
  exact = FALSE,
  ...
)

\method{explain}{xgb.Booster}(
  object,
  feature_names = NULL,
  X = NULL,
  nsim = 1,
  pred_wrapper,
  newdata = NULL,
  exact = FALSE,
  ...
)
}
\arguments{
\item{object}{A fitted model object (e.g., a \code{\link[ranger]{ranger}} or
an \code{\link[xgboost]{xgboost}} object).}

\item{...}{Additional optional arguments to be passed on to
\code{\link[plyr]{laply}}.}

\item{feature_names}{Character vector giving the names of the predictor
variables (i.e., features) of interest. If \code{NULL} (default) they will be
taken from the column names of \code{X}.}

\item{X}{A matrix-like R object (e.g., a data frame or matrix) containing 
ONLY the feature columns from the training data. \strong{NOTE:} This argument
is required whenever \code{exact = FALSE}.}

\item{nsim}{The number of Monte Carlo repetitions to use for estimating each 
Shapley value (only used when \code{exact = FALSE}). Default is 1. 
\strong{NOTE:} To obtain the most accurate results, \code{nsim} should be set 
as large as is computationally feasible.}

\item{pred_wrapper}{Prediction function that requires two arguments,
\code{object} and \code{newdata}. \strong{NOTE:} This argument is required 
whenever \code{exact = FALSE}. The output of this function should be 
determined according to:

\describe{
  \item{Regression}{A numeric vector of predicted outcomes.}
  \item{Binary classification}{A vector of predicted class probabilities
  for the reference class.}
  \item{Multiclass classification}{A vector of predicted class probabilities
  for the reference class.}
}}

\item{newdata}{A matrix-like R object (e.g., a data frame or matrix) 
containing ONLY the feature columns for the observation(s) of interest; that 
is, the observation(s) you want to compute explanations for. Default is 
\code{NULL} which will produce approximate Shapley values for all the rows in 
\code{X} (i.e., the training data).}

\item{adjust}{Logical indicating whether or not to adjust the sum of the 
estimated Shapley values to satisfy the \emph{additivity} (or 
\emph{local accuracy}) property; that is, to equal the difference between the 
model's prediction for that sample and the average prediction over all the 
training data (i.e., \code{X}). Default is \code{FALSE}.}

\item{exact}{Logical indicating whether to compute exact Shapley values. 
Currently only available for \code{\link[stats]{lm}} and 
\code{\link[xgboost]{xgboost}} objects. Default is \code{FALSE}. Note 
that setting \code{exact = TRUE} will return explanations for each of the 
\code{\link[stats]{terms}} in an \code{\link[stats]{lm}} object.}
}
\value{
A tibble with one column for each feature specified in 
\code{feature_names} (if \code{feature_names = NULL}, the default, there will
be one column for each feature in \code{X}) and one row for each observation
in \code{newdata} (if \code{newdata = NULL}, the default, there will be one
row for each observation in \code{X}).
}
\description{
Compute fast (approximate) Shapley values for a set of features.
}
\note{
Setting \code{exact = TRUE} with a linear model (i.e., an 
\code{\link[stats]{lm}} or \code{\link[stats]{glm}} object) assumes that the
input features are independent. Also, setting \code{adjust = TRUE} is 
experimental and we follow the same approach as in
\href{https://github.com/slundberg/shap}{shap}.
}
\examples{
#
# A linear regression example
#

# Load the sample data; see ?datasets::mtcars for details
data(mtcars)

# Fit a linear regression model
fit <- lm(mpg ~ ., data = mtcars)

# Compute approximate Shapley values using 10 Monte Carlo simulations
set.seed(101)  # for reproducibility
shap <- explain(fit, X = subset(mtcars, select = -mpg), nsim = 10, 
                pred_wrapper = predict)
shap

# Compute exact Shapley (i.e., LinearSHAP) values
shap <- explain(fit, exact = TRUE)
shap

# Shapley-based plots
library(ggplot2)
autoplot(shap)  # Shapley-based importance plot
autoplot(shap, type = "dependence", feature = "wt", X = mtcars)
autoplot(shap, type = "contribution", row_num = 1)  # explain first row of X
}
\seealso{
You can find more examples (with larger and more realistic data 
sets) on the \strong{fastshap} GitHub repository: 
\url{https://github.com/bgreenwell/fastshap}.
}
