% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/LearnerClassifXgboost.R
\name{mlr_learners_classif.xgboost}
\alias{mlr_learners_classif.xgboost}
\alias{LearnerClassifXgboost}
\title{Extreme Gradient Boosting Classification Learner}
\description{
eXtreme Gradient Boosting classification.
Calls \code{\link[xgboost:xgb.train]{xgboost::xgb.train()}} from package \CRANpkg{xgboost}.

If not specified otherwise, the evaluation metric is set to the default \code{"logloss"}
for binary classification problems and set to \code{"mlogloss"} for multiclass problems.
This was necessary to silence a deprecation warning.

Note that using the \code{watchlist} parameter directly will lead to problems when wrapping this \code{\link{Learner}} in a
\code{mlr3pipelines} \code{GraphLearner} as the preprocessing steps will not be applied to the data in the watchlist.
}
\note{
To compute on GPUs, you first need to compile \CRANpkg{xgboost} yourself and link
against CUDA.
See \url{https://xgboost.readthedocs.io/en/stable/build.html#building-with-gpu-support}.
}
\section{Initial parameter values}{

\itemize{
\item \code{nrounds}:
\itemize{
\item Actual default: no default.
\item Adjusted default: 1.
\item Reason for change: Without a default, construction of the learner
would error. The value 1 is only a placeholder to work around this;
\code{nrounds} needs to be tuned by the user.
}
\item \code{nthread}:
\itemize{
\item Actual value: Undefined, triggering auto-detection of the number of CPUs.
\item Adjusted value: 1.
\item Reason for change: Conflicting with parallelization via \CRANpkg{future}.
}
\item \code{verbose}:
\itemize{
\item Actual default: 1.
\item Adjusted default: 0.
\item Reason for change: Reduce verbosity.
}
}
}

\section{Early stopping}{

Early stopping can be used to find the optimal number of boosting rounds.
The \code{early_stopping_set} parameter controls which set is used to monitor the performance.
Set \code{early_stopping_set = "test"} to monitor the performance of the model on the test set while training.
The test set for early stopping can be set with the \code{"test"} row role in the \link[mlr3:Task]{mlr3::Task}.
Additionally, set \code{early_stopping_rounds} to the number of boosting rounds within which the performance must improve, and \code{nrounds} to the maximum number of boosting rounds.
During resampling, the test set is automatically taken from the \link[mlr3:Resampling]{mlr3::Resampling}.
Note that using the test set for early stopping can potentially bias the performance scores.
See the section on early stopping in the examples.
}

\section{Dictionary}{

This \link{Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_learners} or with the associated sugar function \code{\link[=lrn]{lrn()}}:

\if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_learners$get("classif.xgboost")
lrn("classif.xgboost")
}\if{html}{\out{</div>}}
}

\section{Meta Information}{

\itemize{
\item Task type: \dQuote{classif}
\item Predict Types: \dQuote{response}, \dQuote{prob}
\item Feature Types: \dQuote{logical}, \dQuote{integer}, \dQuote{numeric}
\item Required Packages: \CRANpkg{mlr3}, \CRANpkg{mlr3learners}, \CRANpkg{xgboost}
}
}

\section{Parameters}{
\tabular{lllll}{
   Id \tab Type \tab Default \tab Levels \tab Range \cr
   alpha \tab numeric \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   approxcontrib \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   base_score \tab numeric \tab 0.5 \tab  \tab \eqn{(-\infty, \infty)}{(-Inf, Inf)} \cr
   booster \tab character \tab gbtree \tab gbtree, gblinear, dart \tab - \cr
   callbacks \tab untyped \tab list \tab  \tab - \cr
   colsample_bylevel \tab numeric \tab 1 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   colsample_bynode \tab numeric \tab 1 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   colsample_bytree \tab numeric \tab 1 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   disable_default_eval_metric \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   early_stopping_rounds \tab integer \tab NULL \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   early_stopping_set \tab character \tab none \tab none, train, test \tab - \cr
   eta \tab numeric \tab 0.3 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   eval_metric \tab untyped \tab - \tab  \tab - \cr
   feature_selector \tab character \tab cyclic \tab cyclic, shuffle, random, greedy, thrifty \tab - \cr
   feval \tab untyped \tab  \tab  \tab - \cr
   gamma \tab numeric \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   grow_policy \tab character \tab depthwise \tab depthwise, lossguide \tab - \cr
   interaction_constraints \tab untyped \tab - \tab  \tab - \cr
   iterationrange \tab untyped \tab - \tab  \tab - \cr
   lambda \tab numeric \tab 1 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   lambda_bias \tab numeric \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   max_bin \tab integer \tab 256 \tab  \tab \eqn{[2, \infty)}{[2, Inf)} \cr
   max_delta_step \tab numeric \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   max_depth \tab integer \tab 6 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   max_leaves \tab integer \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   maximize \tab logical \tab NULL \tab TRUE, FALSE \tab - \cr
   min_child_weight \tab numeric \tab 1 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   missing \tab numeric \tab NA \tab  \tab \eqn{(-\infty, \infty)}{(-Inf, Inf)} \cr
   monotone_constraints \tab untyped \tab 0 \tab  \tab - \cr
   normalize_type \tab character \tab tree \tab tree, forest \tab - \cr
   nrounds \tab integer \tab - \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   nthread \tab integer \tab 1 \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   ntreelimit \tab integer \tab NULL \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   num_parallel_tree \tab integer \tab 1 \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   objective \tab untyped \tab binary:logistic \tab  \tab - \cr
   one_drop \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   outputmargin \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   predcontrib \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   predictor \tab character \tab cpu_predictor \tab cpu_predictor, gpu_predictor \tab - \cr
   predinteraction \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   predleaf \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   print_every_n \tab integer \tab 1 \tab  \tab \eqn{[1, \infty)}{[1, Inf)} \cr
   process_type \tab character \tab default \tab default, update \tab - \cr
   rate_drop \tab numeric \tab 0 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   refresh_leaf \tab logical \tab TRUE \tab TRUE, FALSE \tab - \cr
   reshape \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   seed_per_iteration \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   sampling_method \tab character \tab uniform \tab uniform, gradient_based \tab - \cr
   sample_type \tab character \tab uniform \tab uniform, weighted \tab - \cr
   save_name \tab untyped \tab  \tab  \tab - \cr
   save_period \tab integer \tab NULL \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   scale_pos_weight \tab numeric \tab 1 \tab  \tab \eqn{(-\infty, \infty)}{(-Inf, Inf)} \cr
   skip_drop \tab numeric \tab 0 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   strict_shape \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   subsample \tab numeric \tab 1 \tab  \tab \eqn{[0, 1]}{[0, 1]} \cr
   top_k \tab integer \tab 0 \tab  \tab \eqn{[0, \infty)}{[0, Inf)} \cr
   training \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr
   tree_method \tab character \tab auto \tab auto, exact, approx, hist, gpu_hist \tab - \cr
   tweedie_variance_power \tab numeric \tab 1.5 \tab  \tab \eqn{[1, 2]}{[1, 2]} \cr
   updater \tab untyped \tab - \tab  \tab - \cr
   verbose \tab integer \tab 1 \tab  \tab \eqn{[0, 2]}{[0, 2]} \cr
   watchlist \tab untyped \tab  \tab  \tab - \cr
   xgb_model \tab untyped \tab  \tab  \tab - \cr
}
}

\examples{
# All examples need the suggested package xgboost, so the whole script is
# guarded: nothing is run (and nothing can error) when it is not installed.
if (requireNamespace("xgboost", quietly = TRUE)) {
  learner = mlr3::lrn("classif.xgboost")
  print(learner)

  # available parameters:
  # (printed explicitly because this is no longer the last expression of the block)
  print(learner$param_set$ids())

  # Train learner with early stopping on spam data set
  task = mlr3::tsk("spam")

  # Split task into training and test set
  split = mlr3::partition(task, ratio = 0.8)
  task$set_row_roles(split$test, "test")

  # Set early stopping parameter:
  # stop when the score on the test set has not improved for 100 rounds,
  # with at most 1000 boosting rounds
  learner = mlr3::lrn("classif.xgboost",
    nrounds = 1000,
    early_stopping_rounds = 100,
    early_stopping_set = "test"
  )

  # Train learner with early stopping
  learner$train(task)
}
}
\references{
Chen, Tianqi, Guestrin, Carlos (2016).
\dQuote{Xgboost: A scalable tree boosting system.}
In \emph{Proceedings of the 22nd ACM SIGKDD Conference on Knowledge Discovery and Data Mining}, 785--794.
ACM.
\doi{10.1145/2939672.2939785}.
}
\seealso{
\itemize{
\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}:
\url{https://mlr3book.mlr-org.com/basics.html#learners}
\item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners.
\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Learner]{Learners}: \link{mlr_learners}
\item \code{as.data.table(mlr_learners)} for a table of available \link[=Learner]{Learners} in the running session (depending on the loaded packages).
\item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps.
\item Extension packages for additional task types:
\itemize{
\item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis.
\item \CRANpkg{mlr3cluster} for unsupervised clustering.
}
\item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces}
for established default tuning spaces.
}

Other Learner: 
\code{\link{mlr_learners_classif.cv_glmnet}},
\code{\link{mlr_learners_classif.glmnet}},
\code{\link{mlr_learners_classif.kknn}},
\code{\link{mlr_learners_classif.lda}},
\code{\link{mlr_learners_classif.log_reg}},
\code{\link{mlr_learners_classif.multinom}},
\code{\link{mlr_learners_classif.naive_bayes}},
\code{\link{mlr_learners_classif.nnet}},
\code{\link{mlr_learners_classif.qda}},
\code{\link{mlr_learners_classif.ranger}},
\code{\link{mlr_learners_classif.svm}},
\code{\link{mlr_learners_regr.cv_glmnet}},
\code{\link{mlr_learners_regr.glmnet}},
\code{\link{mlr_learners_regr.kknn}},
\code{\link{mlr_learners_regr.km}},
\code{\link{mlr_learners_regr.lm}},
\code{\link{mlr_learners_regr.nnet}},
\code{\link{mlr_learners_regr.ranger}},
\code{\link{mlr_learners_regr.svm}},
\code{\link{mlr_learners_regr.xgboost}}
}
\concept{Learner}
\section{Super classes}{
\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3:LearnerClassif]{mlr3::LearnerClassif}} -> \code{LearnerClassifXgboost}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-LearnerClassifXgboost-new}{\code{LearnerClassifXgboost$new()}}
\item \href{#method-LearnerClassifXgboost-importance}{\code{LearnerClassifXgboost$importance()}}
\item \href{#method-LearnerClassifXgboost-clone}{\code{LearnerClassifXgboost$clone()}}
}
}
\if{html}{\out{
<details><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="base_learner"><a href='../../mlr3/html/Learner.html#method-Learner-base_learner'><code>mlr3::Learner$base_learner()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="format"><a href='../../mlr3/html/Learner.html#method-Learner-format'><code>mlr3::Learner$format()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="help"><a href='../../mlr3/html/Learner.html#method-Learner-help'><code>mlr3::Learner$help()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="predict"><a href='../../mlr3/html/Learner.html#method-Learner-predict'><code>mlr3::Learner$predict()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="predict_newdata"><a href='../../mlr3/html/Learner.html#method-Learner-predict_newdata'><code>mlr3::Learner$predict_newdata()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="print"><a href='../../mlr3/html/Learner.html#method-Learner-print'><code>mlr3::Learner$print()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="reset"><a href='../../mlr3/html/Learner.html#method-Learner-reset'><code>mlr3::Learner$reset()</code></a></span></li>
<li><span class="pkg-link" data-pkg="mlr3" data-topic="Learner" data-id="train"><a href='../../mlr3/html/Learner.html#method-Learner-train'><code>mlr3::Learner$train()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LearnerClassifXgboost-new"></a>}}
\if{latex}{\out{\hypertarget{method-LearnerClassifXgboost-new}{}}}
\subsection{Method \code{new()}}{
Creates a new instance of this \link[R6:R6Class]{R6} class.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LearnerClassifXgboost$new()}\if{html}{\out{</div>}}
}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LearnerClassifXgboost-importance"></a>}}
\if{latex}{\out{\hypertarget{method-LearnerClassifXgboost-importance}{}}}
\subsection{Method \code{importance()}}{
The importance scores are calculated with \code{\link[xgboost:xgb.importance]{xgboost::xgb.importance()}}.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LearnerClassifXgboost$importance()}\if{html}{\out{</div>}}
}

\subsection{Returns}{
Named \code{numeric()}.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LearnerClassifXgboost-clone"></a>}}
\if{latex}{\out{\hypertarget{method-LearnerClassifXgboost-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LearnerClassifXgboost$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
