% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R, R/S3_LogLoss.R
\name{logloss.factor}
\alias{logloss.factor}
\alias{weighted.logloss.factor}
\alias{logloss.integer}
\alias{weighted.logloss.integer}
\alias{logloss}
\alias{weighted.logloss}
\title{Log Loss}
\usage{
\method{logloss}{factor}(actual, response, normalize = TRUE, ...)

\method{weighted.logloss}{factor}(actual, response, w, normalize = TRUE, ...)

\method{logloss}{integer}(actual, response, normalize = TRUE, ...)

\method{weighted.logloss}{integer}(actual, response, w, normalize = TRUE, ...)

## Generic S3 method
logloss(
 actual,
 response,
 normalize = TRUE,
 ...
)

## Generic S3 method
weighted.logloss(
 actual,
 response,
 w,
 normalize = TRUE,
 ...
)
}
\arguments{
\item{actual}{A vector of <\link{factor}> with \link{length} \eqn{n}, and \eqn{k} levels}

\item{response}{A \eqn{n \times k} <\link{numeric}>-matrix of predicted probabilities.
The \eqn{i}-th row should sum to 1 (i.e., a valid probability distribution
over the \eqn{k} classes). The first column corresponds to the first factor
level in \code{actual}, the second column to the second factor level, and so on.}

\item{normalize}{A <\link{logical}>-value (default: \link{TRUE}). If \link{TRUE},
the mean cross-entropy across all observations is returned; otherwise, the
sum of cross-entropies is returned.}

\item{...}{Arguments passed into other methods}

\item{w}{A <\link{numeric}>-vector of \link{length} \eqn{n}. \link{NULL} by default}
}
\value{
A <\link{numeric}>-vector of \link{length} 1
}
\description{
The \code{\link[=logloss]{logloss()}} function computes the \strong{Log Loss} between observed classes (as a <\link{factor}>) and their predicted probability distributions (a <\link{numeric}> matrix). The \code{\link[=weighted.logloss]{weighted.logloss()}} function is the weighted version, applying observation-specific weights.
}
\section{Definition}{


\deqn{H(p, response) = -\sum_{i} \sum_{j} y_{ij} \log_2(response_{ij})}{
H(p, response) = -\sum_{i} \sum_{j} y_ij log(response_ij)}
where:
\itemize{
\item \eqn{y_{ij}} is the \code{actual}-values, where \eqn{y_{ij}} = 1 if the \code{i}-th sample belongs to class \code{j}, and 0 otherwise.
\item \eqn{response_{ij}} is the estimated probability for the \code{i}-th sample belonging to class \code{j}.
}
}

\section{Creating <\link{factor}>}{


Consider a classification problem with three classes: \code{A}, \code{B}, and \code{C}. The actual vector of \code{\link[=factor]{factor()}} values is defined as follows:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{## set seed
set.seed(1903)

## actual
factor(
  x = sample(x = 1:3, size = 10, replace = TRUE),
  levels = c(1, 2, 3),
  labels = c("A", "B", "C")
)
#>  [1] B A B B A C B C C A
#> Levels: A B C
}\if{html}{\out{</div>}}

Here, the values 1, 2, and 3 are mapped to \code{A}, \code{B}, and \code{C}, respectively. Now, suppose your model does not predict any \code{B}'s. The predicted vector of \code{\link[=factor]{factor()}} values would be defined as follows:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{## set seed
set.seed(1903)

## predicted
factor(
  x = sample(x = c(1, 3), size = 10, replace = TRUE),
  levels = c(1, 2, 3),
  labels = c("A", "B", "C")
)
#>  [1] C A C C C C C C A C
#> Levels: A B C
}\if{html}{\out{</div>}}

In both cases, \eqn{k = 3}, determined indirectly by the \code{levels} argument.
}

\examples{
# 1) Recode the iris data set to a binary classification problem
#    Here, the positive class ("Virginica") is coded as 1,
#    and the rest ("Others") is coded as 0.
iris$species_num <- as.numeric(iris$Species == "virginica")

# 2) Fit a logistic regression model predicting species_num from Sepal.Length & Sepal.Width
model <- glm(
  formula = species_num ~ Sepal.Length + Sepal.Width,
  data    = iris,
  family  = binomial(link = "logit")
)

# 3) Generate predicted classes: "Virginica" vs. "Others"
predicted <- factor(
  as.numeric(predict(model, type = "response") > 0.5),
  levels = c(1, 0),
  labels = c("Virginica", "Others")
)

# 3.1) Generate actual classes
actual <- factor(
  x      = iris$species_num,
  levels = c(1, 0),
  labels = c("Virginica", "Others")
)

# For Log Loss, we need predicted probabilities for each class.
# Since it's a binary model, we create a 2-column matrix:
#   1st column = P("Virginica")
#   2nd column = P("Others") = 1 - P("Virginica")
predicted_probs <- predict(model, type = "response")
response_matrix <- cbind(predicted_probs, 1 - predicted_probs)

# 4) Evaluate unweighted Log Loss
#    'logloss' takes (actual, response_matrix, normalize=TRUE/FALSE).
#    The factor 'actual' must have the positive class (Virginica) as its first level.
unweighted_LogLoss <- logloss(
  actual    = actual,           # factor
  response  = response_matrix,  # numeric matrix of probabilities
  normalize = TRUE              # normalize = TRUE
)

# 5) Evaluate weighted Log Loss
#    We introduce a weight vector, for example:
weights <- iris$Petal.Length / mean(iris$Petal.Length)
weighted_LogLoss <- weighted.logloss(
  actual    = actual,
  response  = response_matrix,
  w         = weights,
  normalize = TRUE
)

# 6) Print Results
cat(
  "Unweighted Log Loss:", unweighted_LogLoss,
  "Weighted Log Loss:", weighted_LogLoss,
  sep = "\n"
)
}
\seealso{
Other Classification: 
\code{\link{ROC.factor}()},
\code{\link{accuracy.factor}()},
\code{\link{baccuracy.factor}()},
\code{\link{ckappa.factor}()},
\code{\link{cmatrix.factor}()},
\code{\link{dor.factor}()},
\code{\link{entropy.matrix}()},
\code{\link{fbeta.factor}()},
\code{\link{fdr.factor}()},
\code{\link{fer.factor}()},
\code{\link{fmi.factor}()},
\code{\link{fpr.factor}()},
\code{\link{jaccard.factor}()},
\code{\link{mcc.factor}()},
\code{\link{nlr.factor}()},
\code{\link{npv.factor}()},
\code{\link{plr.factor}()},
\code{\link{pr.auc.matrix}()},
\code{\link{prROC.factor}()},
\code{\link{precision.factor}()},
\code{\link{recall.factor}()},
\code{\link{roc.auc.matrix}()},
\code{\link{specificity.factor}()},
\code{\link{zerooneloss.factor}()}

Other Supervised Learning: 
\code{\link{ROC.factor}()},
\code{\link{accuracy.factor}()},
\code{\link{baccuracy.factor}()},
\code{\link{ccc.numeric}()},
\code{\link{ckappa.factor}()},
\code{\link{cmatrix.factor}()},
\code{\link{dor.factor}()},
\code{\link{entropy.matrix}()},
\code{\link{fbeta.factor}()},
\code{\link{fdr.factor}()},
\code{\link{fer.factor}()},
\code{\link{fpr.factor}()},
\code{\link{huberloss.numeric}()},
\code{\link{jaccard.factor}()},
\code{\link{mae.numeric}()},
\code{\link{mape.numeric}()},
\code{\link{mcc.factor}()},
\code{\link{mpe.numeric}()},
\code{\link{mse.numeric}()},
\code{\link{nlr.factor}()},
\code{\link{npv.factor}()},
\code{\link{pinball.numeric}()},
\code{\link{plr.factor}()},
\code{\link{pr.auc.matrix}()},
\code{\link{prROC.factor}()},
\code{\link{precision.factor}()},
\code{\link{rae.numeric}()},
\code{\link{recall.factor}()},
\code{\link{rmse.numeric}()},
\code{\link{rmsle.numeric}()},
\code{\link{roc.auc.matrix}()},
\code{\link{rrmse.numeric}()},
\code{\link{rrse.numeric}()},
\code{\link{rsq.numeric}()},
\code{\link{smape.numeric}()},
\code{\link{specificity.factor}()},
\code{\link{zerooneloss.factor}()}
}
\concept{Classification}
\concept{Supervised Learning}
