% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/discretize.R
\name{discretize}
\alias{discretize}
\alias{mdlControl}
\alias{equalsizeControl}
\alias{customBreaksControl}
\title{Discretization}
\usage{
discretize(x, y, control = list(mdlControl(), equalsizeControl()),
  all = TRUE, call = NULL)

mdlControl()

equalsizeControl(k = 10)

customBreaksControl(breaks)
}
\arguments{
\item{x}{Explanatory continuous variables to be discretized or a \link{formula}.}

\item{y}{Dependent variable for supervised discretization or a \link{data.frame} when \code{x} is a \link{formula}.}

\item{control}{\code{discretizationControl} object containing the parameters for
the discretization algorithm. Possible inputs so far are \code{mdlControl}, \code{equalsizeControl} or \code{customBreaksControl}. If passed as a list, the first element is used.}

\item{all}{Logical indicating if a returned \link{data.frame} should contain other features that were not discretized.
(Example: should \code{Sepal.Width} be returned, when you pass \code{iris} and discretize \code{Sepal.Length, Petal.Length, Petal.Width}.)}

\item{call}{Keep as \code{NULL}. Inner method parameter for consistency.}

\item{k}{Number of partitions.}

\item{breaks}{Custom breaks used for partitioning.}
}
\description{
Discretize a range of numeric attributes in the dataset into nominal
attributes. The \code{Minimum Description Length} (MDL) method is set as the default
control. The \code{equalsizeControl} and \code{customBreaksControl} methods are also available.
}
\examples{

# vectors
discretize(x = iris[[1]], y = iris[[5]])

# list and vector
head(discretize(x = list(iris[[1]], iris$Sepal.Width), y = iris$Species))

# formula input
head(discretize(x = Species ~ ., y = iris))
head(discretize(Species ~ ., iris))

# use different methods for specific columns
ir1 <- discretize(Species ~ Sepal.Length, iris)
ir2 <- discretize(Species ~ Sepal.Width, ir1, control = equalsizeControl(3))
ir3 <- discretize(Species ~ Petal.Length, ir2, control = equalsizeControl(5))
head(ir3)

# custom breaks
ir <- discretize(Species ~ Sepal.Length, iris,
  control = customBreaksControl(breaks = c(0, 2, 5, 7.5, 10)))
head(ir)

\dontrun{
# Same results
library(RWeka)
Rweka_disc_out <- RWeka::Discretize(Species ~ Sepal.Length, iris)[, 1]
FSelectorRcpp_disc_out <- FSelectorRcpp::discretize(Species ~ Sepal.Length,
                                                    iris)[, 1]
table(Rweka_disc_out, FSelectorRcpp_disc_out)
# But faster method
library(microbenchmark)
microbenchmark(FSelectorRcpp::discretize(Species ~ Sepal.Length, iris),
               RWeka::Discretize(Species ~ Sepal.Length, iris))

}

}
\references{
U. M. Fayyad and K. B. Irani. Multi-Interval Discretization of
  Continuous-Valued Attributes for Classification Learning. In 13th
  International Joint Conference on Artificial Intelligence (IJCAI93),
  pages 1022-1029, 1993.
}
\author{
Zygmunt Zawadzki \email{zygmunt@zstat.pl}
}
