% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/UBStats_Main_Visible_ALL_202508.R
\name{distr.summary.x}
\alias{distr.summary.x}
\title{Summary statistics for a single variable}
\usage{
distr.summary.x(
  x,
  stats = c("summary"),
  by1,
  by2,
  breaks.by1,
  interval.by1 = FALSE,
  breaks.by2,
  interval.by2 = FALSE,
  adj.breaks = TRUE,
  digits = 2,
  f.digits = 4,
  force.digits = FALSE,
  use.scientific = FALSE,
  data,
  ...
)
}
\arguments{
\item{x}{An unquoted string identifying the variable whose
distribution has to be summarized. \code{x} can be the name of a vector
or a factor in the workspace or the name of one of the columns in the
data frame specified in the \code{data} argument.}

\item{stats}{A character vector specifying the summary statistics
to compute (more summaries can be specified).
Specific types of summaries can be requested with the following
options:
\itemize{
\item \code{"summary"}: min, q1, median, mean, q3, max, sd, var;
\item \code{"central"}: central tendency measures;
\item \code{"dispersion"}: measures of dispersion;
\item \code{"fivenumbers"}: five-number summary;
\item \code{"quartiles"}, \code{"quintiles"}, \code{"deciles"},
\code{"percentiles"}: set of quantiles.
}

It is also possible to request the following statistics:
\code{"q1"}, \code{"q2"}, \code{"q3"}, \code{"mean"}, \code{"median"},
\code{"mode"} (which returns the mode, the number of modes and the
proportion of cases with modal value respectively), \code{"min"},
\code{"max"}, \code{"sd"}, \code{"var"}, \code{"cv"} (coefficient of
variation), \code{"range"}, \code{"IQrange"} (interquartile range),
and \code{"p1"}, \code{"p2"}, \dots, \code{"p100"} (i.e. specific
percentiles).}

\item{by1, by2}{Unquoted strings identifying optional variables
(typically taking few values/levels) used to build conditional
summaries, that can be defined in the same way as \code{x}.}

\item{breaks.by1, breaks.by2}{Allow classifying the variables \code{by1}
and/or \code{by2}, if \emph{numerical}, into intervals.
They can be integers indicating the number of intervals of
equal width used to classify \code{by1} and/or \code{by2},
or vectors of increasing numeric values defining the endpoints
of intervals (closed on the left and open on the right; the last
interval is closed on the right too). To cover the entire range
of values the maximum and the minimum values should be
included between the first and the last break. It is possible to
specify a set of breaks covering only a portion of the range
of \code{by1} and/or \code{by2}.}

\item{interval.by1, interval.by2}{Logical values indicating
whether \code{by1} and/or \code{by2} are variables
measured in classes (\code{TRUE}). If the intervals for
one variable are not consistent (e.g. overlapping intervals,
or intervals with upper endpoint lower than the lower one),
the variable is analysed as it is, even if
results are not necessarily consistent; default to \code{FALSE}.}

\item{adj.breaks}{Logical value indicating whether the endpoints of
intervals of the numerical variables \code{by1} or \code{by2},
when classified into intervals, should be displayed avoiding
scientific notation; default to \code{TRUE}.}

\item{digits, f.digits}{Integer values specifying the number of
decimals used to round respectively summary statistics
(default: \code{digits=2}) and proportions or
percentages (default: \code{f.digits=4}). If the chosen rounding
formats some non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
\code{force.digits} is set to \code{TRUE}.}

\item{force.digits}{Logical value indicating whether the
requested summaries should be forcedly rounded to the number of decimals
specified in \code{digits} and \code{f.digits} even if non-zero
values are rounded to zero (default to \code{FALSE}).}

\item{use.scientific}{Logical value indicating whether numbers
in tables should be displayed using
scientific notation (\code{TRUE}); default to \code{FALSE}.}

\item{data}{An optional data frame containing \code{x}
and/or the variables specifying the layers, \code{by1} and \code{by2}.
If not found in \code{data}, the variables are taken from
the environment from which \code{distr.summary.x()} is called.}

\item{...}{Additional arguments to be passed to low level functions.}
}
\value{
A list whose elements are tables
(converted to data frames) with the requested summaries, possibly
conditioned on \code{by1} and/or \code{by2}. The values taken
by the conditioning variables are arranged in standard
order (logical, alphabetical or numerical order for vectors,
order of levels for factors, ordered intervals for classified
variables or for variables measured in classes).
}
\description{
\code{distr.summary.x()} computes summary statistics of a vector or a factor.
}
\examples{
data(MktDATA, package = "UBStats")

# Marginal summaries
# - Numerical variable: Default summaries
distr.summary.x(x = AOV, data = MktDATA)
# - Numerical variable: More summaries
distr.summary.x(x = AOV, 
                stats = c("central","dispersion","fivenum"),
                data = MktDATA)
distr.summary.x(x = AOV, stats = c("mode","mean","sd","cv","fivenum"),
                data = MktDATA)
# - Character or factor (only proper statistics calculated)
distr.summary.x(x = LikeMost, stats = c("mode","mean","sd","cv","fivenum"),
                data = MktDATA)
distr.summary.x(x = Education, stats = c("mode","mean","sd","cv","fivenum"),
                data = MktDATA)

# Measures conditioned to a single variable
# - Numerical variable by a character vector
distr.summary.x(x = TotVal, 
                stats = c("p5","p10","p25","p50","p75","p90","p95"),
                by1 = Gender, digits = 1, data = MktDATA)
# - Numerical variable by a numerical variable
#   classified into intervals
distr.summary.x(x = TotVal, 
                stats = c("central","dispersion"),
                by1 = AOV, breaks.by1 = 5,
                digits = 1, data = MktDATA)
# - Numerical variable by a variable measured in classes
distr.summary.x(x = TotVal, 
                stats = c("central","dispersion"),
                by1 = Income.S, 
                interval.by1 = TRUE,
                digits = 1, data = MktDATA)

# Measures conditioned to two variables
distr.summary.x(x = TotVal, stats = "fivenumbers", 
                by1 = Gender, by2 = Kids, data = MktDATA)
distr.summary.x(x = TotVal, stats = "fivenumbers", 
                by1 = Income.S, by2 = Gender,
                interval.by1 = TRUE, data = MktDATA)
distr.summary.x(x = TotVal, stats = "fivenumbers",
                by1 = Gender, by2 = AOV,
                breaks.by2 = 5, data = MktDATA)

# Arguments adj.breaks and use.scientific
#  Variables with a very wide range
LargeX<-MktDATA$TotVal*1000000
LargeBY<-MktDATA$AOV*5000000 
#  - Default: no scientific notation
distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, 
                data = MktDATA)
#  - Scientific notation for summaries 
distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, 
                use.scientific = TRUE, data = MktDATA)
#  - Scientific notation for intervals endpoints
distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, 
                adj.breaks = FALSE, data = MktDATA)
#  - Scientific notation for intervals endpoints and summaries
distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, 
                adj.breaks = FALSE, use.scientific = TRUE,
                data = MktDATA)

# Output the list with the requested summaries
Out_TotVal<-distr.summary.x(x = TotVal, 
                            by1 = Income.S, by2 = Gender,
                            interval.by1 = TRUE,
                            stats = c("central","fivenum","dispersion"),
                            data = MktDATA)

}
\seealso{
\code{\link{summaries.plot.x}()} to graphically display
conditioned tendency summaries of a univariate distribution.

\code{\link{distr.table.x}()} for tabulating a univariate
distribution.

\code{\link{distr.plot.x}()} for plotting a univariate
distribution.
}
\author{
Raffaella Piccarreta \email{raffaella.piccarreta@unibocconi.it}
}
