% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/computeSummaryStatisticsTable.R
\name{computeSummaryStatisticsTable}
\alias{computeSummaryStatisticsTable}
\title{Compute summary statistics for a specific dataset and variables of interest}
\usage{
computeSummaryStatisticsTable(
  data,
  var = NULL,
  varFlag = NULL,
  varInclude0 = FALSE,
  varLab = NULL,
  varLabInclude = length(var) > 1,
  varGeneralLab = "Variable",
  varSubgroupLab = "Variable group",
  varIgnore = NULL,
  varIncludeTotal = FALSE,
  varTotalInclude = FALSE,
  varTotalInSepRow = FALSE,
  colVar = NULL,
  colVarDataLevels = NULL,
  colVarTotal = colVar,
  colVarTotalPerc = colVarTotal,
  colTotalInclude = FALSE,
  colTotalLab = "Total",
  colInclude0 = FALSE,
  rowVar = NULL,
  rowVarDataLevels = NULL,
  rowVarLab = NULL,
  rowOrder = "auto",
  rowOrderTotalFilterFct = NULL,
  rowOrderCatLast = NULL,
  rowVarTotalInclude = NULL,
  rowVarTotalInSepRow = NULL,
  rowVarTotalByVar = NULL,
  rowVarTotalPerc = NULL,
  rowInclude0 = FALSE,
  type = "auto",
  subjectVar = "USUBJID",
  dataTotal = NULL,
  dataTotalPerc = dataTotal,
  dataTotalRow = NULL,
  dataTotalCol = NULL,
  stats = NULL,
  statsVarBy = NULL,
  statsExtra = NULL,
  statsGeneralLab = "Statistic",
  statsPerc = c("statN", "statm"),
  filterFct = NULL,
  labelVars = NULL,
  byVar = NULL,
  byVarLab = NULL,
  checkVarDiffBySubj = "error"
)
}
\arguments{
\item{data}{Data.frame with dataset to consider for the summary table.}

\item{var}{Character vector with variable(s) of \code{data}, 
to compute statistics on.\cr
If NULL (by default), counts by row/column variable(s) are computed.\cr
To also return counts of the \code{rowVar} in case other \code{var}
are specified, you can include: 'all' in the \code{var}.\cr
Missing values, if present, are filtered 
(also for the report of number of subjects/records).}

\item{varFlag}{Character vector, subset of \code{var} with variable(s) 
of type 'flag' (with 'Y', 'N' or '' for empty/non specified value).
Only the counts for records flagged (with 'Y') are retained.}

\item{varInclude0}{Logical, should rows with no counts 
for the count \code{var} or \code{varFlag} variable(s)
be included in the table?
Either:
\itemize{
\item{logical of length 1, if TRUE (FALSE by default) 
rows with no count are included for all \code{var}
}
\item{a character vector containing categorical \code{var} 
for which zero counts rows should be included}
}}

\item{varLab}{Named character vector with label for each variable 
specified in \code{var}.
By default, extracted from the \code{labelVars}.
If not available, \code{var} is used.}

\item{varLabInclude}{Logical, if TRUE
the name of the summary statistic variable(s) (\code{var})
are included in the table.
This is automatically set to TRUE if more than one variable
is specified, and FALSE if only one variable is specified.}

\item{varGeneralLab}{String with general label for variable specified in \code{var}.
In case of multiple variables in \code{var}, this will be included in the table header
(see 'rowVarLab' attribute of the output).}

\item{varSubgroupLab}{String with general label for sub-group of
categorical variable(s) for count table, 'Variable group' by default.
This will be included in the final table header (see 'rowVarLab' attribute of the output).}

\item{varIgnore}{Vector with elements to ignore in the \code{var} variable(s).
The \code{data} records with such elements in \code{var} are \strong{filtered} from the data 
at the start of the workflow.}

\item{varIncludeTotal}{This argument is deprecated, please use: 'varTotalInclude' instead.}

\item{varTotalInclude}{Should the total across all categories of \code{var} 
be included for the count table?
Only used for categorical variables (and \code{var} not 'all').
Either:
\itemize{
\item{logical of length 1, if TRUE (FALSE by default) include the total for all categorical \code{var}}
\item{a character vector containing categorical \code{var} for which the total should be included}
}}

\item{varTotalInSepRow}{Logical, should the total per variable be included in
a separated row (if TRUE) or in the row containing the header of the variable (if FALSE, by default)?}

\item{colVar}{Character vector with variable(s) to be included in columns.
If multiple variables are specified, the variables should 
be sorted in hierarchical order,
and are included in multi-columns layout.\cr
Use: 'variable' to include the variables to summarize: \code{var}
 (if multiple) in different columns.}

\item{colVarDataLevels}{Data.frame with unique combinations of \code{colVar}
to be included in columns.
Each column should correspond to \code{colVar} and as factor
if the elements should be ordered in the final table.}

\item{colVarTotal}{String with column(s) considered to compute the total by,
reported in the header of the table, by default same as \code{colVar}.
Use: 'variable' to compute total by \code{var} (if multiple).}

\item{colVarTotalPerc}{String with column(s) considered to compute the total by,
used as denominator for the percentage computation, by default same as \code{colVarTotal}.
Use: 'variable' to compute total by \code{var} (if multiple).}

\item{colTotalInclude}{Logical, if TRUE (FALSE by default) include the summary 
statistics across columns in a separated column.}

\item{colTotalLab}{String, label for the total column, 'Total' by default.\cr}

\item{colInclude0}{Logical, if TRUE (FALSE by default),
include columns with no records, based on all combinations 
of the \code{colVar} (assuming nested variable(s)).
If variable(s) are not nested, possible combinations
can be specified via \code{colVarDataLevels}.}

\item{rowVar}{Character vector with variable(s)
to be included in the rows.
If multiple variables are specified, the variables should 
be sorted in hierarchical order
(e.g. body system class before adverse event term)
and are nested in the table.}

\item{rowVarDataLevels}{Data.frame with unique combinations of \code{rowVar}
to be included in rows.
Each column should correspond to \code{rowVar} and as factor
if the elements should be ordered in the final table.}

\item{rowVarLab}{Named character vector with 
label for the \code{rowVar} variable(s).}

\item{rowOrder}{Specify how the rows should be ordered in the final table, either a:
\itemize{
\item{String among:}{
\itemize{
\item{'auto' (by default): }{if the variable is a factor, keep its order, otherwise order alphabetically}
\item{'alphabetical': }{order alphabetically}
\item{'total': }{order rows in decreasing order of the total number of subjects
across all columns for this specific category.}
}}
\item{Function with input the summary table and output the ordered elements of the \code{rowVar}}
}
To specify different ordering methods for different \code{rowVar}, specify a list
of such elements, named with the \code{rowVar} variable.
For the table output of \code{\link{computeSummaryStatisticsTable}} (long format),
this order is also reflected in the \strong{\code{levels}} of the row factor variable.}

\item{rowOrderTotalFilterFct}{Function used to filter the data used to order the rows
based on total counts (in case \code{rowOrder} is 'total').
To order rows based on one specific column category,
e.g. based on the counts in the treatment column, use:
\code{function(x) subset(x, TRTP == "treatmentX")}}

\item{rowOrderCatLast}{String with category to be printed in the last 
row of each \code{rowVar} (if any, set to NULL if none).}

\item{rowVarTotalInclude}{Character vector with \code{rowVar}
for which the total should be reported.\cr
If the higher row variable is specified, the total across all rows
is reported. \cr
For the export, these variable(s) are formatted as factor with 
\strong{'Total' as the first level}.}

\item{rowVarTotalInSepRow}{Character vector with \code{rowVarTotalInclude}
(not in \code{rowVarInSepCol}) for which the total should be included in a separated row labelled 'Total'.
Otherwise (by default) the total is included in the header row of each category.}

\item{rowVarTotalByVar}{Character vector with a row variable
used to categorize the row total.\cr
Note that this is only used if row total(s) is/are requested via \code{rowVarTotalInclude},
and this variable should also be included in \code{rowVar}.
This can be specified also for a specific row variable if the vector is named.\cr
For example: \code{c(ADECOD = "AESEV")} to compute total by severity 
for row adverse event term in a typical adverse event count table 
(by System Organ Class and Adverse Event Term).}

\item{rowVarTotalPerc}{Character vector with row variables by which the total
should be computed for the denominator for the percentage computation.
By default the total is computed only by column (NULL by default).
If the total should be based on the total number of records per variable,
\code{rowVarTotalPerc} should be set to 'variable'.}

\item{rowInclude0}{Logical, if TRUE (FALSE by default),
include rows with no records, based on all combinations 
of the \code{rowVar} (assuming nested variable(s)).}

\item{type}{String with type of table: 
\itemize{
\item{'summaryTable': }{summary table with statistics for numeric variable}
\item{'countTable': }{count table}
\item{'auto' (by default): }{'summaryTable' if the variable is numeric,
'countTable' otherwise}
}}

\item{subjectVar}{String, variable of \code{data} with subject ID,
'USUBJID' by default.}

\item{dataTotal}{Data.frame used to extract the total number of subjects
per column in column header ('N = [X]').
It should contain the variables specified by \code{colVarTotal}.
If not specified, the total number of subjects is extracted from the \code{data}.}

\item{dataTotalPerc}{Data.frame used to extract the total counts per column 
for the computation of the percentage.\cr
By default, \code{dataTotal} is used.\cr
It should contain the variables specified by \code{colVarTotalPerc}.}

\item{dataTotalRow}{Data.frame used to extract the total count across all
elements of the row
variable, or list of such data.frame for each \code{rowVar} variable.\cr
If the dataset is specified by row variable, the list should be named with:
variable X if the total across elements of variable X should be included.
By default, \code{data} is used.}

\item{dataTotalCol}{Data.frame from which the total across columns is 
extracted (in case \code{colTotalInclude} is TRUE)
or list of such data.frame for each \code{rowVar} variable.\cr
If the dataset is specified by row variable, the list should be named with:
\itemize{
\item{last row variable: }{for the dataset used in the total column for 
the most nested row variable}
\item{higher row variable (X+1): }{for the dataset used for the total column
and row total of X}
\item{'total': }{for the dataset used for the total column and general row total}
}
If only a subset of the variables is specified in this list, 
\code{data} is used for the remaining variable(s) (or 'total') if needed.\cr
This dataset (the one for 'total' if a list) is also used for:
\itemize{
\item{the header of the total column in case \code{dataTotal} is
not specified}
\item{the denominator of the percentages in the total column
in case \code{dataTotalPerc} is not specified}
}
By default, \code{data} is used.}

\item{stats}{(optional) Statistic(s) of interest to compute, either:
\itemize{
\item{string with the name of a default set of statistics
available in the package, 
see section 'Formatted statistics' in
\code{\link[=inTextSummaryTable-stats]{in-text table statistics}}.\cr
See the corresponding \code{type} parameter of the
\code{\link{getStatsData}} for more information
on how the statistic is internally extracted.}
\item{(expert mode) named list of language objects (see \code{\link{is.language}}) 
of base summary statistics of interest, see section:
'Base statistics' in
\code{\link[=inTextSummaryTable-stats]{in-text table statistics}}.\cr
The names are reported in the header.\cr
If \code{stats} is of length 1, the name of the summary statistic is not included
in the table.\cr
The statistics can be specified separately:
\itemize{
\item{for each \code{var} (if multiple), 
by naming each element of the list:
\code{list(varName1 = list(...), varName2 = list())}
}
\item{and/or for each element in:
\code{statsVarBy}, by naming each sublist.}
}}
}}

\item{statsVarBy}{String with variable in \code{rowVar}/\code{colVar}
which the statistics should be computed by.
In this case, \code{stats} (nested list or not) should be additionally nested
to specify the statistics for each element in \code{statsVarBy}.}

\item{statsExtra}{(optional) Named list with functions for additional custom
statistics to be computed.\cr
Each function:
\itemize{
\item{has as parameter, either: }{'x': the variable (\code{var}) to compute
the summary statistic on or 'data': the entire dataset}
\item{returns the corresponding summary statistic as a numeric vector}
}
For example, to additionally compute the coefficient of variation, this can be set to:
\code{list(statCVPerc = function(x) sd(x)/mean(x)*100)} (or \code{\link{cv}}).}

\item{statsGeneralLab}{String with general label for statistics, 'Statistic' by default.
Only included if no \code{statsVar} is longer than 1.}

\item{statsPerc}{String with 'base statistical variable' used to compute the 
percentage, either: 
\itemize{
\item{'statN' (by default): }{the number of subjects}
\item{'statm': }{the number of records}
}}

\item{filterFct}{(optional) Function taking as input
the summary table with computed statistics and returning a subset 
of the summary table.\cr
Note: The filtering function should also handle records with:
\itemize{
\item{total for the column header: \code{isTotal} set to TRUE,
and \code{colVar}/\code{rowVar} is NA.\cr
For example: \code{filterFct = function(data) subset(data, isTotal & myColVar == "group 1")}
}
\item{\code{rowVar}/\code{colVar} set to 'Total'/\code{colTotalLab} 
if \code{rowVarTotalInclude}/\code{colTotalInclude} is specified}
}}

\item{labelVars}{(optional) Named character vector with label for
the row, column variable(s) or variable(s) to summarize. \cr
Labels specified via dedicated parameter: e.g. 
\code{rowVarLab}, \code{colVarLab}, \code{varLab}
have priority on this parameter.}

\item{byVar}{Variable(s) of \code{data} for which separated table(s)
should be created.}

\item{byVarLab}{String with label for \code{byVar}, used to set the names
of the output list of table(s).}

\item{checkVarDiffBySubj}{String, 'error' (default), 'warning',
or 'none'.  
Should an error, a warning, or nothing be produced
if a continuous variable (\code{var}) contains
different values for the same subject (by row/column)?}
}
\value{
An object \code{\link{summaryTable}}
or list of such objects if
\code{byVar} is specified.
}
\description{
Compute summary statistics for a specific dataset and variables of interest
}
\author{
Laure Cougnaud
}
