% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prevalence.R
\name{prevalence}
\alias{prevalence}
\title{Estimate point prevalence at an index date.}
\usage{
prevalence(index, num_years_to_estimate, data, inc_formula = NULL,
  inc_model = NULL, surv_formula = NULL, surv_model = NULL,
  registry_start_date = NULL, death_column = NULL,
  incident_column = NULL, age_column = "age", age_dead = 100,
  status_column = "status", N_boot = 1000, population_size = NULL,
  proportion = 1e+05, level = 0.95, dist = c("exponential",
  "weibull", "lognormal"), precision = 2)
}
\arguments{
\item{index}{The date at which to estimate point prevalence as a string in the format
YYYY-MM-DD.}

\item{num_years_to_estimate}{Number of years of data to consider when
estimating point prevalence; multiple values can be specified in a vector.
If any values are greater than the number of years of registry data
available before \code{index}, incident cases
for the difference will be simulated.}

\item{data}{A data frame containing the columns referenced in
\code{inc_formula} and \code{surv_formula}.}

\item{inc_formula}{A formula specifying the columns used in the incidence process.
    The LHS should be the name of the column holding the incident dates,
    with the RHS specifying any variables that should be stratified by, or 1 if no
    stratification. For example, with the supplied \code{prevsim} data set, it could
    be used as follows:

    \code{entrydate ~ 1} for a non-stratified process.
    \code{entrydate ~ sex} for a process that will stratify incidence by sex.}

\item{inc_model}{An object that has a \code{draw_incident_population}
method. See the vignette for further guidance.}

\item{surv_formula}{A formula used to specify a survival model, where the
LHS is a Surv object, as used by \code{flexsurvreg}.}

\item{surv_model}{An object that has a \code{predict_survival_probability}
method. See the vignette for further guidance.}

\item{registry_start_date}{The starting date of the registry. If not supplied
then defaults to the earliest incidence date in the supplied data set.}

\item{death_column}{A string providing the name of the column which holds the death
date information. If not provided then prevalence cannot be counted and estimates
will be solely derived from simulation.}

\item{incident_column}{A string providing the name of the column which holds the diagnosis
date. If not provided either in this argument or in \code{inc_formula},
then prevalence cannot be counted and estimates will be solely derived from simulation.}

\item{age_column}{A string providing the name of the column that holds patient age. If provided
then patients alive at \code{age_dead} are set to die. This helps combat 'immortal' patients.}

\item{age_dead}{The age at which patients are set to be dead if they are still alive, to prevent
'immortal' patients. Used in conjunction with \code{age_column}.}

\item{status_column}{A string providing the name of the column that holds patient event status at
the event time. If not provided in \code{surv_formula} or in this argument then prevalence
cannot be counted.}

\item{N_boot}{Number of bootstrapped calculations to perform.}

\item{population_size}{Integer corresponding to the size of the population at
risk.}

\item{proportion}{The population ratio to estimate prevalence for.}

\item{level}{Double representing the desired confidence interval width.}

\item{dist}{The distribution used by the default parametric survival model.}

\item{precision}{Integer representing the number of decimal places required.}
}
\value{
A \code{prevalence} object containing the following attributes:
  \item{estimates}{Prevalence estimates at the specified years as both absolute counts and rates.}
  \item{simulated}{A \code{data.table} containing simulated incident cases from each bootstrap iteration.
    Each row corresponds to a simulated incident case with their simulated attributes and survival status.
    Binary flags are provided beginning \code{prev_}, which indicate whether that person contributed
    to the prevalence for the specified time-period. The \code{prev_registry} flag indicates whether that
    person was incident during the registry time-span and alive at the index. These cases are used to
    assess the model fit, as the numbers can be simply compared to the known registry prevalence.}
  \item{counted}{The number of incident cases present in the registry data set.}
  \item{full_surv_model}{The survival model built on the complete registry data set.}
  \item{full_inc_model}{The incidence model built on the complete registry data set.}
  \item{surv_models}{A list of the survival models fitted to each bootstrap iteration.}
  \item{inc_models}{A list of the incidence models fitted to each bootstrap iteration.}
  \item{index_date}{The index date.}
  \item{est_years}{The years at which prevalence is estimated.}
  \item{counted_incidence_rate}{The overall incidence rate in the registry data set.}
  \item{registry_start}{The date at which the registry was identified as starting.}
  \item{proportion}{The denominator to use for estimating prevalence rates.}
  \item{status_col}{The column in the registry data containing the survival status.}
  \item{N_boot}{The number of bootstrap iterations that were run.}
  \item{means}{Covariate means, used when plotting Kaplan-Meier estimators using \code{survfit}.}
  \item{max_event_time}{The maximum time-to-event in the registry data. Again, used in
    \code{survfit} to scale the time-axis.}
  \item{pval}{The p-value resulting from a hypothesis test on the difference between the
  simulated and counted prevalence on the time-span covered by the registry. Tests the
  prevalence fit; if a significant result is found then further diagnostics are required.}
}
\description{
Point prevalence at a specific index date is estimated using contributions to
prevalence from both available registry data, and from Monte Carlo
simulations of the incidence and survival process, as outlined by Crouch et
al. (2014) (see References).
}
\details{
The most important parameter is \code{num_years_to_estimate}, which governs
the number of previous years of data to use when estimating the prevalence at
the index date. If this parameter is greater than the number of years of
known incident cases available in the supplied registry data (the period
from \code{registry_start_date} to \code{index}), then the remaining
years of incident data will be simulated using Monte Carlo simulation.

The larger \code{num_years_to_estimate}, the more accurate the prevalence
estimate will be, provided an adequate survival model can be fitted to the
registry data. It is therefore important to provide as much clean registry
data as possible.

Prevalence arises from two stochastic processes: incidence and survival.
This is reflected in the function arguments by multiple options for
each of these processes.

The incidence process is specified by an object
that has an associated \code{draw_incident_population} method, which produces the new
incident population. The default implementation is a homogeneous Poisson process,
whereby interarrival times are distributed according to an exponential distribution.
The \code{inc_formula} argument specifies the nature of this process; see the
argument description for more details. See the vignette for guidance on providing a
custom incidence object.

The survival process is characterised by a method \code{predict_survival_probability},
that estimates the probability of a given individual being alive at the index date.
The default object is a parametric distribution with the functional form being specified
in \code{surv_formula} and distribution given in \code{dist}. See the vignette for guidance
on providing a custom survival model.
}
\examples{
data(prevsim)

\dontrun{
data(prevsim)

prevalence(index='2013-01-30',
           num_years_to_estimate=c(3, 5, 10, 20),
           data=prevsim,
           inc_formula = entrydate ~ sex,
           surv_formula = Surv(time, status) ~ age + sex,
           dist='weibull',
           population_size = 1e6,
           death_column = 'eventdate')
}

}
\references{
Crouch, Simon, et al. "Determining disease prevalence from
  incidence and survival using simulation techniques." Cancer epidemiology
  38.2 (2014): 193-199.
}
\seealso{
Other prevalence functions: \code{\link{test_prevalence_fit}}
}
\concept{prevalence functions}
