% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_missing.R
\name{add_missing}
\alias{add_missing}
\title{Add missing values to a vector given a MCAR, MAR, or MNAR scheme}
\usage{
add_missing(y, fun = function(y, rate = 0.1, ...) rep(rate, length(y)), ...)
}
\arguments{
\item{y}{an input vector that should contain missing data in the form of \code{NA}'s}

\item{fun}{a user defined function indicating the missing data mechanism for each element in \code{y}.
Function must return a vector of probability values with the length equal to the length of \code{y}.
Each value in the returned vector indicates the probability that
the respective element in y will be replaced with \code{NA}.
Function must contain the argument \code{y}, representing the
input vector, however any number of additional arguments can be included}

\item{...}{additional arguments to be passed to \code{FUN}}
}
\value{
the input vector \code{y} with the sampled \code{NA} values
  (according to the \code{FUN} scheme)
}
\description{
Given an input vector, replace elements of this vector with missing values according to some scheme.
Default method replaces input values with a MCAR scheme (where on average 10\% of the values will be
replaced with \code{NA}s). MAR and MNAR are supported by replacing the default \code{FUN} argument.
}
\details{
Given an input vector y, and other relevant variables
inside (X) and outside (Z) the data-set, the three types of missingness are:

\describe{
  \item{MCAR}{Missing completely at random (MCAR). This is realized by randomly sampling the values of the
    input vector (y) irrespective of the possible values in X and Z.
    Therefore missing values are randomly sampled and do not depend on any data characteristics and
    are truly random}
  \item{MAR}{Missing at random (MAR). This is realized when values in the dataset (X)
    predict the missing data  mechanism in y; conceptually this is equivalent to
    \eqn{P(y = NA | X)}. This requires the user to define a custom missing data function}
  \item{MNAR}{Missing not at random (MNAR). This is similar to MAR except
    that the missing mechanism comes
    from the value of y itself or from variables outside the working dataset;
    conceptually this is equivalent to \eqn{P(y = NA | X, Z, y)}. This requires
    the user to define a custom missing data function}
}
}
\examples{

set.seed(1)
y <- rnorm(1000)

## 10\% missing rate with default FUN
head(ymiss <- add_missing(y), 10)

## 50\% missing with default FUN
head(ymiss <- add_missing(y, rate = .5), 10)

## missing values only when female and low
X <- data.frame(group = sample(c('male', 'female'), 1000, replace=TRUE),
                level = sample(c('high', 'low'), 1000, replace=TRUE))
head(X)

fun <- function(y, X, ...){
    p <- rep(0, length(y))
    p[X$group == 'female' & X$level == 'low'] <- .2
    p
}

ymiss <- add_missing(y, X, fun=fun)
tail(cbind(ymiss, X), 10)

## missingness as a function of elements in X (i.e., a type of MAR)
fun <- function(y, X){
   # missingness with a logistic regression approach
   df <- data.frame(y, X)
   mm <- model.matrix(y ~ group + level, df)
   cfs <- c(-5, 2, 3) #intercept, group, and level coefs
   z <- cfs \%*\% t(mm)
   plogis(z)
}

ymiss <- add_missing(y, X, fun=fun)
tail(cbind(ymiss, X), 10)

## missing values when y elements are large (i.e., a type of MNAR)
fun <- function(y) ifelse(abs(y) > 1, .4, 0)
ymiss <- add_missing(y, fun=fun)
tail(cbind(y, ymiss), 10)

}
\references{
Sigal, M. J., & Chalmers, R. P. (2016). Play it again: Teaching statistics with Monte
Carlo simulation. \code{Journal of Statistics Education, 24}(3), 136-156.
\url{http://www.tandfonline.com/doi/full/10.1080/10691898.2016.1246953}
}
