\name{simdata}
\alias{simdata}
\alias{simdataV}
\title{Generation of data sets with survival outcome}
\description{
  This function simulates a data set with survival outcome with given active biomarkers (prognostic and/or interacting with the treatment).
}
\usage{
simdata(n, p, q.main, q.inter, prob.tt, m0, alpha.tt, beta.main,
  beta.inter, b.corr, b.corr.by, wei.shape, recr, fu, timefactor,
  active.main, active.inter)

simdataV(traindata, Nvalid)
}
\arguments{
  \item{n}{the sample size.}
  \item{p}{the number of biomarkers.}
  \item{q.main}{the number of true prognostic biomarkers.}
  \item{q.inter}{the number of true biomarkers interacting with the
  treatment.}
  \item{prob.tt}{the treatment assignment probability.}
  \item{m0}{the baseline median survival time.}
  \item{alpha.tt}{the effect of the treatment (in log-scale).}
  \item{beta.main}{the effect of the prognostic biomarkers (in log-scale).}
  \item{beta.inter}{the effect of the biomarkers interacting with the
  treatment (in log-scale).}
  \item{b.corr}{the correlation between biomarkers within a block.}
  \item{b.corr.by}{the size of the blocks of correlated biomarkers.}
  \item{wei.shape}{the shape parameter of the Weibull distribution.}
  \item{recr}{the recruitment period duration.}
  \item{fu}{the follow-up period duration.}
  \item{timefactor}{the multiplicative scale factor for times (e.g. 1 = times in years).}
  \item{active.main}{the list of the prognostic biomarkers (not
  mandatory).}
  \item{active.inter}{the list of the biomarkers interacting with the
  treatment (not mandatory).}
  \item{traindata}{the training set returned by \code{simdata},
  used to generate the new validation data set with the same characteristics.}
  \item{Nvalid}{the sample size of the new validation data set.}
}
\details{
  The \code{simdata} function generates \code{p} Gaussian unit-variance (\eqn{\sigma = 1}{sigma = 1}) biomarkers with autoregressive correlation (\eqn{\sigma_{ij} = \rho^{|i-j|}}{sigma_ij = rho^|i-j|}, where \eqn{\rho}{rho} is \code{b.corr}) within blocks of \code{b.corr.by} biomarkers. The number of active biomarkers and their effect sizes (in log-scale) can be specified using \code{q.main} and \code{beta.main} for the true prognostic biomarkers and using \code{q.inter} and \code{beta.inter} for the true treatment-effect modifiers. A total of \code{n} patients is generated and randomly assigned to the experimental (coded as +0.5, with probability \code{prob.tt}) and control treatment (coded as -0.5). The treatment effect is specified using \code{alpha.tt} (in log-scale). Survival times are generated from a Weibull distribution with shape \code{wei.shape} (i.e. 1 = exponential distribution) and a patient-specific scale depending on the baseline median survival time \code{m0} and the biomarker values of the patient.
  Censoring status is generated by considering independent censoring from a U(\code{fu}, \code{fu} + \code{recr}) distribution, reflecting a trial with \code{recr} years of accrual and \code{fu} years of follow-up.
  Another data set with the same characteristics as the one generated by \code{simdata} can be obtained by using the \code{simdataV} function.
}
\value{
  A simulated \code{data.frame} object.
}
\author{Nils Ternes, Federico Rotolo, and Stefan Michiels\cr
Maintainer: Nils Ternes \email{nils.ternes@yahoo.com}
}
\examples{
  # Fix the random seed so that the simulated data set is reproducible.
  set.seed(123456)
  # Simulate a training data set with a survival outcome.
  sdata <- simdata(
    # Sample size, number of biomarkers, and numbers of true prognostic
    # and true treatment-interacting biomarkers.
    n = 500, p = 100, q.main = 5, q.inter = 5,
    # Treatment assignment probability and treatment effect (log-scale).
    prob.tt = 0.5, alpha.tt = -0.5,
    # Effects (log-scale) of the prognostic and of the interacting biomarkers.
    beta.main = c(-0.5, -0.2), beta.inter = c(-0.7, -0.4),
    # Correlation parameter and size of the blocks of correlated biomarkers.
    b.corr = 0.6, b.corr.by = 10,
    # Baseline median survival time, Weibull shape (1 = exponential),
    # recruitment period duration and follow-up period duration.
    m0 = 5, wei.shape = 1, recr = 4, fu = 2,
    # Scale factor for times (1 = times in years).
    timefactor = 1,
    # The treatment-interacting biomarkers are named explicitly here;
    # active.main is not supplied in this call.
    active.inter = c("bm003", "bm021", "bm044", "bm049", "bm097"))

  # Generate a validation data set of 500 patients with the same
  # characteristics as the training set simulated above.
  newdata <- simdataV(
    traindata = sdata,
    Nvalid = 500)
}

\keyword{data}
\keyword{simulation}
