\name{BSWiMS.model}
\alias{BSWiMS.model}
\title{BSWiMS model selection}
\description{
	This function returns the set of models that best predict the outcome, based on the Bootstrap Stage-Wise Model Selection (B:SWiMS) algorithm.
}
\usage{
	BSWiMS.model(formula,
	            data,
	            type = c("Auto","LM","LOGIT","COX"),
	            testType = c("Auto","zIDI",
	                         "zNRI",
	                         "Binomial",
	                         "Wilcox",
	                         "tStudent",
	                         "Ftest"),
	            pvalue=0.05,
	            elimination.pValue=0.05,
	            update.pvalue=c(0.05,0.05),
	            variableList=NULL,
	            size=0,
	            loops=32,
	            elimination.bootstrap.steps = 200,
	            unitPvalues=NULL,
	            adjsize=0.0,
	            fraction=1.0,
	            maxTrainModelSize=20,
	            maxCycles=10,
	            print=FALSE,
	            plots=FALSE
	            )

}
\arguments{
	\item{formula}{
		An object of class \code{formula} with the formula to be fitted
	}
	\item{data}{
		A data frame where all variables are stored in different columns
	}
	\item{type}{
		The fit type. Auto will determine the fitting based on the formula
	}
	\item{testType}{
		For an IDI/NRI-based optimization, the type of index to be evaluated by the \code{improveProb} function (\code{Hmisc} package): \emph{z}-value of IDI ("zIDI") or of NRI ("zNRI"). For a NeRI-based optimization, the type of non-parametric test to be evaluated by the \code{improvedResiduals} function: Binomial test ("Binomial"), Wilcoxon rank-sum test ("Wilcox"), Student's \emph{t}-test ("tStudent"), or \emph{F}-test ("Ftest")
	}
	\item{pvalue}{
		The maximum \emph{p}-value, associated to the \code{testType}, allowed for a term in the model (it will control the false selection rate)
	}
	\item{elimination.pValue}{
		The \emph{p}-value for back elimination
	}
	\item{update.pvalue}{
		The \emph{p}-value for update forward selection
	}
	\item{variableList}{
		A data frame with two columns. The first one must have the names of the candidate variables and the other one the description of such variables
	}
	\item{size}{
		The number of candidate variables to be tested (the first \code{size} variables from \code{variableList})
	}
	\item{loops}{
		The number of bootstrap loops for the forward selection procedure
	}
	\item{elimination.bootstrap.steps}{
		The number of bootstrap loops for the backwards elimination procedure
	}
	\item{unitPvalues}{
		The univariate \emph{p}-value of the association of each feature to the outcome
	}
	\item{adjsize}{
		The expected size of a random model for multiple selection correction
	}
	\item{fraction}{
		The fraction of data (sampled with replacement) to be used as train
	}
	\item{maxTrainModelSize}{
		The maximum number of terms that can be included in each forward selection model
	}
	\item{maxCycles}{
		The maximum number of model generation cycles 
	}
	\item{print}{
		Logical. If \code{TRUE}, information will be displayed
	}
	\item{plots}{
		Logical. If \code{TRUE}, plots are displayed
	}
}
\details{
This is a core function of FRESA.CAD. The function will generate a set of B:SWiMS models from the data based on the provided baseline formula. The function will loop, extracting at each cycle a model whose terms are all statistically significant. After each cycle it will remove the significant terms, and it will repeat the model generation until no more significant models are found or the maximum number of cycles (\code{maxCycles}) is reached.
}
\value{
	\item{BSWiMS.model}{
		The output of the bootstrap backwards elimination step
	}
	\item{forward.model}{
		The output of the forward selection step
	}
	\item{update.model}{
		The output of the update step (the update forward selection controlled by \code{update.pvalue})
	}
	\item{univariate}{
		The univariate ranking of variables if no list of features was provided
	}
	\item{bagging}{
		The model after bagging the set of models
	}
	\item{formula.list}{
		The formulas extracted at each cycle
	}
	\item{forward.selection.list}{
		All formulas generated by the forward selection procedure
	}
}
\references{Pencina, M. J., D'Agostino, R. B., & Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: from area under the ROC curve to reclassification and beyond. \emph{Statistics in Medicine} \bold{27}(2), 157-172.}

\examples{
	\dontrun{
	library(rpart)
	data(stagec)
	# Split the gleason into several columns

	dataCancer <- cbind(stagec[,c(1:3,5:6)],
						gleason4 = 1*(stagec[,7] == 4),
						gleason5 = 1*(stagec[,7] == 5),
						gleason6 = 1*(stagec[,7] == 6),
						gleason7 = 1*(stagec[,7] == 7),
						gleason8 = 1*(stagec[,7] == 8),
						gleason910 = 1*(stagec[,7] >= 9),
						eet = 1*(stagec[,4] == 2),
						diploid = 1*(stagec[,8] == "diploid"),
						tetraploid = 1*(stagec[,8] == "tetraploid"),
						notAneuploid = 1-1*(stagec[,8] == "aneuploid"))

	#Impute missing values
	dataCancerImputed <- nearestneighborimpute(dataCancer)
	# A simple B:SWiMS Model

	BSWiMSModel <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1, dataCancerImputed)

	#The performance of the final model
	sm <- summary(BSWiMSModel$BSWiMS.model$back.model)
	print(sm$coefficients)
	#The ROC plot and Survival Analysis
	pv <- plot(sm$bootstrap)
	#The plot provides the diagnosis confusion matrix.
	library("epiR")
	summary(epi.tests(pv$diagnosticMatrix))

	}
}
\author{Jose G. Tamez-Pena}

\keyword{Model_Generation}