% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seq_bin_model.R
\name{seq_bin_model}
\alias{seq_bin_model}
\title{The sequential logistic regression model for the binary classification
  problem}
\usage{
seq_bin_model(startnum, data.clust, xfix, yfix, dlen = 0.5,
  criterion = "BIC", pho = 0.05, ptarget = 0.5)
}
\arguments{
\item{startnum}{The initial number of subjects from original dataset.}

\item{data.clust}{A large list obtained through k-means clustering. Samples
within the same element of data.clust (e.g. data.clust[[1]]) are closer to
each other than to samples in any other element.}

\item{xfix}{A data frame in which each row is a sample and each column
represents an independent variable. From each cluster of data.clust, the
sample with the minimum variance is chosen to represent all samples of the
corresponding cluster.}

\item{yfix}{A numeric vector consisting of 0s and 1s. The length of yfix must
match the number of samples in xfix.}

\item{dlen}{A numeric number specifying the length of the fixed size
confidence set for our model. Note that the smaller the dlen, the larger
the sample size and the longer the time costs. The default value is 0.5.}

\item{criterion}{A character string that determines the model selection
criterion to be used, matching one of 'BIC' or 'AIC'. The default value is
'BIC'.}

\item{pho}{A numeric number used in subject selection according to the
D-optimality. That is, select the first (pho * length(data)) data from the
unlabeled data set and add them to the uncertainty set. The default value is
0.05.}

\item{ptarget}{A numeric number that helps to choose the samples. The default
value is 0.5.}
}
\value{
a list containing the following components
\item{dlen}{the length of the fixed size confidence set that we specify}
\item{n}{the current sample size when the stopping criterion is satisfied}
\item{lab.seq}{an indicator of whether the sequential iterations have
stopped. A value of 1 means that the iteration has stopped}
\item{betahat}{the parameters that we estimate when the iteration is
finished}
}
\description{
\code{seq_bin_model} estimates the effective variables and chooses the
subjects sequentially using the logistic regression model for the binary
classification case with the adaptive shrinkage estimate method.
}
\details{
seq_bin_model is a binary logistic regression model that estimates the
effective variables and determines the samples sequentially from the original
training data set using the adaptive shrinkage estimate given the fixed size
confidence set. It is a sequential method in which samples are selected one
by one from the data pool. Once it stops, enough samples have been selected
to satisfy the stopping criterion, and we can conclude which variables are
effective, their corresponding values, and the number of samples selected.
}
\examples{
# generate the toy example
library(foreach)
beta <- c(-1,1,0.01,0.1)
N <- 10000
nclass <- 1000
seed <- 123
data  <- gen_bin_data(beta,N,nclass,seed)
xfix <- data[['X']]
yfix <- data[['y']]
data.clust <- data[['data.clust']]
startnum <- 24
dlen <- 0.75

# apply seq_bin_model to the binary classification problem. You can remove
# '#' to run the command.
# results <- seq_bin_model(startnum, data.clust, xfix, yfix, dlen,
#                          criterion = "BIC", pho = 0.05, ptarget = 0.5)
}
\references{
{
Wang, Z., & Chang, Y. I. (2013). Sequential estimate for linear regression
models with uncertain number of effective variables. \emph{Metrika}, 76(7), 949–978.
doi:10.1007/s00184-012-0426-4
}
}
\seealso{
{
   \code{\link{seq_GEE_model}} for generalized estimating equations case

   \code{\link{seq_bin_model}} for binary classification case

   \code{\link{seq_ord_model}} for ordinal case.
}
}
