# Functions to retrieve starting allocations for the ADPROCLUS algorithms
# (i.e. an initial matrix A, so called cluster membership matrix)

#' Generate initial random start
#'
#' Generate an initial random start for the (low dimensional) Additive Profile
#' Clustering algorithm (see \code{\link{adproclus}} and
#' \code{\link{adproclus_low_dim}}).
#'
#' \code{get_random} generates a random initial binary membership matrix
#' \strong{A} such that each entry is an independen draw from a
#' Bernoulli Distribution with \eqn{\pi = 0.5}.
#'
#' For generating an initial start from random draws from the data, see
#' \code{\link{get_semirandom}}.
#' For generating an initial start based on a specific set of initial cluster
#' centers, see \code{\link{get_rational}}.
#'
#' \strong{Warning:} This function does \emph{not} obtain an ADPRCOLUS model.
#' To perform aditive profile clustering, see \code{\link{adproclus}}.
#'
#' @param data Object-by-variable data matrix of class \code{matrix} or
#' \code{data.frame}.
#' @param nclusters Number of clusters to be used. Must be a positive integer.
#' @param seed Integer. Seed for the random number generator. Default: NULL
#'
#' @return \code{get_random()} returns a list with the following components:
#'   \describe{ \item{\code{type}}{A character string denoting the type of start
#'   ('Random Start')} \item{\code{A}}{A randomly generated initial Membership
#'   matrix}}
#'
#' @export
#'
#' @references Wilderjans, T. F., Ceulemans, E., Van Mechelen, I., & Depril, D.
#' (2010). ADPROCLUS: a graphical user interface for fitting additive profile
#' clustering models to object by variable data matrices. \emph{Behavior
#' Research Methods, 43}(1), 56-65.
#'
#' Depril, D., Van Mechelen, I., & Mirkin, B.
#' (2008). Algorithms for additive clustering of rectangular data tables.
#' \emph{Computational Statistics and Data Analysis, 52,} 4923-4938.
#'
#' Depril, D., Van Mechelen, I., & Wilderjans, T. F.
#' (2012). Lowdimensional additive overlapping clustering.
#' \emph{Journal of classification, 29,} 297-320.
#'
#' @examples
#' # Obtain data from data set "Stackloss" and generate start allocation
#' start_allocation <- get_random(stackloss, 3)$A
#'
#' @seealso
#' \describe{
#'   \item{\code{\link{adproclus}}, \code{\link{adproclus_low_dim}}}{for details
#'    about membership and profile matrices}
#'   \item{\code{\link{get_semirandom}}}{for generating semi-random starts}
#'   \item{\code{\link{get_rational}}}{for generating rational starts}
#' }
get_random <- function(data, nclusters, seed = NULL) {
  if (!is.null(seed)) {
    withr::local_seed(seed = seed)
  }

  data <- as.matrix(data)
  checkmate::assertMatrix(data)
  checkmate::assertCount(nclusters, positive = TRUE, coerce = TRUE)
  if (nrow(data) < nclusters) {
    stop("Number of clusters must be less or equal the number of objects in 'data'.")
  }

  k <- nclusters

  n <- nrow(data)

  A <- (matrix(stats::runif(n * k), n, k) < 0.5) * 1
  while (any(colSums(A) == 0) || qr(A)$rank < k) {
    A <- (matrix(stats::runif(n * k), n, k) < 0.5) * 1
  }

  list(type = "Random Start", A = A)
}

#' Generate initial semi-random start
#'
#' Generate an initial semi-random start for the (low dimensional) Additive
#' Profile Clustering
#' algorithm (see \code{\link{adproclus}} and \code{\link{adproclus_low_dim}}).
#'
#' An initial cluster membership matrix \eqn{A} is generated by
#' finding the best \eqn{A} conditional
#' on an initial profile matrix
#' \eqn{P} generated by drawing \emph{k} randomly chosen, distinct,
#'  rows from \code{data} (for details, see Depril et al., 2012).
#'
#' \strong{Warning:} This function does \emph{not} obtain an ADPRCOLUS model. To
#' perform aditive profile clustering, see \code{\link{adproclus}}.
#'
#' @param data Object-by-variable data matrix of class \code{matrix} or
#'   \code{data.frame}.
#' @param nclusters Number of clusters to be used. Must be a positive integer.
#' @param seed Integer. Seed for the random number generator. Default: NULL
#'
#' @return \code{get_semirandom} returns a list with the following components:
#'   \describe{
#'   \item{\code{type}}{A character string denoting the type of start
#'   ('Semi-random Start')}
#'   \item{\code{A}}{An initial Membership matrix}}
#'
#' @export
#'
#' @references Wilderjans, T. F., Ceulemans, E., Van Mechelen, I., & Depril, D.
#'   (2010). ADPROCLUS: a graphical user interface for fitting additive profile
#'   clustering models to object by variable data matrices. \emph{Behavior
#'   Research Methods, 43}(1), 56-65.
#'
#'   Depril, D., Van Mechelen, I., & Mirkin, B. (2008). Algorithms for additive
#'   clustering of rectangular data tables. \emph{Computational Statistics and
#'   Data Analysis, 52,} 4923-4938.
#'
#'   #' Depril, D., Van Mechelen, I., & Wilderjans, T. F.
#'   (2012). Lowdimensional additive overlapping clustering.
#'   \emph{Journal of classification, 29,} 297-320.
#'
#' @examples
#' # Obtain data from data set "Stackloss" and generate start allocation
#' start_allocation <- get_semirandom(stackloss, 3)$A
#'
#' @seealso
#' \describe{
#'   \item{\code{\link{adproclus}}, \code{\link{adproclus_low_dim}}}{for details
#'    about membership and profile matrices}
#'   \item{\code{\link{get_random}}}{for generating random starts}
#'   \item{\code{\link{get_rational}}}{for generating rational starts}
#' }
get_semirandom <- function(data, nclusters, seed = NULL) {
  if (!is.null(seed)) {
    withr::local_seed(seed = seed)
  }

  data <- as.matrix(data)
  checkmate::assertMatrix(data)
  checkmate::assertCount(nclusters, positive = TRUE, coerce = TRUE)

  if (nrow(data) < nclusters) {
    stop("Number of clusters must be less or equal the number of objects in 'data'.")
  }

  n <- nrow(data)

  rows_selected <- sample(n, nclusters)
  P <- data[rows_selected, ]

  npos <- 2^nclusters
  PossibA <- gtools::permutations(2, nclusters,
    v = c(0, 1),
    repeats.allowed = TRUE
  )
  PossibA <- apply(PossibA, 2, rev)
  if (nclusters > 1) {
    PossibA <- t(apply(PossibA, 1, rev))
  }
  PossibA <- .repmat(PossibA, n, 1)

  replX <- data.frame()
  for (i in 1:n) {
    reps <- matrix(.repmat(data[i, ], npos, 1),
      ncol = ncol(data), nrow = npos, byrow = TRUE
    )
    replX <- rbind(replX, reps)
  }
  replX <- as.matrix(replX)

  A <- as.matrix(.updateA_lf2(n, P, replX, PossibA))
  list(type = "Semi-random Start", A = A)
}

#' Generate start allocation based on a priori profiles
#'
#' If cluster profiles are given a priori, this function can be used to compute
#' the conditionally optimal cluster membership matrix A which can then be
#' used as a rational starting allocation for the (low dimensional) ADPROCLUS
#' procedure (see \code{\link{adproclus}} and \code{\link{adproclus_low_dim}}).
#'
#' The function uses the same quadratic loss function and minimization method as
#' the (low dimensional) ADPROCLUS procedure does to find the next conditionally
#'  optimal membership matrix A. (for details, see Depril et al., 2012). For the full
#'  dimensional ADPROCLUS it uses the algorithm \code{ALS2} and not \code{ALS1}.
#'
#' \strong{Warning:} This function does \emph{not} obtain an ADPRCOLUS model. To
#' perform additive profile clustering, see \code{\link{adproclus}}.
#'
#' @param data Object-by-variable data matrix of class \code{matrix} or
#'   \code{data.frame}.
#' @param starting_profiles A matrix where each row represents the profile
#' values for a cluster. Needs to be of same dimensions as \eqn{P}.
#'
#' @return \code{get_rational()} returns a list with the following components:
#'   \describe{
#'   \item{\code{type}}{A character string denoting the type of start
#'   ('Rational Start')}
#'   \item{\code{A}}{An initial Membership matrix}}
#' @export
#'
#' @references Depril, D., Van Mechelen, I., & Wilderjans, T. F.
#'   (2012). Lowdimensional additive overlapping clustering.
#'   \emph{Journal of classification, 29,} 297-320.
#'
#' @examples
#' # Obtain data from standard data set "Stackloss"
#' x <- stackloss
#'
#' # Obtaining a user-defined rational start profile matrix
#' # (here the first 4 rows of the data)
#' start_allocation <- get_rational(x, x[1:4, ])$A
#'
#' @seealso
#' \describe{
#'   \item{\code{\link{adproclus}}, \code{\link{adproclus_low_dim}}}{for details
#'    about membership and profile matrices}
#'   \item{\code{\link{get_random}}}{for generating random starts}
#'   \item{\code{\link{get_semirandom}}}{for generating semi-random starts}
#' }
get_rational <- function(data, starting_profiles) {
  data <- as.matrix(data)
  starting_profiles <- as.matrix(starting_profiles)
  checkmate::assertMatrix(data)
  checkmate::assertMatrix(starting_profiles)
  if (ncol(starting_profiles) != ncol(data)) {
    stop("Number of variables in data must equal number of columns of profile matrix.")
  }

  n <- nrow(data)

  nclusters <- nrow(starting_profiles)
  P <- starting_profiles

  npos <- 2^nclusters
  PossibA <- gtools::permutations(2, nclusters,
    v = c(0, 1),
    repeats.allowed = TRUE
  )
  PossibA <- apply(PossibA, 2, rev)
  if (nclusters > 1) {
    PossibA <- t(apply(PossibA, 1, rev))
  }
  PossibA <- .repmat(PossibA, n, 1)

  replX <- data.frame()
  for (i in 1:n) {
    reps <- matrix(.repmat(data[i, ], npos, 1),
      ncol = ncol(data), nrow = npos, byrow = TRUE
    )
    replX <- rbind(replX, reps)
  }
  replX <- as.matrix(replX)

  A <- as.matrix(.updateA_lf2(n, P, replX, PossibA))
  list(type = "Rational Start", A = A)
}
