% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SimCollect.R
\name{SimCollect}
\alias{SimCollect}
\alias{aggregate_simulations}
\title{Collapse separate simulation files into a single result}
\usage{
SimCollect(
  dir = NULL,
  files = NULL,
  filename = NULL,
  select = NULL,
  check.only = FALSE,
  target.reps = NULL,
  warning_details = FALSE,
  error_details = TRUE
)

aggregate_simulations(...)
}
\arguments{
\item{dir}{a \code{character} vector pointing to the directory name containing the \code{.rds} files.
All \code{.rds} files in this directory will be used after first checking
their status with \code{\link{SimCheck}}. For greater specificity use the
\code{files} argument}

\item{files}{a \code{character} vector containing the names of the simulation's final \code{.rds} files.}

\item{filename}{(optional) name of the \code{.rds} file to save the aggregated simulation results to. If not specified
then the results will only be returned in the R console.}

\item{select}{a character vector indicating the columns (variables) to select from the
\code{SimExtract(what='results')} information. This is mainly useful when RAM is an issue
given simulations with many stored estimates. Default includes the results objects
in their entirety, though to omit all internally stored simulation results pass the
character \code{'NONE'}}

\item{check.only}{logical; for larger simulation file sets, such as those generated by
\code{\link{runArraySimulation}}, return the design conditions that do not satisfy the
\code{target.reps} and throw a warning if files are unexpectedly missing}

\item{target.reps}{(optional) number of replications to check against to evaluate whether
the simulation files returned the desired number of replications. If missing, the highest
detected value from the collected set of replication information will be used}

\item{warning_details}{logical; include the aggregate of the warnings to be extracted via
\code{\link{SimExtract}}?}

\item{error_details}{logical; include the aggregate of the errors to be extracted via
\code{\link{SimExtract}}?}

\item{...}{not used}
}
\value{
returns a \code{data.frame/tibble} with the (weighted) average/aggregate
  of the simulation results
}
\description{
This function collects and aggregates the results from
\code{SimDesign}'s \code{\link{runSimulation}} into a single
object suitable for post-analyses, or combines all the saved results directories
into one. This is useful when results are run piece-wise on one node (e.g., 500 replications
in one batch, 500 again at a later date, though be careful about the \code{\link{set.seed}}
use as the random numbers will tend to correlate the more it is used) or run independently across different
nodes/computing cores (e.g., see \code{\link{runArraySimulation}}).
}
\examples{
\dontrun{

setwd('my_working_directory')

## run simulations to save the .rds files (or move them to the working directory)
# seeds1 <- genSeeds(design)
# seeds2 <- genSeeds(design, old.seeds=seeds1)
# ret1 <- runSimulation(design, ..., seed=seeds1, filename='file1')
# ret2 <- runSimulation(design, ..., seed=seeds2, filename='file2')

# collapse the two saved files into one object stored in the workspace
# (no filename is supplied here, so the aggregate is returned but not written to disk)
final <- SimCollect(files = c('file1.rds', 'file2.rds'))
final

# If filename not included, can be extracted from results
# files <- c(SimExtract(ret1, 'filename'), SimExtract(ret2, 'filename'))
# final <- SimCollect(files = files)


#################################################
# Example where each row condition is repeated, evaluated independently,
# and later collapsed into a single analysis object

# Each condition is repeated across several rows (2 or 4 times in the
# expandDesign() call below), hence the replications per row should be set to
# desired.reps divided by the number of repeats of that condition
Design <- createDesign(mu = c(0,5),
                       N  = c(30, 60))
Design

# assume the N=60 takes longer, and should be spread out across more arrays
# (presumably c(2,2,4,4) is the number of repeats for each row of Design -- confirm)
Design_long <- expandDesign(Design, c(2,2,4,4))
Design_long

# replications per row: four entries of 50 followed by eight entries of 25
replications <- c(rep(50, 4), rep(25,8))
data.frame(Design_long, replications)

#-------------------------------------------------------------------

Generate <- function(condition, fixed_objects) {
    # draw N normal deviates centred at the condition's mu
    rnorm(condition$N, mean = condition$mu)
}

Analyse <- function(condition, dat, fixed_objects) {
    # named vector holding the sample mean and standard deviation of dat
    c(mean = mean(dat), SD = sd(dat))
}

Summarise <- function(condition, results, fixed_objects) {
    # average each column of the results across its rows
    colMeans(results)
}

#-------------------------------------------------------------------

# create directory to store all final simulation files
dir.create('sim_files/')

# seed information handed to every array job below through the iseed argument
iseed <- genSeeds()

# distribute jobs independently
# (one runArraySimulation() call per row index, each with a different arrayID;
#  invisible() suppresses printing of the sapply() return value)
sapply(1:nrow(Design_long), \(i) {
  runArraySimulation(design=Design_long, replications=replications,
                generate=Generate, analyse=Analyse, summarise=Summarise,
                arrayID=i, dirname='sim_files/', filename='job', iseed=iseed)
}) |> invisible()

# check that all replications satisfy target
SimCollect('sim_files/', check.only = TRUE)

# this would have been returned were the target.reps supposed to be 1000
SimCollect('sim_files/', check.only = TRUE, target.reps=1000)

# aggregate into single object
sim <- SimCollect('sim_files/')
sim

# clean up the example directory (presumably removes 'sim_files/' -- confirm against SimClean docs)
SimClean(dir='sim_files/')

}
}
\references{
Chalmers, R. P., & Adkins, M. C. (2020). Writing Effective and Reliable Monte Carlo Simulations
with the SimDesign Package. \emph{The Quantitative Methods for Psychology, 16}(4), 248-280.
\doi{10.20982/tqmp.16.4.p248}

Sigal, M. J., & Chalmers, R. P. (2016). Play it again: Teaching statistics with Monte
Carlo simulation. \emph{Journal of Statistics Education, 24}(3), 136-156.
\doi{10.1080/10691898.2016.1246953}
}
\seealso{
\code{\link{runSimulation}}, \code{\link{runArraySimulation}},
  \code{\link{SimCheck}}
}
\author{
Phil Chalmers \email{rphilip.chalmers@gmail.com}
}
