From 3cbc7a9889bf5f2957559afa4d5dcaa5e34bd89d Mon Sep 17 00:00:00 2001 From: philchalmers Date: Fri, 26 Jul 2024 23:26:43 -0400 Subject: [PATCH] start parallel comp with subRNG increments --- R/runArraySimulation.R | 20 ++++++++++++++++++++ R/runSimulation.R | 2 +- man/runArraySimulation.Rd | 14 ++++++++++++++ man/runSimulation.Rd | 2 +- 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/R/runArraySimulation.R b/R/runArraySimulation.R index 05ca66ba..a25772ac 100644 --- a/R/runArraySimulation.R +++ b/R/runArraySimulation.R @@ -56,6 +56,17 @@ #' to. If omitted the files will be stored in the same working directory #' where the script was submitted #' +#' @param parallel logical; use parallel computations via a "SOCK" cluster? +#' Only use when the instruction shell file requires more than 1 core +#' (number of cores detected via \code{ncores}) +#' +#' @param cl cluster definition. If omitted a "SOCK" cluster will be defined +#' +#' @param ncores number of cores to use when \code{parallel=TRUE}. Note that +#' the default uses the number of available cores minus 1, therefore this +#' will only be useful when \code{ncores > 2} as defined in the shell instruction +#' file +#' #' @param filename_suffix suffix to add to the \code{filename}; #' default add '-' with the \code{arrayID} #' @@ -222,6 +233,8 @@ runArraySimulation <- function(design, ..., replications, arrayID = getArrayID(), filename_suffix = paste0("-", arrayID), addArrayInfo = TRUE, + parallel = FALSE, cl = NULL, + ncores = parallel::detectCores() - 1L, save_details = list(), control = list()){ dots <- list(...) 
@@ -254,6 +267,12 @@ runArraySimulation <- function(design, ..., replications, filename <- gsub("//", "/", filename) } save_details$arrayID <- arrayID + if(parallel){ + if(is.null(cl)){ + cl <- parallel::makeCluster(ncores, type="SOCK") + on.exit(parallel::stopCluster(cl), add=TRUE) + } + } seed <- genSeeds(design, iseed=iseed, arrayID=arrayID) dsub <- design[arrayID, , drop=FALSE] attr(dsub, 'Design.ID') <- attr(design, 'Design.ID')[arrayID] @@ -261,6 +280,7 @@ runArraySimulation <- function(design, ..., replications, ret <- runSimulation(design=dsub, replications=replications, filename=filename, seed=seed, verbose=FALSE, save_details=save_details, + parallel=parallel, cl=cl, control=control, save=FALSE, ...) if(addArrayInfo && (is.null(dots$store_results) || (!is.null(dots$store_results) && isTRUE(dots$store_results)))){ diff --git a/R/runSimulation.R b/R/runSimulation.R index fff86f4d..4c002d11 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -406,7 +406,7 @@ #' \item{\code{allow_nan}}{logical (default is \code{FALSE}); should \code{NaN}s be allowed in the #' analyse step as a valid result from the simulation analysis?} #' -#' \item{\code{type}}{default type of cluster to create for the \code{cl} object if no supplied. +#' \item{\code{type}}{default type of cluster to create for the \code{cl} object if not supplied. #' For Windows OS this defaults to \code{"PSOCK"}, otherwise \code{"SOCK"} is selected #' (suitable for Linux and Mac OSX). 
This is ignored if the user specifies their own \code{cl} object} #' diff --git a/man/runArraySimulation.Rd b/man/runArraySimulation.Rd index 28d2059c..46ca506b 100644 --- a/man/runArraySimulation.Rd +++ b/man/runArraySimulation.Rd @@ -14,6 +14,9 @@ runArraySimulation( arrayID = getArrayID(), filename_suffix = paste0("-", arrayID), addArrayInfo = TRUE, + parallel = FALSE, + cl = NULL, + ncores = parallel::detectCores() - 1L, save_details = list(), control = list() ) @@ -53,6 +56,17 @@ default add '-' with the \code{arrayID}} \item{addArrayInfo}{logical; should the array ID and original design row number be added to the \code{SimExtract(..., what='results')} output?} +\item{parallel}{logical; use parallel computations via a "SOCK" cluster? +Only use when the instruction shell file requires more than 1 core +(number of cores detected via \code{ncores})} + +\item{cl}{cluster definition. If omitted a "SOCK" cluster will be defined} + +\item{ncores}{number of cores to use when \code{parallel=TRUE}. Note that +the default uses the number of available cores minus 1, therefore this +will only be useful when \code{ncores > 2} as defined in the shell instruction +file} + \item{save_details}{optional list of extra file saving details. See \code{\link{runSimulation}}} diff --git a/man/runSimulation.Rd b/man/runSimulation.Rd index 195074f2..e9b36d95 100644 --- a/man/runSimulation.Rd +++ b/man/runSimulation.Rd @@ -470,7 +470,7 @@ in the simulation} \item{\code{allow_nan}}{logical (default is \code{FALSE}); should \code{NaN}s be allowed in the analyse step as a valid result from the simulation analysis?} - \item{\code{type}}{default type of cluster to create for the \code{cl} object if no supplied. + \item{\code{type}}{default type of cluster to create for the \code{cl} object if not supplied. For Windows OS this defaults to \code{"PSOCK"}, otherwise \code{"SOCK"} is selected (suitable for Linux and Mac OSX). This is ignored if the user specifies their own \code{cl} object}