From e6e7c5ddcc480388e24dd69209409ccef1a22461 Mon Sep 17 00:00:00 2001 From: philchalmers Date: Sat, 27 Jul 2024 21:46:55 -0400 Subject: [PATCH] enable subRNG --- DESCRIPTION | 3 ++- NAMESPACE | 1 + NEWS.md | 7 ++++++- R/SimDesign.R | 1 + R/analysis.R | 6 +++++- R/runArraySimulation.R | 15 +++++++++++++-- R/util.R | 17 +++++++++++++++++ man/runArraySimulation.Rd | 15 +++++++++++++-- 8 files changed, 58 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c430eaac..cca68d87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: SimDesign Title: Structure for Organizing Monte Carlo Simulation Designs -Version: 2.16.3 +Version: 2.16.4 Authors@R: c(person("Phil", "Chalmers", email = "rphilip.chalmers@gmail.com", role = c("aut", "cre"), comment = c(ORCID="0000-0001-5332-2810")), person("Matthew", "Sigal", role = c("ctb")), @@ -24,6 +24,7 @@ Imports: methods, testthat, parallel, + snow, dplyr, sessioninfo, beepr, diff --git a/NAMESPACE b/NAMESPACE index 3a4265f8..b1b44ff0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -85,6 +85,7 @@ importFrom(pbapply,pblapply) importFrom(pbapply,pboptions) importFrom(progressr,progressor) importFrom(sessioninfo,session_info) +importFrom(snow,sendCall) importFrom(testthat,capture_messages) importFrom(testthat,capture_output_lines) importFrom(utils,capture.output) diff --git a/NEWS.md b/NEWS.md index 9e38beb5..cdb542e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,11 @@ ## Changes in SimDesign 2.17 +- `runArraySimulation()` gains `parallel` flag and friends to use multi-core + processing within array distributions. 
RNG numbers within the L'Ecuyer-CMRG + algorithm are incremented using `parallel::nextRNGSubStream()` within each + defined core + - Better name checking when using the supported `list` inputs in `runSimulation()` and `runArraySimulation()` @@ -10,7 +15,7 @@ ## Changes in SimDesign 2.16 -- Fix for `SimCollect()` when `runArraySimulatino()` result contains +- Fix for `SimCollect()` when `runArraySimulation()` result contains mixed warning outputs (reported by Michael Troung) - `manageMessages()` added in a similar spirit to `manageWarnigns()`, though diff --git a/R/SimDesign.R b/R/SimDesign.R index b1cbb6fb..471368eb 100644 --- a/R/SimDesign.R +++ b/R/SimDesign.R @@ -34,6 +34,7 @@ #' @importFrom future.apply future_lapply #' @importFrom progressr progressor #' @importFrom beepr beep +#' @importFrom snow sendCall # @importFrom robustbase glmrob #' @importFrom utils recover packageVersion head tail capture.output object.size #' @keywords package diff --git a/R/analysis.R b/R/analysis.R index 1fddf945..3f092c61 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -71,7 +71,11 @@ Analysis <- function(Functions, condition, replications, fixed_objects, cl, MPI, stop('MPI structure no longer supported. Please use the parallel = \"future" approach', call. 
= FALSE) } else { - if(!is.null(seed)) parallel::clusterSetRNGStream(cl=cl, seed[condition$ID]) + if(!is.null(seed)){ + if(is.list(seed)){ + clusterSetRNGSubStream(cl=cl, seed=seed) + } else parallel::clusterSetRNGStream(cl=cl, seed[condition$ID]) + } results <- if(progress){ try(pbapply::pblapply(1L:replications, mainsim, condition=condition, generate=Functions$generate, diff --git a/R/runArraySimulation.R b/R/runArraySimulation.R index a25772ac..5930b811 100644 --- a/R/runArraySimulation.R +++ b/R/runArraySimulation.R @@ -19,7 +19,8 @@ #' function requires the seeds to be generated using #' \code{\link{gen_seeds}} with the \code{iseed} and \code{arrayID} #' inputs to ensure that each job is analyzing a high-quality -#' set of random numbers via L'Ecuyer-CMRG's (2002) method. +#' set of random numbers via L'Ecuyer-CMRG's (2002) method, incremented using +#' \code{\link[parallel]{nextRNGStream}}. #' #' Additionally, for timed simulations on HPC clusters it is also recommended to pass a #' \code{control = list(max_time)} value to avoid discarding @@ -58,7 +59,8 @@ #' #' @param parallel logical; use parallel computations via the a "SOCK" cluster? #' Only use when the instruction shell file requires more than 1 core -#' (number of cores detected via \code{ncores}) +#' (number of cores detected via \code{ncores}). For this application +#' the random seeds are further distributed using \code{\link[parallel]{nextRNGSubStream}} #' #' @param cl cluster definition. 
If omitted a "SOCK" cluster will be defined #' @@ -161,6 +163,15 @@ #' res #' SimResults(res) # condition and replication count stored #' +#' # same, but evaluated with multiple cores +#' res <- runArraySimulation(design=Design, replications=50, +#' generate=Generate, analyse=Analyse, +#' summarise=Summarise, arrayID=arrayID, +#' parallel=TRUE, ncores=3, +#' iseed=iseed, filename='mysim') # saved as 'mysim-1.rds' +#' res +#' SimResults(res) # condition and replication count stored +#' #' dir() #' SimClean('mysim-1.rds') #' diff --git a/R/util.R b/R/util.R index e8f9c496..3ba698c7 100644 --- a/R/util.R +++ b/R/util.R @@ -672,6 +672,23 @@ set_seed <- function(seed){ invisible(NULL) } +recvResult_fun <- utils::getFromNamespace("recvResult", "snow") + +clusterSetRNGSubStream <- function(cl, seed){ + nc <- length(cl) + seeds <- vector("list", nc) + seeds[[1L]] <- seed[[1L]] + for (i in seq_len(nc - 1L)) seeds[[i + 1L]] <- + parallel::nextRNGSubStream(seeds[[i]]) + for (i in seq_along(cl)) { + expr <- substitute(assign(".Random.seed", seed, envir = .GlobalEnv), + list(seed = seeds[[i]])) + snow::sendCall(cl[[i]], eval, list(expr)) + } + snow::checkForRemoteErrors(lapply(cl, recvResult_fun)) + invisible() +} + valid_results <- function(x) is(x, 'numeric') || is(x, 'data.frame') || is(x, 'list') || is(x, 'logical') || is(x, 'try-error') diff --git a/man/runArraySimulation.Rd b/man/runArraySimulation.Rd index 46ca506b..868a8b97 100644 --- a/man/runArraySimulation.Rd +++ b/man/runArraySimulation.Rd @@ -58,7 +58,8 @@ be added to the \code{SimExtract(..., what='results')} output?} \item{parallel}{logical; use parallel computations via the a "SOCK" cluster? Only use when the instruction shell file requires more than 1 core -(number of cores detected via \code{ncores})} +(number of cores detected via \code{ncores}). For this application +the random seeds are further distributed using \code{\link[parallel]{nextRNGSubStream}}} \item{cl}{cluster definition. 
If omitted a "SOCK" cluster will be defined} @@ -116,7 +117,8 @@ each isolated condition rather than between all conditions). As such, this function requires the seeds to be generated using \code{\link{gen_seeds}} with the \code{iseed} and \code{arrayID} inputs to ensure that each job is analyzing a high-quality -set of random numbers via L'Ecuyer-CMRG's (2002) method. +set of random numbers via L'Ecuyer-CMRG's (2002) method, incremented using +\code{\link[parallel]{nextRNGStream}}. Additionally, for timed simulations on HPC clusters it is also recommended to pass a \code{control = list(max_time)} value to avoid discarding @@ -168,6 +170,15 @@ res <- runArraySimulation(design=Design, replications=50, res SimResults(res) # condition and replication count stored +# same, but evaluated with multiple cores +res <- runArraySimulation(design=Design, replications=50, + generate=Generate, analyse=Analyse, + summarise=Summarise, arrayID=arrayID, + parallel=TRUE, ncores=3, + iseed=iseed, filename='mysim') # saved as 'mysim-1.rds' +res +SimResults(res) # condition and replication count stored + dir() SimClean('mysim-1.rds')