From a8994991089a41f189f189627e61a7680179aba8 Mon Sep 17 00:00:00 2001 From: schuemie Date: Tue, 13 Aug 2024 06:47:38 +0200 Subject: [PATCH] Added `controlType` argument to `runSccsAnalyses()` --- DESCRIPTION | 6 ++--- NEWS.md | 8 +++++++ R/RunAnalyses.R | 32 +++++++++++++++++++------ man/SelfControlledCaseSeries-package.Rd | 1 + man/runSccsAnalyses.Rd | 9 ++++++- vignettes/MultipleAnalyses.Rmd | 5 ++-- 6 files changed, 48 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fda315a..a395548 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: SelfControlledCaseSeries Type: Package Title: Self-Controlled Case Series -Version: 5.2.3 -Date: 2024-08-12 +Version: 5.3.0 +Date: 2024-08-13 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")), person("Patrick", "Ryan", role = c("aut")), @@ -19,7 +19,7 @@ Description: SelfControlledCaseSeries is an R package for performing self- included at once (MSCCS), with regularization on all coefficients except for the exposure of interest. VignetteBuilder: knitr -URL: https://ohdsi.github.io/SelfControlledCaseSeries, https://github.com/OHDSI/SelfControlledCaseSeries +URL: https://ohdsi.github.io/SelfControlledCaseSeries/, https://github.com/OHDSI/SelfControlledCaseSeries BugReports: https://github.com/OHDSI/SelfControlledCaseSeries/issues Depends: R (>= 4.0.0), diff --git a/NEWS.md b/NEWS.md index fa6e2b5..60c6e29 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +SelfControlledCaseSeries 5.3.0 +============================== + +Changes + +1. Added `controlType` argument to `runSccsAnalyses()`, explicitly setting the type of (negative) controls: outcome controls or exposure controls. Setting to "outcome" (the default) will now group by exposure (and nesting cohort if defined). 
+ + SelfControlledCaseSeries 5.2.3 ============================== diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R index 819c4eb..ef90efd 100644 --- a/R/RunAnalyses.R +++ b/R/RunAnalyses.R @@ -160,6 +160,11 @@ createDefaultSccsMultiThreadingSettings <- function(maxCores) { #' @param sccsMultiThreadingSettings An object of type `SccsMultiThreadingSettings` as created using #' the [createSccsMultiThreadingSettings()] or #' [createDefaultSccsMultiThreadingSettings()] functions. +#' @param controlType Type of negative (and positive) controls. Can be "outcome" or +#' "exposure". When set to "outcome", controls with the +#' same exposure (and nesting cohort) are grouped together for +#' calibration. When set to "exposure", controls with the same +#' outcome are grouped together. #' #' @return #' A tibble describing for each exposure-outcome-analysisId combination where the intermediary and @@ -183,7 +188,8 @@ runSccsAnalyses <- function(connectionDetails, exposuresOutcomeList, analysesToExclude = NULL, combineDataFetchAcrossOutcomes = FALSE, - sccsMultiThreadingSettings = createSccsMultiThreadingSettings()) { + sccsMultiThreadingSettings = createSccsMultiThreadingSettings(), + controlType = "outcome") { errorMessages <- checkmate::makeAssertCollection() if (is(connectionDetails, "connectionDetails")) { checkmate::assertClass(connectionDetails, "connectionDetails", add = errorMessages) @@ -213,6 +219,7 @@ runSccsAnalyses <- function(connectionDetails, checkmate::assertDataFrame(analysesToExclude, null.ok = TRUE, add = errorMessages) checkmate::assertLogical(combineDataFetchAcrossOutcomes, len = 1, add = errorMessages) checkmate::assertClass(sccsMultiThreadingSettings, "SccsMultiThreadingSettings", add = errorMessages) + checkmate::assertChoice(controlType, c("outcome", "exposure"), add = errorMessages) checkmate::reportAssertions(collection = errorMessages) uniqueExposuresOutcomeList <- unique(lapply(lapply(lapply(exposuresOutcomeList, unlist), as.character), paste, 
collapse = " ")) @@ -445,7 +452,8 @@ runSccsAnalyses <- function(connectionDetails, exposuresOutcomeList = exposuresOutcomeList, outputFolder = outputFolder, mainFileName = mainFileName, - calibrationThreads = sccsMultiThreadingSettings$calibrationThreads + calibrationThreads = sccsMultiThreadingSettings$calibrationThreads, + controlType = controlType ) } @@ -825,7 +833,12 @@ createSccsModelObject <- function(params) { } } -summarizeResults <- function(referenceTable, exposuresOutcomeList, outputFolder, mainFileName, calibrationThreads = 1) { +summarizeResults <- function(referenceTable, + exposuresOutcomeList, + outputFolder, + mainFileName, + calibrationThreads = 1, + controlType) { rows <- list() # i = 1 pb <- txtProgressBar(style = 3) @@ -875,6 +888,7 @@ summarizeResults <- function(referenceTable, exposuresOutcomeList, outputFolder, } row <- tibble( exposuresOutcomeSetId = refRow$exposuresOutcomeSetId, + nestingCohortId = refRow$nestingCohortId, outcomeId = refRow$outcomeId, analysisId = refRow$analysisId, covariateAnalysisId = covariateSettings$covariateAnalysisId, @@ -909,18 +923,22 @@ summarizeResults <- function(referenceTable, exposuresOutcomeList, outputFolder, mainResults <- bind_rows(rows) mainResults <- calibrateEstimates( results = mainResults, - calibrationThreads = calibrationThreads + calibrationThreads = calibrationThreads, + controlType = controlType ) saveRDS(mainResults, mainFileName) } -calibrateEstimates <- function(results, calibrationThreads) { +calibrateEstimates <- function(results, calibrationThreads, controlType) { if (nrow(results) == 0) { return(results) } message("Calibrating estimates") - groups <- split(results, paste(results$covariateId, results$analysisId)) - + if (controlType == "outcome") { + groups <- split(results, paste(results$eraId, results$nestingCohortId, results$covariateId, results$analysisId)) + } else { + groups <- split(results, paste(results$outcomeId, results$covariateId, results$analysisId)) + } cluster <- 
ParallelLogger::makeCluster(min(length(groups), calibrationThreads)) results <- ParallelLogger::clusterApply(cluster, groups, calibrateGroup) ParallelLogger::stopCluster(cluster) diff --git a/man/SelfControlledCaseSeries-package.Rd b/man/SelfControlledCaseSeries-package.Rd index 92907d5..f4a4332 100644 --- a/man/SelfControlledCaseSeries-package.Rd +++ b/man/SelfControlledCaseSeries-package.Rd @@ -11,6 +11,7 @@ SelfControlledCaseSeries is an R package for performing self- controlled case se \seealso{ Useful links: \itemize{ + \item \url{https://ohdsi.github.io/SelfControlledCaseSeries/} \item \url{https://github.com/OHDSI/SelfControlledCaseSeries} \item Report bugs at \url{https://github.com/OHDSI/SelfControlledCaseSeries/issues} } diff --git a/man/runSccsAnalyses.Rd b/man/runSccsAnalyses.Rd index b6042cb..51c1659 100644 --- a/man/runSccsAnalyses.Rd +++ b/man/runSccsAnalyses.Rd @@ -22,7 +22,8 @@ runSccsAnalyses( exposuresOutcomeList, analysesToExclude = NULL, combineDataFetchAcrossOutcomes = FALSE, - sccsMultiThreadingSettings = createSccsMultiThreadingSettings() + sccsMultiThreadingSettings = createSccsMultiThreadingSettings(), + controlType = "outcome" ) } \arguments{ @@ -88,6 +89,12 @@ the subjects that have the different outcomes.} \item{sccsMultiThreadingSettings}{An object of type \code{SccsMultiThreadingSettings} as created using the \code{\link[=createSccsMultiThreadingSettings]{createSccsMultiThreadingSettings()}} or \code{\link[=createDefaultSccsMultiThreadingSettings]{createDefaultSccsMultiThreadingSettings()}} functions.} + +\item{controlType}{Type of negative (and positive) controls. Can be "outcome" or +"exposure". When set to "outcome", controls with the +same exposure (and nesting cohort) are grouped together for +calibration. 
When set to "exposure", controls with the same +outcome are grouped together.} } \value{ A tibble describing for each exposure-outcome-analysisId combination where the intermediary and diff --git a/vignettes/MultipleAnalyses.Rmd b/vignettes/MultipleAnalyses.Rmd index bd0df01..e3089c2 100644 --- a/vignettes/MultipleAnalyses.Rmd +++ b/vignettes/MultipleAnalyses.Rmd @@ -325,13 +325,14 @@ referenceTable <- runSccsAnalyses( combineDataFetchAcrossOutcomes = TRUE, exposuresOutcomeList = exposuresOutcomeList, sccsAnalysisList = sccsAnalysisList, - sccsMultiThreadingSettings = multiThreadingSettings + sccsMultiThreadingSettings = multiThreadingSettings, + controlType = "exposure" ) ``` In the code above, we first specify how many parallel threads `SelfControlledCaseSeries` can use. Many of the computations can be computed in parallel, and providing more than one CPU core can greatly speed up the computation. Here we specify `SelfControlledCaseSeries` can use all but one of the CPU cores detected in the system (using the `parallel::detectCores()` function). -We call `runSccsAnalyses`, providing the arguments for connecting to the database, which schemas and tables to use, as well as the analyses and hypotheses of interest. The `outputFolder` specifies where the outcome models and intermediate files will be written. +We call `runSccsAnalyses`, providing the arguments for connecting to the database, which schemas and tables to use, as well as the analyses and hypotheses of interest. The `outputFolder` specifies where the outcome models and intermediate files will be written. Because in this example we use negative control exposures, we must explicitly specify `controlType = "exposure"`. This causes the negative control exposure-outcome pairs that share the same outcome to be grouped together when calibrating the estimates. ## Restarting