Release v0.11

OHDSI · Sep 1, 2024 · 78c7463 · 78c7463
2 parents dc3bde3 + 922eb21
commit 78c7463
Show file tree

Hide file tree

Showing 265 changed files with 25,816 additions and 5,666 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: CohortGenerator
 Type: Package
 Title: An R Package for Cohort Generation Against the OMOP CDM
-Version: 0.10.0
-Date: 2024-07-14
+Version: 0.11.0
+Date: 2024-08-31
 Authors@R: c(
   person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
   person("Jamie", "Gilbert", role = c("aut")),
@@ -17,7 +17,6 @@ Depends:
   R (>= 3.6.0),
   R6
 Imports:
-  bit64,
   checkmate,
   digest,
   dplyr,
@@ -37,13 +36,9 @@ Suggests:
   Eunomia,
   knitr,
   rmarkdown,
-  ROhdsiWebApi,
   testthat,
   withr,
   zip
-Remotes:
-  ohdsi/ResultModelManager,
-  ohdsi/ROhdsiWebApi,
 License: Apache License
 VignetteBuilder: knitr
 URL: https://ohdsi.github.io/CohortGenerator/, https://github.com/OHDSI/CohortGenerator

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,28 @@
+CohortGenerator 0.11.0
+=======================
+
+New Features
+
+- Add support for minimum cell count (#176)
+
+Bug Fixes
+
+- Multiple calls to export stats causing duplicates in cohort inclusion file (#179)
+- Updates to subset documentation (#180, #181)
+- Negative control outcome generation bug (#177)
+
 CohortGenerator 0.10.0
 =======================
+
 New Features
+
 - Add `runCohortGeneration` function (Issue #165)
 - Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets(#154, #162)
 - Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145)
 - Unit tests now running on all OHDSI DB Platforms (#151)
 
 Bug Fixes
+
 - Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167)
 - Allow integer as cohort ID (#146)
 - Use native messaging functions for output vs. ParallelLogger (#97)

diff --git a/R/CohortStats.R b/R/CohortStats.R
@@ -226,7 +226,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
   # NOTE: This data frame must match the @cohort_inclusion_table
   # structure as defined in inst/sql/sql_server/CreateCohortTables.sql
   inclusionRules <- data.frame(
-    cohortDefinitionId = bit64::integer64(),
+    cohortDefinitionId = numeric(),
     ruleSequence = integer(),
     name = character(),
     description = character()
@@ -251,7 +251,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
           inclusionRules <- rbind(
             inclusionRules,
             data.frame(
-              cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
+              cohortDefinitionId = as.numeric(cohortDefinitionSet$cohortId[i]),
               ruleSequence = as.integer(j - 1),
               name = ruleName,
               description = ruleDescription

diff --git a/R/Export.R b/R/Export.R
@@ -45,6 +45,7 @@
 #'
 #' @param databaseId                  Optional - when specified, the databaseId will be added
 #'                                    to the exported results
+#' @template minCellCount
 #'
 #' @template CohortDefinitionSet
 #'
@@ -61,6 +62,7 @@ exportCohortStatsTables <- function(connectionDetails,
                                     fileNamesInSnakeCase = FALSE,
                                     incremental = FALSE,
                                     databaseId = NULL,
+                                    minCellCount = 5,
                                     cohortDefinitionSet = NULL,
                                     tablePrefix = "") {
   if (is.null(connection)) {
@@ -76,39 +78,62 @@ exportCohortStatsTables <- function(connectionDetails,
   # Internal function to export the stats
   exportStats <- function(data,
                           fileName,
+                          resultsDataModelTableName,
                           tablePrefix) {
     fullFileName <- file.path(cohortStatisticsFolder, paste0(tablePrefix, fileName))
+    primaryKeyColumns <- getPrimaryKey(resultsDataModelTableName)
+    columnsToCensor <- getColumnsToCensor(resultsDataModelTableName)
     rlang::inform(paste0("- Saving data to - ", fullFileName))
+
+    # Make sure the data is censored before saving
+    if (length(columnsToCensor) > 0) {
+      for (i in seq_along(columnsToCensor)) {
+        colName <- ifelse(isTRUE(snakeCaseToCamelCase), yes = columnsToCensor[i], no = SqlRender::camelCaseToSnakeCase(columnsToCensor[i]))
+        data <- data %>%
+          enforceMinCellValue(colName, minCellCount)
+      }
+    }
+
     if (incremental) {
-      if (snakeCaseToCamelCase) {
-        cohortDefinitionIds <- unique(data$cohortDefinitionId)
-        saveIncremental(data, fullFileName, cohortDefinitionId = cohortDefinitionIds)
-      } else {
-        cohortDefinitionIds <- unique(data$cohort_definition_id)
-        saveIncremental(data, fullFileName, cohort_definition_id = cohortDefinitionIds)
+      # Dynamically build the arguments to the saveIncremental
+      # to specify the primary key(s) for the file
+      args <- list(
+        data = data,
+        file = fullFileName
+      )
+      for (i in seq_along(primaryKeyColumns)) {
+        colName <- ifelse(isTRUE(snakeCaseToCamelCase), yes = primaryKeyColumns[i], no = SqlRender::camelCaseToSnakeCase(primaryKeyColumns[i]))
+        args[[colName]] <- data[[colName]]
       }
+      do.call(
+        what = CohortGenerator::saveIncremental,
+        args = args
+      )
     } else {
       .writeCsv(x = data, file = fullFileName)
     }
   }
 
   tablesToExport <- data.frame(
     tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
-    fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
+    fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv"),
+    resultsDataModelTableName = c("cg_cohort_inc_result", "cg_cohort_inc_stats", "cg_cohort_summary_stats", "cg_cohort_censor_stats")
   )
 
   if (is.null(cohortDefinitionSet)) {
     warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
     tablesToExport <- rbind(tablesToExport, data.frame(
       tableName = "cohortInclusionTable",
-      fileName = paste0(tablePrefix, "cohort_inclusion.csv")
+      fileName = "cohort_inclusion.csv",
+      resultsDataModelTableName = "cg_cohort_inclusion"
     ))
   } else {
     inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
     names(inclusionRules) <- SqlRender::camelCaseToSnakeCase(names(inclusionRules))
     exportStats(
       data = inclusionRules,
       fileName = "cohort_inclusion.csv",
+      resultsDataModelTableName = "cg_cohort_inclusion",
       tablePrefix = tablePrefix
     )
   }
@@ -131,6 +156,7 @@ exportCohortStatsTables <- function(connectionDetails,
     exportStats(
       data = cohortStats[[tablesToExport$tableName[i]]],
       fileName = fileName,
+      resultsDataModelTableName = tablesToExport$resultsDataModelTableName[[i]],
       tablePrefix = tablePrefix
     )
   }
@@ -203,3 +229,45 @@ createEmptyResult <- function(tableName) {
   result <- result[FALSE, ]
   return(result)
 }
+
+getPrimaryKey <- function(tableName) {
+  columns <- readCsv(
+    file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
+  ) %>%
+    dplyr::filter(.data$tableName == !!tableName & tolower(.data$primaryKey) == "yes") %>%
+    dplyr::pull(.data$columnName) %>%
+    SqlRender::snakeCaseToCamelCase()
+  return(columns)
+}
+
+getColumnsToCensor <- function(tableName) {
+  columns <- readCsv(
+    file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
+  ) %>%
+    dplyr::filter(.data$tableName == !!tableName & tolower(.data$minCellCount) == "yes") %>%
+    dplyr::pull(.data$columnName) %>%
+    SqlRender::snakeCaseToCamelCase()
+  return(columns)
+}
+
+enforceMinCellValue <- function(data, fieldName, minValues, silent = FALSE) {
+  toCensor <- !is.na(pull(data, fieldName)) & pull(data, fieldName) < minValues & pull(data, fieldName) != 0
+  if (!silent) {
+    percent <- round(100 * sum(toCensor) / nrow(data), 1)
+    message(
+      "    censoring ",
+      sum(toCensor),
+      " values (",
+      percent,
+      "%) from ",
+      fieldName,
+      " because value below minimum"
+    )
+  }
+  if (length(minValues) == 1) {
+    data[toCensor, fieldName] <- -minValues
+  } else {
+    data[toCensor, fieldName] <- -minValues[toCensor]
+  }
+  return(data)
+}
diff --git a/R/RunCohortGeneration.R b/R/RunCohortGeneration.R
@@ -59,6 +59,8 @@
 #' @param databaseId    A unique ID for the database. This will be appended to
 #'                      most tables.
 #'
+#' @template minCellCount
+#'
 #' @param incremental   Create only cohorts that haven't been created before?
 #'
 #' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where
@@ -78,6 +80,7 @@ runCohortGeneration <- function(connectionDetails,
                                 stopOnError = TRUE,
                                 outputFolder,
                                 databaseId = 1,
+                                minCellCount = 5,
                                 incremental = FALSE,
                                 incrementalFolder = NULL) {
   if (is.null(cohortDefinitionSet) && is.null(negativeControlOutcomeCohortSet)) {
@@ -127,6 +130,7 @@ runCohortGeneration <- function(connectionDetails,
     stopOnError = stopOnError,
     outputFolder = outputFolder,
     databaseId = databaseId,
+    minCellCount = minCellCount,
     incremental = incremental,
     incrementalFolder = incrementalFolder
   )
@@ -142,6 +146,7 @@ runCohortGeneration <- function(connectionDetails,
     detectOnDescendants = detectOnDescendants,
     outputFolder = outputFolder,
     databaseId = databaseId,
+    minCellCount = minCellCount,
     incremental = incremental,
     incrementalFolder = incrementalFolder
   )
@@ -164,6 +169,7 @@ generateAndExportCohorts <- function(connection,
                                      stopOnError,
                                      outputFolder,
                                      databaseId,
+                                     minCellCount,
                                      incremental,
                                      incrementalFolder) {
   # Generate the cohorts
@@ -220,6 +226,9 @@ generateAndExportCohorts <- function(connection,
   }
 
   rlang::inform("Saving cohort counts")
+  cohortCounts <- cohortCounts %>%
+    enforceMinCellValue("cohortEntries", minCellCount) %>%
+    enforceMinCellValue("cohortSubjects", minCellCount)
   writeCsv(
     x = cohortCounts,
     file = cohortCountsFileName
@@ -235,6 +244,7 @@ generateAndExportCohorts <- function(connection,
     fileNamesInSnakeCase = TRUE,
     incremental = incremental,
     databaseId = databaseId,
+    minCellCount = minCellCount,
     cohortDefinitionSet = cohortDefinitionSet,
     tablePrefix = "cg_"
   )
@@ -254,6 +264,7 @@ generateAndExportNegativeControls <- function(connection,
                                               detectOnDescendants,
                                               outputFolder,
                                               databaseId,
+                                              minCellCount,
                                               incremental,
                                               incrementalFolder) {
   # Generate any negative controls
@@ -299,6 +310,9 @@ generateAndExportNegativeControls <- function(connection,
   )
 
   rlang::inform("Saving negative control outcome cohort counts")
+  cohortCountsNegativeControlOutcomes <- cohortCountsNegativeControlOutcomes %>%
+    enforceMinCellValue("cohortEntries", minCellCount) %>%
+    enforceMinCellValue("cohortSubjects", minCellCount)
   writeCsv(
     x = cohortCountsNegativeControlOutcomes,
     file = cohortCountsNegativeControlOutcomesFileName

diff --git a/R/Subsets.R b/R/Subsets.R
@@ -420,7 +420,7 @@ createCohortSubset <- function(name = NULL, cohortIds, cohortCombinationOperator
 #' @title Demographic Subset Operator
 #' @description
 #' Operators for subsetting a cohort by demographic criteria
-#' 
+#'
 #' @export
 DemographicSubsetOperator <- R6::R6Class(
   classname = "DemographicSubsetOperator",
@@ -637,9 +637,9 @@ DemographicSubsetOperator <- R6::R6Class(
 #' @param name         Optional char name
 #' @param ageMin       The minimum age
 #' @param ageMax       The maximum age
-#' @param gender       Gender demographics - concepts - 0, 8532, 8507, 0 "male", "female".
-#'                     Any string that is not (case insensitive) "male" or "female" is converted to gender concept 0
-#'                     https://www.ohdsi.org/web/wiki/doku.php?id=documentation:vocabulary:gender
+#' @param gender       Gender demographics - concepts - 0, 8532, 8507, 0, "female", "male".
+#'                     Any string that is not "male" or "female" (case insensitive) is converted to gender concept 0.
+#'                     https://athena.ohdsi.org/search-terms/terms?standardConcept=Standard&domain=Gender&page=1&pageSize=15&query=
 #'                     Specific concept ids not in this set can be used but are not explicitly validated
 #' @param race         Race demographics - concept ID list
 #' @param ethnicity    Ethnicity demographics - concept ID list
@@ -838,8 +838,8 @@ LimitSubsetOperator <- R6::R6Class(
 #' Subset cohorts using specified limit criteria
 #' @export
 #' @param name              Name of operation
-#' @param priorTime                 Required prior observation window
-#' @param followUpTime              Required post observation window
+#' @param priorTime         Required prior observation window (specified as a positive integer)
+#' @param followUpTime      Required post observation window (specified as a positive integer)
 #' @param limitTo           character one of:
 #'                              "firstEver" - only first entry in patient history
 #'                              "earliestRemaining" - only first entry after washout set by priorTime

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -1,5 +1,8 @@
+url: https://ohdsi.github.io/CohortGenerator/
+
 template:
-  params:
+  bootstrap: 5
+  bslib:
     bootswatch: cosmo
 
 home:
@@ -129,6 +132,7 @@ reference:
       - sampleCohortDefinitionSet
 
 navbar:
+  bg: dark
   structure:
     right: [hades, github]
   components: