Skip to content

Commit

Permalink
Release v0.11
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonysena authored Sep 1, 2024
2 parents dc3bde3 + 922eb21 commit 78c7463
Show file tree
Hide file tree
Showing 265 changed files with 25,816 additions and 5,666 deletions.
9 changes: 2 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: CohortGenerator
Type: Package
Title: An R Package for Cohort Generation Against the OMOP CDM
Version: 0.10.0
Date: 2024-07-14
Version: 0.11.0
Date: 2024-08-31
Authors@R: c(
person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
person("Jamie", "Gilbert", role = c("aut")),
Expand All @@ -17,7 +17,6 @@ Depends:
R (>= 3.6.0),
R6
Imports:
bit64,
checkmate,
digest,
dplyr,
Expand All @@ -37,13 +36,9 @@ Suggests:
Eunomia,
knitr,
rmarkdown,
ROhdsiWebApi,
testthat,
withr,
zip
Remotes:
ohdsi/ResultModelManager,
ohdsi/ROhdsiWebApi,
License: Apache License
VignetteBuilder: knitr
URL: https://ohdsi.github.io/CohortGenerator/, https://github.com/OHDSI/CohortGenerator
Expand Down
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
CohortGenerator 0.11.0
=======================

New Features

- Add support for minimum cell count (#176)

Bug Fixes

- Multiple calls to export stats causing duplicates in cohort inclusion file (#179)
- Updates to subset documentation (#180, #181)
- Negative control outcome generation bug (#177)

CohortGenerator 0.10.0
=======================

New Features

- Add `runCohortGeneration` function (Issue #165)
- Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets(#154, #162)
- Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145)
- Unit tests now running on all OHDSI DB Platforms (#151)

Bug Fixes

- Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167)
- Allow integer as cohort ID (#146)
- Use native messaging functions for output vs. ParallelLogger (#97)
Expand Down
4 changes: 2 additions & 2 deletions R/CohortStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
# NOTE: This data frame must match the @cohort_inclusion_table
# structure as defined in inst/sql/sql_server/CreateCohortTables.sql
inclusionRules <- data.frame(
cohortDefinitionId = bit64::integer64(),
cohortDefinitionId = numeric(),
ruleSequence = integer(),
name = character(),
description = character()
Expand All @@ -251,7 +251,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
inclusionRules <- rbind(
inclusionRules,
data.frame(
cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
cohortDefinitionId = as.numeric(cohortDefinitionSet$cohortId[i]),
ruleSequence = as.integer(j - 1),
name = ruleName,
description = ruleDescription
Expand Down
84 changes: 76 additions & 8 deletions R/Export.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#'
#' @param databaseId Optional - when specified, the databaseId will be added
#' to the exported results
#' @template minCellCount
#'
#' @template CohortDefinitionSet
#'
Expand All @@ -61,6 +62,7 @@ exportCohortStatsTables <- function(connectionDetails,
fileNamesInSnakeCase = FALSE,
incremental = FALSE,
databaseId = NULL,
minCellCount = 5,
cohortDefinitionSet = NULL,
tablePrefix = "") {
if (is.null(connection)) {
Expand All @@ -76,39 +78,62 @@ exportCohortStatsTables <- function(connectionDetails,
# Internal function to export the stats
exportStats <- function(data,
fileName,
resultsDataModelTableName,
tablePrefix) {
fullFileName <- file.path(cohortStatisticsFolder, paste0(tablePrefix, fileName))
primaryKeyColumns <- getPrimaryKey(resultsDataModelTableName)
columnsToCensor <- getColumnsToCensor(resultsDataModelTableName)
rlang::inform(paste0("- Saving data to - ", fullFileName))

# Make sure the data is censored before saving
if (length(columnsToCensor) > 0) {
for (i in seq_along(columnsToCensor)) {
colName <- ifelse(isTRUE(snakeCaseToCamelCase), yes = columnsToCensor[i], no = SqlRender::camelCaseToSnakeCase(columnsToCensor[i]))
data <- data %>%
enforceMinCellValue(colName, minCellCount)
}
}

if (incremental) {
if (snakeCaseToCamelCase) {
cohortDefinitionIds <- unique(data$cohortDefinitionId)
saveIncremental(data, fullFileName, cohortDefinitionId = cohortDefinitionIds)
} else {
cohortDefinitionIds <- unique(data$cohort_definition_id)
saveIncremental(data, fullFileName, cohort_definition_id = cohortDefinitionIds)
# Dynamically build the arguments to the saveIncremental
# to specify the primary key(s) for the file
args <- list(
data = data,
file = fullFileName
)
for (i in seq_along(primaryKeyColumns)) {
colName <- ifelse(isTRUE(snakeCaseToCamelCase), yes = primaryKeyColumns[i], no = SqlRender::camelCaseToSnakeCase(primaryKeyColumns[i]))
args[[colName]] <- data[[colName]]
}
do.call(
what = CohortGenerator::saveIncremental,
args = args
)
} else {
.writeCsv(x = data, file = fullFileName)
}
}

tablesToExport <- data.frame(
tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv"),
resultsDataModelTableName = c("cg_cohort_inc_result", "cg_cohort_inc_stats", "cg_cohort_summary_stats", "cg_cohort_censor_stats")
)

if (is.null(cohortDefinitionSet)) {
warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = "cohortInclusionTable",
fileName = paste0(tablePrefix, "cohort_inclusion.csv")
fileName = "cohort_inclusion.csv",
resultsDataModelTableName = "cg_cohort_inclusion"
))
} else {
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
names(inclusionRules) <- SqlRender::camelCaseToSnakeCase(names(inclusionRules))
exportStats(
data = inclusionRules,
fileName = "cohort_inclusion.csv",
resultsDataModelTableName = "cg_cohort_inclusion",
tablePrefix = tablePrefix
)
}
Expand All @@ -131,6 +156,7 @@ exportCohortStatsTables <- function(connectionDetails,
exportStats(
data = cohortStats[[tablesToExport$tableName[i]]],
fileName = fileName,
resultsDataModelTableName = tablesToExport$resultsDataModelTableName[[i]],
tablePrefix = tablePrefix
)
}
Expand Down Expand Up @@ -203,3 +229,45 @@ createEmptyResult <- function(tableName) {
result <- result[FALSE, ]
return(result)
}

getPrimaryKey <- function(tableName) {
columns <- readCsv(
file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
) %>%
dplyr::filter(.data$tableName == !!tableName & tolower(.data$primaryKey) == "yes") %>%
dplyr::pull(.data$columnName) %>%
SqlRender::snakeCaseToCamelCase()
return(columns)
}

getColumnsToCensor <- function(tableName) {
columns <- readCsv(
file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
) %>%
dplyr::filter(.data$tableName == !!tableName & tolower(.data$minCellCount) == "yes") %>%
dplyr::pull(.data$columnName) %>%
SqlRender::snakeCaseToCamelCase()
return(columns)
}

enforceMinCellValue <- function(data, fieldName, minValues, silent = FALSE) {
toCensor <- !is.na(pull(data, fieldName)) & pull(data, fieldName) < minValues & pull(data, fieldName) != 0
if (!silent) {
percent <- round(100 * sum(toCensor) / nrow(data), 1)
message(
" censoring ",
sum(toCensor),
" values (",
percent,
"%) from ",
fieldName,
" because value below minimum"
)
}
if (length(minValues) == 1) {
data[toCensor, fieldName] <- -minValues
} else {
data[toCensor, fieldName] <- -minValues[toCensor]
}
return(data)
}
14 changes: 14 additions & 0 deletions R/RunCohortGeneration.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@
#' @param databaseId A unique ID for the database. This will be appended to
#' most tables.
#'
#' @template minCellCount
#'
#' @param incremental Create only cohorts that haven't been created before?
#'
#' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where
Expand All @@ -78,6 +80,7 @@ runCohortGeneration <- function(connectionDetails,
stopOnError = TRUE,
outputFolder,
databaseId = 1,
minCellCount = 5,
incremental = FALSE,
incrementalFolder = NULL) {
if (is.null(cohortDefinitionSet) && is.null(negativeControlOutcomeCohortSet)) {
Expand Down Expand Up @@ -127,6 +130,7 @@ runCohortGeneration <- function(connectionDetails,
stopOnError = stopOnError,
outputFolder = outputFolder,
databaseId = databaseId,
minCellCount = minCellCount,
incremental = incremental,
incrementalFolder = incrementalFolder
)
Expand All @@ -142,6 +146,7 @@ runCohortGeneration <- function(connectionDetails,
detectOnDescendants = detectOnDescendants,
outputFolder = outputFolder,
databaseId = databaseId,
minCellCount = minCellCount,
incremental = incremental,
incrementalFolder = incrementalFolder
)
Expand All @@ -164,6 +169,7 @@ generateAndExportCohorts <- function(connection,
stopOnError,
outputFolder,
databaseId,
minCellCount,
incremental,
incrementalFolder) {
# Generate the cohorts
Expand Down Expand Up @@ -220,6 +226,9 @@ generateAndExportCohorts <- function(connection,
}

rlang::inform("Saving cohort counts")
cohortCounts <- cohortCounts %>%
enforceMinCellValue("cohortEntries", minCellCount) %>%
enforceMinCellValue("cohortSubjects", minCellCount)
writeCsv(
x = cohortCounts,
file = cohortCountsFileName
Expand All @@ -235,6 +244,7 @@ generateAndExportCohorts <- function(connection,
fileNamesInSnakeCase = TRUE,
incremental = incremental,
databaseId = databaseId,
minCellCount = minCellCount,
cohortDefinitionSet = cohortDefinitionSet,
tablePrefix = "cg_"
)
Expand All @@ -254,6 +264,7 @@ generateAndExportNegativeControls <- function(connection,
detectOnDescendants,
outputFolder,
databaseId,
minCellCount,
incremental,
incrementalFolder) {
# Generate any negative controls
Expand Down Expand Up @@ -299,6 +310,9 @@ generateAndExportNegativeControls <- function(connection,
)

rlang::inform("Saving negative control outcome cohort counts")
cohortCountsNegativeControlOutcomes <- cohortCountsNegativeControlOutcomes %>%
enforceMinCellValue("cohortEntries", minCellCount) %>%
enforceMinCellValue("cohortSubjects", minCellCount)
writeCsv(
x = cohortCountsNegativeControlOutcomes,
file = cohortCountsNegativeControlOutcomesFileName
Expand Down
12 changes: 6 additions & 6 deletions R/Subsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ createCohortSubset <- function(name = NULL, cohortIds, cohortCombinationOperator
#' @title Demographic Subset Operator
#' @description
#' Operators for subsetting a cohort by demographic criteria
#'
#'
#' @export
DemographicSubsetOperator <- R6::R6Class(
classname = "DemographicSubsetOperator",
Expand Down Expand Up @@ -637,9 +637,9 @@ DemographicSubsetOperator <- R6::R6Class(
#' @param name Optional char name
#' @param ageMin The minimum age
#' @param ageMax The maximum age
#' @param gender Gender demographics - concepts - 0, 8532, 8507, 0 "male", "female".
#' Any string that is not (case insensitive) "male" or "female" is converted to gender concept 0
#' https://www.ohdsi.org/web/wiki/doku.php?id=documentation:vocabulary:gender
#' @param gender Gender demographics - concepts - 0, 8532, 8507, 0, "female", "male".
#' Any string that is not "male" or "female" (case insensitive) is converted to gender concept 0.
#' https://athena.ohdsi.org/search-terms/terms?standardConcept=Standard&domain=Gender&page=1&pageSize=15&query=
#' Specific concept ids not in this set can be used but are not explicitly validated
#' @param race Race demographics - concept ID list
#' @param ethnicity Ethnicity demographics - concept ID list
Expand Down Expand Up @@ -838,8 +838,8 @@ LimitSubsetOperator <- R6::R6Class(
#' Subset cohorts using specified limit criteria
#' @export
#' @param name Name of operation
#' @param priorTime Required prior observation window
#' @param followUpTime Required post observation window
#' @param priorTime Required prior observation window (specified as a positive integer)
#' @param followUpTime Required post observation window (specified as a positive integer)
#' @param limitTo character one of:
#' "firstEver" - only first entry in patient history
#' "earliestRemaining" - only first entry after washout set by priorTime
Expand Down
6 changes: 5 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
url: https://ohdsi.github.io/CohortGenerator/

template:
params:
bootstrap: 5
bslib:
bootswatch: cosmo

home:
Expand Down Expand Up @@ -129,6 +132,7 @@ reference:
- sampleCohortDefinitionSet

navbar:
bg: dark
structure:
right: [hades, github]
components:
Expand Down
Loading

0 comments on commit 78c7463

Please sign in to comment.