Merge pull request #336 from OHDSI/v0.3.1

v0.3.1
OHDSI · Oct 8, 2024 · ac32f5c · ac32f5c
2 parents a637b78 + 6f27230
commit ac32f5c
Show file tree

Hide file tree

Showing 8 changed files with 60 additions and 38 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: CohortConstructor
 Title: Build and Manipulate Study Cohorts Using a Common Data Model
-Version: 0.3.0.900
+Version: 0.3.1
 Authors@R: c(
     person("Edward", "Burn", , "[email protected]", 
     role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9286-1128")),

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/cran-comments.md b/cran-comments.md
@@ -1,3 +1,3 @@
 ## R CMD check results
 
-This is a new release. 
+This is a patch release fixing a small issue from the previous release.
diff --git a/data-raw/data/Results_OHDSI postgres_20241007.zip b/data-raw/data/Results_OHDSI postgres_20241007.zip
diff --git a/data-raw/getBenchmarkResults.R b/data-raw/getBenchmarkResults.R
@@ -66,6 +66,22 @@ updateCDMname <- function(resultList, old, new) {
   resultList
 }
 
+# Recode the `result_type` column in the settings of every summarised_result
+# element of `resultList`: each value in `old` is replaced by the value at the
+# same position in `new`; all other values are kept unchanged.
+# Returns `resultList` with the updated "settings" attributes.
+updateResultType <- function(resultList, old, new) {
+  stopifnot(length(old) == length(new))
+  # Build the case_when() call as text, one `old ~ new` branch per pair
+  caseWhen <- "dplyr::case_when("
+  for (k in seq_along(old)) {
+    caseWhen <- glue::glue("{caseWhen} .data$result_type == '{old[k]}' ~ '{new[k]}', ")
+  }
+  caseWhen <- paste0(caseWhen, ".default = .data$result_type)") |>
+    rlang::parse_exprs() |> rlang::set_names("result_type")
+  for (res in names(resultList)) {
+    if (inherits(resultList[[res]], "summarised_result")) {
+      # Splice without an outer name: the expression's own name
+      # (`result_type`) decides which column is overwritten
+      attr(resultList[[res]], "settings") <- settings(resultList[[res]]) |>
+        dplyr::mutate(!!!caseWhen)
+    }
+  }
+  resultList
+}
+
 # Functions
 niceNum <- function(x, dec = 0) {
   trimws(format(round(as.numeric(x), dec), big.mark = ",", nsmall = dec, scientific = FALSE))
@@ -75,7 +91,8 @@ niceNum <- function(x, dec = 0) {
 resultPatterns <- c("time", "comparison", "details", "omop", "index_counts", "sql_indexes")
 benchmarkDataPre <- readData(here::here("data-raw", "data")) |>
   mergeData(resultPatterns) |>
-  updateCDMname(old = "AurumCDM_202403", new = "CPRD Aurum")
+  updateCDMname(old = "AurumCDM_202403", new = "CPRD Aurum") |>
+  updateResultType(old = "cohort_overlap", new = "summarise_cohort_overlap")
 benchmarkData <- list()
 
 ### omop
@@ -96,8 +113,10 @@ benchmarkData$omop <- benchmarkDataPre$omop |>
 ### details
 benchmarkData$details <- benchmarkDataPre$details |>
   filterSettings(result_type == "cohort_count") |>
+  formatEstimateValue() |>
   splitAll() |>
-  pivotEstimates() |>
+  select(!"estimate_type") |>
+  pivot_wider(names_from = "estimate_name", values_from = "estimate_value") |>
   select(-variable_level, - result_id) |>
   distinct() |>
   filter(grepl("cc_|atlas_", cohort_name)) |>
@@ -183,7 +202,7 @@ benchmarkData$time_strata <- benchmarkDataPre$time |>
 
 ### time comparison
 benchmarkData$comparison <- benchmarkDataPre$comparison |>
-  filterSettings(result_type == "cohort_overlap") |>
+  filterSettings(result_type == "summarise_cohort_overlap") |>
   splitGroup() |>
   filter(grepl("atlas_", cohort_name_comparator) & grepl("cc_", cohort_name_reference)) |>
   filter(gsub("atlas_", "", cohort_name_comparator) == gsub("cc_", "", cohort_name_reference)) |>

diff --git a/data/benchmarkData.rda b/data/benchmarkData.rda
diff --git a/man/CohortConstructor-package.Rd b/man/CohortConstructor-package.Rd
diff --git a/vignettes/a11_benchmark.Rmd b/vignettes/a11_benchmark.Rmd
@@ -9,12 +9,12 @@ vignette: >
 
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
-  collapse = TRUE,
-  eval = TRUE, 
-  warning = FALSE, 
-  message = FALSE,
-  comment = "#>",
-  echo = FALSE
+collapse = TRUE,
+eval = TRUE, 
+warning = FALSE, 
+message = FALSE,
+comment = "#>",
+echo = FALSE
 )
 ```
 
@@ -33,30 +33,32 @@ library(CohortConstructor)
 
 niceOverlapLabels <- function(labels) {
   new_labels <- gsub("_", " ", gsub(" and.*|cc_", "", labels))
-  tibble("Cohort name" = new_labels) |>
-    mutate(
-      "Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", new_labels))),
-      "Cohort name" = case_when(
-        grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
-        grepl("Covid", .data[["Cohort name"]]) ~ gsub("Covid|Covid", "COVID-19", `Cohort name`),
-        grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
-        grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
-        grepl("First", .data[["Cohort name"]]) ~ "First major depression",
-        grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
-        grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
-        .default = .data[["Cohort name"]]
-      ),
-      "Cohort name" = if_else(
-        grepl("COVID", .data[["Cohort name"]]),
-        gsub(" female", ": female", gsub(" male", ": male", .data[["Cohort name"]])),
-        .data[["Cohort name"]]
-      ),
-      "Cohort name" = if_else(
-        grepl(" to ", .data[["Cohort name"]]),
-        gsub("male ", "male, ", .data[["Cohort name"]]),
-        .data[["Cohort name"]]
+  return(
+    tibble("Cohort name" = new_labels) |>
+      mutate(
+        "Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", new_labels))),
+        "Cohort name" = case_when(
+          grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
+          grepl("Covid", .data[["Cohort name"]]) ~ gsub("Covid|Covid", "COVID-19", `Cohort name`),
+          grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
+          grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
+          grepl("First", .data[["Cohort name"]]) ~ "First major depression",
+          grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
+          grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
+          .default = .data[["Cohort name"]]
+        ),
+        "Cohort name" = if_else(
+          grepl("COVID", .data[["Cohort name"]]),
+          gsub(" female", ": female", gsub(" male", ": male", .data[["Cohort name"]])),
+          .data[["Cohort name"]]
+        ),
+        "Cohort name" = if_else(
+          grepl(" to ", .data[["Cohort name"]]),
+          gsub("male ", "male, ", .data[["Cohort name"]]),
+          .data[["Cohort name"]]
+        )
       )
-    )
+  )
 }
 ```
 
@@ -120,7 +122,7 @@ We also computed the overlap between patients in CIRCE and CohortConstructor coh
 
 ```{r, fig.width=10, fig.height=7}
 benchmarkData$comparison |>
-  plotCohortOverlap() +
+  plotCohortOverlap(uniqueCombinations = FALSE, facet = "cdm_name") +
   scale_y_discrete(labels = niceOverlapLabels) +
   theme(
     legend.text = element_text(size = 10),
@@ -129,8 +131,9 @@ benchmarkData$comparison |>
     axis.title.x = element_text(size = 14),
     axis.title.y = element_text(size = 14)
   ) +
-  facet_wrap("cdm_name") +
-  scale_fill_discrete(labels = c("CIRCE", "Both", "CohortConstructor"))
+  # facet_wrap("cdm_name") +
+  scale_fill_discrete(labels = c("Both", "CIRCE", "CohortConstructor")) +
+  scale_color_discrete(labels = c("Both", "CIRCE", "CohortConstructor"))
 ```
 
 # Performance