Merge pull request #333 from OHDSI/cran_0.3

prepare 0.3 release
OHDSI · Sep 25, 2024 · 78ff92a · 78ff92a
2 parents d68f826 + d11abaa
commit 78ff92a
Show file tree

Hide file tree

Showing 11 changed files with 65 additions and 36 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^docs$
 ^codecov\.yml$
 ^cran-comments\.md$
+^data-raw
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: CohortConstructor
 Title: Build and Manipulate Study Cohorts Using a Common Data Model
-Version: 0.2.2
+Version: 0.3.0
 Authors@R: c(
     person("Edward", "Burn", , "[email protected]", 
     role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9286-1128")),
@@ -17,7 +17,9 @@ Authors@R: c(
     person("Xihang", "Chen", , "[email protected]", 
     role = c("aut"), comment = c(ORCID = "0009-0001-8112-8959")),
     person("Kim", "Lopez", , "[email protected]", 
-    role = "aut", comment = c(ORCID = "0000-0002-8462-8668")))
+    role = "aut", comment = c(ORCID = "0000-0002-8462-8668")),
+    person("Elin", "Rowlands", , "[email protected]", 
+    role = "aut", comment = c(ORCID = "0009-0005-5166-0417")))
 Description: Create and manipulate study cohorts in data mapped to the 
     Observational Medical Outcomes Partnership Common Data Model.
 License: Apache License (>= 2)
@@ -67,5 +69,4 @@ VignetteBuilder: knitr
 Depends: 
     R (>= 4.1)
 URL: https://ohdsi.github.io/CohortConstructor/
-Remotes:
-    darwin-eu/visOmopResults
+LazyData: true
diff --git a/R/data.R b/R/data.R
@@ -0,0 +1,4 @@
+#' Benchmarking results
+#'
+#' @format A list of results from benchmarking
+"benchmarkData"
diff --git a/README.md b/README.md
@@ -117,15 +117,15 @@ cohort_count(cdm$fractures) %>% glimpse()
 #> $ number_records       <int> 464, 569, 138
 #> $ number_subjects      <int> 427, 510, 132
 attrition(cdm$fractures) %>% glimpse()
-#> Rows: 9
+#> Rows: 3
 #> Columns: 7
-#> $ cohort_definition_id <int> 1, 1, 1, 2, 2, 2, 3, 3, 3
-#> $ number_records       <int> 464, 464, 464, 569, 569, 569, 138, 138, 138
-#> $ number_subjects      <int> 427, 427, 427, 510, 510, 510, 132, 132, 132
-#> $ reason_id            <int> 1, 2, 3, 1, 2, 3, 1, 2, 3
-#> $ reason               <chr> "Initial qualifying events", "cohort requirements…
-#> $ excluded_records     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0
-#> $ excluded_subjects    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0
+#> $ cohort_definition_id <int> 1, 2, 3
+#> $ number_records       <int> 464, 569, 138
+#> $ number_subjects      <int> 427, 510, 132
+#> $ reason_id            <int> 1, 1, 1
+#> $ reason               <chr> "Initial qualifying events", "Initial qualifying …
+#> $ excluded_records     <int> 0, 0, 0
+#> $ excluded_subjects    <int> 0, 0, 0
 ```
 
 ### Create an overall fracture cohort
@@ -219,7 +219,7 @@ attrition(cdm$fractures) %>%
 #> $ cohort_definition_id <int> 1, 2, 3, 4
 #> $ number_records       <int> 43, 64, 22, 129
 #> $ number_subjects      <int> 43, 62, 22, 122
-#> $ reason_id            <int> 6, 6, 6, 4
+#> $ reason_id            <int> 4, 4, 4, 4
 #> $ reason               <chr> "Age requirement: 40 to 65", "Age requirement: 40…
 #> $ excluded_records     <int> 65, 88, 40, 193
 #> $ excluded_subjects    <int> 61, 81, 38, 165
@@ -232,7 +232,7 @@ attrition(cdm$fractures) %>%
 #> $ cohort_definition_id <int> 1, 2, 3, 4
 #> $ number_records       <int> 19, 37, 12, 68
 #> $ number_subjects      <int> 19, 36, 12, 65
-#> $ reason_id            <int> 7, 7, 7, 5
+#> $ reason_id            <int> 5, 5, 5, 5
 #> $ reason               <chr> "Sex requirement: Female", "Sex requirement: Fema…
 #> $ excluded_records     <int> 24, 27, 10, 61
 #> $ excluded_subjects    <int> 24, 26, 10, 57
@@ -265,7 +265,7 @@ attrition(cdm$fractures) %>%
 #> $ cohort_definition_id <int> 1, 2, 3, 4
 #> $ number_records       <int> 14, 30, 10, 54
 #> $ number_subjects      <int> 14, 30, 10, 52
-#> $ reason_id            <int> 10, 10, 10, 8
+#> $ reason_id            <int> 8, 8, 8, 8
 #> $ reason               <chr> "Not in cohort gibleed between -Inf & 0 days rela…
 #> $ excluded_records     <int> 5, 7, 2, 14
 #> $ excluded_subjects    <int> 5, 6, 2, 13

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -42,3 +42,6 @@ reference:
 - subtitle: Mock data
 - contents:
   - matches("mockCohortConstructor")
+- subtitle: Results from benchmarking
+- contents:
+  - matches("benchmarkData")
diff --git a/data-raw/getBenchmarkResults.R b/data-raw/getBenchmarkResults.R
@@ -192,3 +192,6 @@ benchmarkData$comparison <- benchmarkDataPre$comparison |>
 
 ### sql indexes
 benchmarkData$sql_indexes <- benchmarkDataPre$sql_indexes
+
+
+usethis::use_data(benchmarkData, internal = FALSE, overwrite = TRUE)
diff --git a/data-raw/internalData.R b/data-raw/internalData.R
@@ -1,3 +1,3 @@
 source(here::here("data-raw", "domainsData.R"))
-source(here::here("data-raw", "getBenchmarkResults.R"))
-usethis::use_data(domainsData, benchmarkData, internal = TRUE, overwrite = TRUE)
+usethis::use_data(domainsData, internal = TRUE, overwrite = TRUE)
+
diff --git a/data/benchmarkData.rda b/data/benchmarkData.rda
diff --git a/man/CohortConstructor-package.Rd b/man/CohortConstructor-package.Rd
diff --git a/man/benchmarkData.Rd b/man/benchmarkData.Rd
diff --git a/vignettes/a11_benchmark.Rmd b/vignettes/a11_benchmark.Rmd
@@ -87,7 +87,7 @@ The benchmark script was executed against the following four databases:
 The table below presents the number of records in the OMOP tables used in the benchmark script for each of the participating databases.
 
 ```{r}
-CohortConstructor:::benchmarkData$omop |>
+benchmarkData$omop |>
   visOmopResults::formatTable() |>
   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
             locations = cells_column_labels()) |>
@@ -108,7 +108,7 @@ The COVID-19 cohort was used to evaluate the performance of common cohort strati
 The following table displays the number of records and subjects for each cohort across the participating databases:
 
 ```{r}
-CohortConstructor:::benchmarkData$details |>
+benchmarkData$details |>
   visOmopResults::formatTable(groupColumn = "cdm_name") |>
   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
             locations = cells_column_labels()) |>
@@ -119,18 +119,18 @@ CohortConstructor:::benchmarkData$details |>
 We also computed the overlap between patients in CIRCE and CohortConstructor cohorts, with results shown in the plot below:
 
 ```{r, fig.width=10, fig.height=7}
-CohortConstructor:::benchmarkData$comparison |>
-  plotCohortOverlap() +
-  scale_y_discrete(labels = niceOverlapLabels) +
-  theme(
-    legend.text = element_text(size = 10),
-    strip.text = element_text(size = 14),
-    axis.text.x = element_text(size = 12),  
-    axis.title.x = element_text(size = 14),  
-    axis.title.y = element_text(size = 14)   
-  ) +
-  facet_wrap("cdm_name") +
-  scale_fill_discrete(labels = c("CIRCE", "Both", "CohortConstructor"))
+# benchmarkData$comparison |>
+#   plotCohortOverlap() +
+#   scale_y_discrete(labels = niceOverlapLabels) +
+#   theme(
+#     legend.text = element_text(size = 10),
+#     strip.text = element_text(size = 14),
+#     axis.text.x = element_text(size = 12),  
+#     axis.title.x = element_text(size = 14),  
+#     axis.title.y = element_text(size = 14)   
+#   ) +
+#   facet_wrap("cdm_name") +
+#   scale_fill_discrete(labels = c("CIRCE", "Both", "CohortConstructor"))
 ```
 
 # Performance
@@ -152,7 +152,7 @@ The following plot shows the times taken to create each cohort using CIRCE and C
 ## TABLE with same results as the plot below.
 
 # header_prefix <- "[header]Time by database (minutes)\n[header_level]"
-# CohortConstructor:::benchmarkData$time |>
+# benchmarkData$time |>
 #   distinct() |>
 #   filter(!grepl("male|set", msg)) |>
 #   mutate(
@@ -185,7 +185,7 @@ The following plot shows the times taken to create each cohort using CIRCE and C
 
 ```{r, fig.width=10, fig.height=7}
 
-CohortConstructor:::benchmarkData$time_definition |>
+benchmarkData$time_definition |>
   ggplot(aes(y = `Cohort name`, x = time, colour = Tool, fill = Tool)) +
   geom_col(position = "dodge", width = 0.6) +
   xlab("Time (minutes)") +
@@ -209,7 +209,7 @@ The table below depicts the total time it took to create the nine cohorts when u
 
 ```{r}
 header_prefix <- "[header]Time by tool (minutes)\n[header_level]"
-CohortConstructor:::benchmarkData$time_domain |>
+benchmarkData$time_domain |>
   gtTable(colsToMergeRows = "all_columns") |>
   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")), 
             locations = cells_column_labels()) |>
@@ -222,7 +222,7 @@ CohortConstructor:::benchmarkData$time_domain |>
 Cohorts are often stratified in studies. With Atlas cohort definitions, each stratum requires a new CIRCE JSON to be instantiated, while CohortConstructor allows stratifications to be generated from an overall cohort. The following table shows the time taken to create age and sex stratifications for the COVID-19 cohort with both CIRCE and CohortConstructor.
 
 ```{r}
-CohortConstructor:::benchmarkData$time_strata |>
+benchmarkData$time_strata |>
   gtTable(colsToMergeRows = "all_columns") |>
   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")), 
             locations = cells_column_labels()) |>
@@ -247,7 +247,7 @@ Four calls were made to `conceptCohort`, each involving a different number of OM
 The plot below shows the computation time with and without SQL indexes for each scenario:
 
 ```{r, fig.width=10, fig.height=7}
-CohortConstructor:::benchmarkData$sql_indexes |>
+benchmarkData$sql_indexes |>
   distinct() |>
   group_by(cdm_name, msg) |>
   summarise(time = sum(as.numeric(toc) - as.numeric(tic))/60, .groups = "drop") |>
Original file line number	Diff line number	Diff line change
Expand Up		@@ -192,3 +192,6 @@ benchmarkData$comparison <- benchmarkDataPre$comparison |>

		### sql indexes
		benchmarkData$sql_indexes <- benchmarkDataPre$sql_indexes


		usethis::use_data(benchmarkData, internal = FALSE, overwrite = TRUE)