From 12c195c89ad631637c75740c6dd6ed0d1bd331af Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Tue, 12 Nov 2024 17:52:04 +0000 Subject: [PATCH 1/8] Add duckdb --- .github/workflows/test-coverage.yaml | 4 +- DESCRIPTION | 24 +- NAMESPACE | 2 + R/checks.R | 28 +- R/mockOmopSketch.R | 3 + R/plotConceptCounts.R | 62 +- R/plotInObservation.R | 52 +- R/plotObservationPeriod.R | 14 +- R/plotRecordCount.R | 51 +- R/summariseAllConceptCounts.R | 183 ----- R/summariseClinicalRecords.R | 386 ++++----- R/summariseConceptCounts.R | 488 ++++++++---- R/summariseInObservation.R | 273 +++---- R/summariseMissingData.R | 156 ---- R/summariseObservationPeriod.R | 23 +- R/summariseOmopSnapshot.R | 2 +- R/summarisePopulationCharacteristics.R | 82 ++ R/summariseRecordCount.R | 153 ++-- R/tablePopulationCharacteristics.R | 57 ++ R/utilities.R | 9 +- _pkgdown.yml | 5 + man/OmopSketch-package.Rd | 1 - man/mockOmopSketch.Rd | 3 + man/plotConceptCounts.Rd | 6 +- man/plotInObservation.Rd | 2 +- man/summariseConceptCounts.Rd | 10 +- man/summariseInObservation.Rd | 12 +- man/summarisePopulationCharacteristics.Rd | 48 ++ man/summariseRecordCount.Rd | 11 +- man/tablePopulationCharacteristics.Rd | 37 + tests/testthat/test-plotInObservation.R | 8 +- .../testthat/test-summariseAllConceptCounts.R | 59 -- .../testthat/test-summariseClinicalRecords.R | 19 +- tests/testthat/test-summariseConceptCounts.R | 471 +++++------ tests/testthat/test-summariseInObservation.R | 124 ++- tests/testthat/test-summariseMissingData.R | 52 -- .../test-summariseObservationPeriod.R | 746 +++++++++--------- .../test-summarisePopulationCharacteristics.R | 229 ++++++ tests/testthat/test-summariseRecordCount.R | 141 ++-- .../A-summarise_clinical_tables_records.Rmd | 15 +- vignettes/B-summarise_concept_counts.Rmd | 13 +- vignettes/C-summarise_pop_characteristics.Rmd | 94 +++ 42 files changed, 2095 insertions(+), 2063 deletions(-) delete mode 100644 R/summariseAllConceptCounts.R delete mode 100644 R/summariseMissingData.R create mode 100644 R/summarisePopulationCharacteristics.R create mode 100644 R/tablePopulationCharacteristics.R create mode 100644 man/summarisePopulationCharacteristics.Rd create mode 100644 man/tablePopulationCharacteristics.Rd delete mode 100644 tests/testthat/test-summariseAllConceptCounts.R delete mode 100644 tests/testthat/test-summariseMissingData.R create mode 100644 tests/testthat/test-summarisePopulationCharacteristics.R create mode 100644 vignettes/C-summarise_pop_characteristics.Rmd diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 6602e653..98822609 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -13,6 +13,8 @@ permissions: read-all jobs: test-coverage: runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 @@ -42,7 +44,7 @@ jobs: file: ./cobertura.xml plugin: noop disable_search: true - token: 8e3843c1-7a05-4c43-89ac-ddc8f0edff7d + token: ${{ secrets.CODECOV_TOKEN }} - name: Show testthat output if: always() diff --git a/DESCRIPTION b/DESCRIPTION index 9699fbce..5f459c8c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: OmopSketch Title: Characterise Tables of an OMOP Common Data Model Instance -Version: 0.1.1 +Version: 0.1.2 Authors@R: c( person( "Marta", "Alcalde-Herraiz", @@ -15,10 +15,6 @@ Authors@R: c( "Elin", "Rowlands", email = "elin.rowlands@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0009-0005-5166-0417") ), - person( - "Cecilia", "Campanile", email = "cecilia.campanile@ndorms.ox.ac.uk", - role = c("aut"), comment = c(ORCID = "0009-0007-6629-4661") - ), person( "Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut"), comment = c(ORCID = "0000-0002-9286-1128") @@ -39,9 +35,6 @@ RoxygenNote: 7.3.2 Suggests: CodelistGenerator, DBI, - duckdb, - flextable, - gt, here, knitr, odbc, @@ -49,30 +42,31 @@ Suggests: rmarkdown, RPostgres, testthat (>= 3.0.0), - withr, - omock (>= 0.3.0), - covr + withr Config/testthat/edition: 3 Config/testthat/parallel: true Imports: CDMConnector (>= 1.3.0), cli, clock, + gt, + flextable, + CohortCharacteristics (>= 0.3.0), CohortConstructor (>= 0.3.1), dplyr, ggplot2, + omock (>= 0.3.0), omopgenerics (>= 0.3.1), - PatientProfiles (>= 1.2.1), + PatientProfiles (>= 1.2.0), purrr, rlang, stringr, tibble, tidyr, - visOmopResults (>= 0.4.0) + visOmopResults (>= 0.4.0), + duckdb Depends: R (>= 2.10) URL: https://OHDSI.github.io/OmopSketch/ BugReports: https://github.com/OHDSI/OmopSketch/issues VignetteBuilder: knitr -Remotes: - darwin-eu-dev/omopgenerics diff --git a/NAMESPACE b/NAMESPACE index 7ac37804..072eb118 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,11 +14,13 @@ export(summariseConceptCounts) export(summariseInObservation) export(summariseObservationPeriod) export(summariseOmopSnapshot) +export(summarisePopulationCharacteristics) export(summariseRecordCount) export(suppress) export(tableClinicalRecords) export(tableObservationPeriod) export(tableOmopSnapshot) +export(tablePopulationCharacteristics) importFrom(dplyr,"%>%") importFrom(omopgenerics,bind) importFrom(omopgenerics,exportSummarisedResult) diff --git a/R/checks.R b/R/checks.R index 83402e6b..299513b9 100644 --- a/R/checks.R +++ b/R/checks.R @@ -1,32 +1,12 @@ #' @noRd -checkInterval <- function(interval, call = parent.frame()){ - omopgenerics::assertCharacter(interval, length = 1, na = FALSE, null = FALSE, call = call) +checkUnit <- function(unit, call = parent.frame()){ + omopgenerics::assertCharacter(unit, length = 1, na = FALSE, null = FALSE, call = call) - if(!interval %in% c("year","month")){ - cli::cli_abort("Interval argument {interval} is not valid. Valid options are either `year` or `month`.", call = call) + if(!unit %in% c("year","month")){ + cli::cli_abort("Unit argument {unit} is not valid. Valid options are either `year` or `month`.", call = call) } } -validateIntervals <- function(interval, call = parent.frame()){ - - omopgenerics::assertCharacter(interval, length = 1, na = FALSE, null = FALSE, call = call) - - if(!interval %in% c("overall","years","months","quarters")){ - cli::cli_abort("Interval argument {interval} is not valid. Valid options are either `overall`, `years`, `quarters` or `months`.", call = call) - } - - unitInterval <- dplyr::case_when( - interval == "overall" ~ NA, - interval == "quarters" ~ 4, - interval == "months" ~ 1, - interval == "years" ~ 1 - ) - - if(interval == "quarters"){quarters <- "month"}else{interval <- gsub("s$","",interval)} - - return(list("interval" = interval, "unitInterval" = unitInterval)) -} - #' @noRd checkCategory <- function(category, overlap = FALSE, type = "numeric", call = parent.frame()) { omopgenerics::assertList( diff --git a/R/mockOmopSketch.R b/R/mockOmopSketch.R index d746c8bc..840a8738 100644 --- a/R/mockOmopSketch.R +++ b/R/mockOmopSketch.R @@ -14,7 +14,10 @@ #' @return A mock cdm_reference object. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' mockOmopSketch(numberIndividuals = 100) +#' } mockOmopSketch <- function(con = NULL, writeSchema = NULL, numberIndividuals = 100, diff --git a/R/plotConceptCounts.R b/R/plotConceptCounts.R index ae0b2b06..045d1d78 100644 --- a/R/plotConceptCounts.R +++ b/R/plotConceptCounts.R @@ -9,7 +9,7 @@ #' @export #' @examples #' \donttest{ -#' library(dplyr) +#' library(dplyr, warn.conflicts = FALSE) #' #' cdm <- mockOmopSketch() #' @@ -22,8 +22,8 @@ #' ) #' #' result |> -#' filter(variable_name == "Number subjects") |> -#' plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") +#' filter(estimate_name == "person_count", variable_name == "overall") |> +#' plotConceptCounts(facet = "codelist_name", colour = "codelist_name") #' #' PatientProfiles::mockDisconnect(cdm) #' } @@ -36,55 +36,29 @@ plotConceptCounts <- function(result, # subset to results of interest result <- result |> visOmopResults::filterSettings(.data$result_type == "summarise_concept_counts") - if (nrow(result) == 0) { cli::cli_abort(c("!" = "No records found with result_type == summarise_concept_counts")) } # check only one estimate is contained - variable <- unique(result$variable_name) - if (length(variable) > 1) { + estimate <- unique(result$estimate_name) + if (length(estimate) > 1) { cli::cli_abort(c( - "!" = "Subset to the variable of interest, there are results from: {variable}.", - "i" = "result |> dplyr::filter(variable_name == '{variable[1]}')" + "!" = "Subset to the estimate of interest, there are results from: {estimate}.", + "i" = "result |> dplyr::filter(estimate_name == '{estimate[1]}')" )) } - result1 <- result |> visOmopResults::splitAdditional() - # Detect if there are several time intervals - if("time_interval" %in% colnames(result1)){ - # Line plot where each concept is a different line - p <- result1 |> - dplyr::filter(.data$time_interval != "overall") |> - visOmopResults::uniteAdditional(cols = c("time_interval", "standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id")) |> - visOmopResults::scatterPlot(x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = TRUE, - group = "standard_concept_name", - facet = facet, - colour = colour) - }else{ - if("standard_concept_name" %in% colnames(result1)){ - p <- result |> - visOmopResults::barPlot(x = "standard_concept_name", - y = "count", - facet = facet, - colour = colour) - }else{ - p <- result |> - visOmopResults::barPlot(x = "codelist_name", - y = "count", - facet = facet, - colour = colour) - } - p <- p + - ggplot2::labs( - x = "Concept name" - ) - } - - p + + order <- c("overall", sort(unique(result$variable_name[result$variable_name != "overall"]))) + result |> + dplyr::mutate(variable_name = factor(.data$variable_name, + levels = order)) |> + visOmopResults::barPlot(x = "variable_name", + y = estimate, + facet = facet, + colour = colour) + + ggplot2::labs( + x = "Concept name" + ) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)) } diff --git a/R/plotInObservation.R b/R/plotInObservation.R index 7910f207..b4cd6ecf 100644 --- a/R/plotInObservation.R +++ b/R/plotInObservation.R @@ -10,7 +10,7 @@ #' @export #' @examples #' \donttest{ -#' library(dplyr) +#' library(dplyr, warn.conflicts = FALSE) #' #' cdm <- mockOmopSketch() #' @@ -52,35 +52,27 @@ plotInObservation <- function(result, } # warn - warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "additional_level")) + warnFacetColour(result, list(facet = asCharacterFacet(facet), colour = colour, "variable_level")) # plot - if(length(unique(result$additional_level)) > 1 ){ - result |> - dplyr::mutate(additional_level = as.character(gsub("-01$","",as.Date(gsub(" to.*","",.data$additional_level))))) |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::scatterPlot( - x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = FALSE, - ymin = NULL, - ymax = NULL, - facet = facet, - colour = colour, - group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) - ) + - ggplot2::labs( - y = variable, - x = "Date" - ) - }else{ - result |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::barPlot(x = "variable_name", - y = "count", - facet = facet, - colour = colour) - } + result |> + dplyr::mutate(variable_level = as.Date(stringr::str_extract( + .data$variable_level, "^[^ to]+"))) |> + dplyr::filter(.data$estimate_name == "count") |> + visOmopResults::scatterPlot( + x = "variable_level", + y = "count", + line = TRUE, + point = TRUE, + ribbon = FALSE, + ymin = NULL, + ymax = NULL, + facet = facet, + colour = colour, + group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) + ) + + ggplot2::labs( + y = variable, + x = "Date" + ) } diff --git a/R/plotObservationPeriod.R b/R/plotObservationPeriod.R index 88b711fa..0011ccd8 100644 --- a/R/plotObservationPeriod.R +++ b/R/plotObservationPeriod.R @@ -57,9 +57,9 @@ plotObservationPeriod <- function(result, validateFacet(facet, result) - optFacetColour <- c("cdm_name", "observation_period_ordinal", - visOmopResults::strataColumns(result)) - omopgenerics::assertChoice(facet, optFacetColour, null = TRUE) + optFacetColour <- visOmopResults::tidyColumns(result) + optFacetColour <- optFacetColour[optFacetColour %in% visOmopResults::tidyColumns(result)] + omopgenerics::assertChoice(facet, optFacetColour, null = TRUE, call = call) # this is due to bug in visOmopResults to remove in next release # https://github.com/darwin-eu/visOmopResults/issues/246 @@ -68,7 +68,8 @@ plotObservationPeriod <- function(result, if(length(visOmopResults::groupColumns(result)) == 0){ result <- result |> - dplyr::mutate(group_name = "observation_period_ordinal") + dplyr::mutate(group_name = "observation_period_ordinal", + group_level = "Overall") } if (plotType == "barplot") { @@ -85,7 +86,7 @@ plotObservationPeriod <- function(result, x = "observation_period_ordinal", facet = facet, colour = colour) - } else if (plotType == "densityplot") { + } else { p <- visOmopResults::scatterPlot( result = result, x = "density_x", @@ -95,8 +96,7 @@ plotObservationPeriod <- function(result, ribbon = FALSE, facet = facet, colour = colour, - group = optFacetColour - ) + + group = optFacetColour) + ggplot2::xlab(stringr::str_to_sentence(unique(result$variable_name))) + ggplot2::ylab("Density") } diff --git a/R/plotRecordCount.R b/R/plotRecordCount.R index 221cb4bd..874d6ab7 100644 --- a/R/plotRecordCount.R +++ b/R/plotRecordCount.R @@ -37,34 +37,25 @@ plotRecordCount <- function(result, cli::cli_abort(c("!" = "No records found with result_type == summarise_record_count")) } - # Detect if there are several time intervals - if(length(unique(result$additional_level)) > 1 ){ - # Line plot where each concept is a different line - p <- result |> - dplyr::filter(.data$additional_level != "overall") |> - dplyr::filter(.data$estimate_name == "count") |> - visOmopResults::scatterPlot(x = "time_interval", - y = "count", - line = TRUE, - point = TRUE, - ribbon = FALSE, - facet = facet, - colour = colour, - group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result))) + - ggplot2::labs( - y = "Number records", - x = "Date" - ) - }else{ - p <- result |> - visOmopResults::barPlot(x = "variable_name", - y = "count", - facet = facet, - colour = colour) + - ggplot2::labs( - y = "Count", - x = "" - ) - } - p + # plot + result |> + dplyr::mutate(variable_level = as.Date(stringr::str_extract( + .data$variable_level, "^[^ to]+"))) |> + dplyr::filter(.data$estimate_name == "count") |> + visOmopResults::scatterPlot( + x = "variable_level", + y = "count", + line = TRUE, + point = TRUE, + ribbon = FALSE, + ymin = NULL, + ymax = NULL, + facet = facet, + colour = colour, + group = c("cdm_name", "omop_table", visOmopResults::strataColumns(result)) + ) + + ggplot2::labs( + y = "Incident records", + x = "Date" + ) } diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R deleted file mode 100644 index 273e7970..00000000 --- a/R/summariseAllConceptCounts.R +++ /dev/null @@ -1,183 +0,0 @@ - -my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){ - - strata <- as.character() - - if(!is.null(ageGroup)){ - strata <- append(strata, "age_group") - } - - if(sex){ - strata <- append(strata, "sex") - } - if(year){ - strata <- append(strata, "year") - } - return(strata) -} - -checkFeasibility <- function(omopTable, tableName, conceptId){ - - if (omopgenerics::isTableEmpty(omopTable)){ - cli::cli_warn(paste0(tableName, " omop table is empty.")) - return(NULL) - } - - if (is.na(conceptId)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - - y <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) - - if (omopgenerics::isTableEmpty(y)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - return(TRUE) -} -#' Summarise concept use in patient-level data -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param countBy Either "record" for record-level counts or "person" for -#' person-level counts -#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export -summariseAllConceptCounts <- function(cdm, - omopTableName, - countBy = "record", - year = FALSE, - sex = FALSE, - ageGroup = NULL){ - - omopgenerics::validateCdmArgument(cdm) - checkCountBy(countBy) - omopgenerics::assertLogical(year, length = 1) - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup) - - stratification <- omopgenerics::combineStrata(strata) - - result_tables <- purrr::map(omopTableName, function(table){ - - - - omopTable <- cdm[[table]] |> - dplyr::ungroup() - - - conceptId <- standardConcept(omopgenerics::tableName(omopTable)) - - if (is.null(checkFeasibility(omopTable, table, conceptId))){ - return(NULL) - } - - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - - x <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) |> - dplyr::left_join( - cdm$concept |> dplyr::select("concept_id", "concept_name"), - by = stats::setNames("concept_id", conceptId)) |> - PatientProfiles::addDemographicsQuery(age = FALSE, - ageGroup = ageGroup, - sex = sex, - indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - level <- c(conceptId, "concept_name") - - groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x))) - - result <- list() - if ("record" %in% countBy){ - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - - - grouped_results <- purrr::map(groupings, \(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop") - - }) - - result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate("estimate_name" = "record_count") - result<-dplyr::bind_rows(result,result_record) - } - - if ("person" %in% countBy){ - - grouped_results <- purrr::map(groupings, \(g) { - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - }) - - result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |> - dplyr::mutate("estimate_name" = "person_count") - result<-dplyr::bind_rows(result,result_person) - } - result<- result |> - dplyr::mutate("omop_table" = table, - "variable_level" = as.character(.data[[conceptId]])) |> - dplyr::select(-dplyr::all_of(conceptId)) - return(result) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - sr <-purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm) - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer" - ) |> - dplyr::rename("variable_name" = "concept_name") - # |> - # dplyr::select(!c()) - - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_all_concept_counts" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - return(sr) - -} - diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index 423f666e..b6f67e05 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -55,16 +55,15 @@ summariseClinicalRecords <- function(cdm, ageGroup = NULL) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) - opts <- omopgenerics::omopTables() - opts <- opts[opts %in% names(cdm)] - omopgenerics::assertChoice(omopTableName, choices = opts) + omopTableName |> + omopgenerics::assertChoice(choices = omopgenerics::omopTables()) estimates <- PatientProfiles::availableEstimates( variableType = "numeric", fullQuantiles = TRUE) |> dplyr::pull("estimate_name") omopgenerics::assertChoice(recordsPerPerson, choices = estimates, null = TRUE) + recordsPerPerson <- unique(recordsPerPerson) - if (is.null(recordsPerPerson)) recordsPerPerson <- character() omopgenerics::assertLogical(inObservation, length = 1) omopgenerics::assertLogical(standardConcept, length = 1) @@ -72,50 +71,43 @@ summariseClinicalRecords <- function(cdm, omopgenerics::assertLogical(domainId, length = 1) omopgenerics::assertLogical(typeConcept, length = 1) omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, multipleAgeGroup = FALSE)[[1]] - - result <- purrr::map(omopTableName, \(x) { - if(omopgenerics::isTableEmpty(cdm[[x]])) { - cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) - return(omopgenerics::emptySummarisedResult()) - } - summariseClinicalRecord( - x, - cdm = cdm, - recordsPerPerson = recordsPerPerson, - inObservation = inObservation, - standardConcept = standardConcept, - sourceVocabulary = sourceVocabulary, - domainId = domainId, - typeConcept = typeConcept, - sex = sex, - ageGroup = ageGroup - ) - }) |> - omopgenerics::bind() + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + result <- purrr::map(omopTableName, + function(x) { + if(omopgenerics::isTableEmpty(cdm[[x]])) { + cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) + return(omopgenerics::emptySummarisedResult()) + } + summariseClinicalRecord(x, + cdm = cdm, + recordsPerPerson = recordsPerPerson, + inObservation = inObservation, + standardConcept = standardConcept, + sourceVocabulary = sourceVocabulary, + domainId = domainId, + typeConcept = typeConcept, + sex = sex, + ageGroup = ageGroup) + } + ) |> + dplyr::bind_rows() return(result) } #' @noRd -summariseClinicalRecord <- function(omopTableName, - cdm, - recordsPerPerson, - inObservation, - standardConcept, - sourceVocabulary, - domainId, - typeConcept, - sex, - ageGroup, - call = parent.frame(3)) { +summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, + inObservation, standardConcept, + sourceVocabulary, domainId, typeConcept, + sex, ageGroup, call = parent.frame(3)) { tablePrefix <- omopgenerics::tmpPrefix() # Initial checks omopgenerics::assertClass(cdm[[omopTableName]], "omop_table", call = call) - date <- startDate(omopTableName) + date <- startDate(omopgenerics::tableName(cdm[[omopTableName]])) omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() @@ -123,21 +115,21 @@ summariseClinicalRecord <- function(omopTableName, omopTable <- filterPersonId(omopTable) |> addStrataToOmopTable(date, ageGroup, sex) - if ("observation_period" == omopTableName) { - if (standardConcept) { - if (!missing(standardConcept)) { + if ("observation_period" == omopTableName) { + if(standardConcept){ + if(!missing(standardConcept)){ cli::cli_inform("standardConcept turned to FALSE for observation_period OMOP table", call = call) } standardConcept <- FALSE } - if (sourceVocabulary) { - if (!missing(sourceVocabulary)) { + if(sourceVocabulary){ + if(!missing(sourceVocabulary)){ cli::cli_inform("sourceVocabulary turned to FALSE for observation_period OMOP table", call = call) } sourceVocabulary <- FALSE } - if (domainId) { - if (!missing(domainId)) { + if(domainId){ + if(!missing(domainId)){ cli::cli_inform("domainId turned to FALSE for observation_period OMOP table", call = call) } domainId <- FALSE @@ -145,46 +137,57 @@ summariseClinicalRecord <- function(omopTableName, } strata <- getStrataList(sex, ageGroup) - strata <- c(list(character()), strata) + + peopleStrata <- suppressWarnings(addStrataToPeopleInObservation(cdm, ageGroup, sex, tablePrefix)) + + people <- getNumberPeopleInCdm(cdm, strata, peopleStrata) + result <- omopgenerics::emptySummarisedResult() # Counts summary ---- - cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}} counts and records per person")) - result <- summariseRecordsPerPerson( - omopTable, date, sex, ageGroup, recordsPerPerson) + cli::cli_inform(c("i" = "Summarising table counts")) + result <- result |> + addCounts(strata, omopTable) |> + addSubjectsPercentage(omopTable, people, strata) + + # Records per person summary ---- + if(!is.null(recordsPerPerson)){ + cli::cli_inform(c("i" = "Summarising records per person")) + result <- result |> + addRecordsPerPerson(omopTable, recordsPerPerson, cdm, peopleStrata, strata) + } + + denominator <- result |> + dplyr::filter(.data$variable_name == "number records") |> + dplyr::collect("strata_name", "strata_level", "estimate_value") # Summary concepts ---- if (inObservation | standardConcept | sourceVocabulary | domainId | typeConcept) { - denominator <- result |> - dplyr::filter(.data$variable_name == "number records") |> - dplyr::select("strata_name", "strata_level", "estimate_value") - variables <- columnsVariables( inObservation, standardConcept, sourceVocabulary, domainId, typeConcept ) - cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}}: {.var {variables}}.")) + cli::cli_inform(c("i" = "Summarising {variables} information")) result <- result |> dplyr::bind_rows( omopTable |> - addVariables(variables) |> - dplyr::group_by(dplyr::across(dplyr::everything())) |> - dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> + addVariables(variables, strata) |> + dplyr::group_by(dplyr::across(dplyr::all_of(variables)), .data$age_group, .data$sex) |> + dplyr::tally() |> dplyr::collect() |> - summaryData(denominator, strata, cdm) + dplyr::mutate("n" = as.integer(.data$n)) |> + summaryData(variables, cdm, denominator, result) ) } # Format output as a summarised result result <- result |> + tidyr::fill("result_id", "cdm_name", "group_name", "group_level", + "additional_name", "additional_level", .direction = "downup") |> dplyr::mutate( - "result_id" = 1L, - "cdm_name" = omopgenerics::cdmName(cdm), "group_name" = "omop_table", - "group_level" = omopTableName, - "additional_name" = "overall", - "additional_level" = "overall" + "group_level" = omopgenerics::tableName(omopTable) ) |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( "result_id" = 1L, @@ -200,129 +203,33 @@ summariseClinicalRecord <- function(omopTableName, # Functions ----- getStrataList <- function(sex, ageGroup){ - omopgenerics::combineStrata(c("age_group"[!is.null(ageGroup)], "sex"[sex])) -} - -summariseRecordsPerPerson <- function(omopTable, date, sex, ageGroup, recordsPerPerson) { - # get strata - strataCols <- c("sex"[sex], "age_group"[!is.null(ageGroup)]) - - cdm <- omopgenerics::cdmReference(omopTable) - tablePrefix <- omopgenerics::tmpPrefix() - nm <- omopgenerics::uniqueTableName(tablePrefix) - - # denominator - demographics <- CohortConstructor::demographicsCohort( - cdm = cdm, name = nm, ageRange = ageGroup - ) |> - suppressMessages() - set <- omopgenerics::settings(demographics) - if (sex) demographics <- PatientProfiles::addSexQuery(demographics) - if (is.null(ageGroup)) { - set <- set |> dplyr::select("cohort_definition_id") - } else { - set <- set |> - dplyr::left_join( - dplyr::tibble( - age_group = names(ageGroup), - age_range = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])) - ), - by = "age_range" - ) |> - dplyr::mutate(age_group = dplyr::coalesce(.data$age_group, .data$age_range)) |> - dplyr::select("cohort_definition_id", "age_group") - } - - # records per person - x <- demographics |> - dplyr::select(dplyr::any_of(c( - "cohort_definition_id", "person_id" = "subject_id", "sex" - ))) |> - dplyr::distinct() |> - dplyr::collect() |> - dplyr::left_join(set, by = "cohort_definition_id") |> - dplyr::select(!"cohort_definition_id") |> - dplyr::left_join( - omopTable |> - dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strataCols)))) |> - dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> - dplyr::collect(), - by = c("person_id", strataCols) - ) |> - dplyr::mutate(n = dplyr::coalesce(.data$n, 0L)) - - omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) - - result <- list() - result[["overall"]] <- summariseCounts(x, character(), recordsPerPerson) + strata <- as.character() - if (!is.null(ageGroup)) { - result[["age_group"]] <- x |> - summariseCounts(c("age_group"), recordsPerPerson) + if(!is.null(ageGroup)){ + strata <- append(strata, "age_group") } - if (sex) { - result[["sex"]] <- x |> - summariseCounts(c("sex"), recordsPerPerson) + if(sex){ + strata <- append(strata, "sex") } - if (!is.null(ageGroup) & sex) { - result[["age_group_sex"]] <- x |> - summariseCounts(c("age_group", "sex"), recordsPerPerson) - } - - result <- result |> - dplyr::bind_rows() |> - dplyr::mutate( - variable_name = dplyr::if_else( - .data$variable_name == "n", - dplyr::if_else(.data$estimate_name == "sum", "number records", "records_per_person"), - .data$variable_name - ), - estimate_name = dplyr::if_else( - .data$variable_name == "number records", "count", .data$estimate_name - ) - ) - - return(result) -} -summariseCounts <- function(x, strata, recordsPerPerson) { - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strata)))) |> - dplyr::summarise(n = sum(.data$n), .groups = "drop") |> - dplyr::mutate(number_subjects = dplyr::if_else(.data$n == 0, 0L, 1L)) |> - dplyr::select(!"person_id") |> - PatientProfiles::summariseResult( - group = character(), - includeOverallGroup = FALSE, - strata = strata, - includeOverallStrata = FALSE, - counts = FALSE, - variables = list("number_subjects", "n"), - estimates = list(c("count", "percentage"), c(recordsPerPerson, "sum")) - ) |> - suppressMessages() + strata <- omopgenerics::combineStrata(levels = strata) + return(strata) } -getNumberPeopleInCdm <- function(cdm, ageGroup, sex, strata) { - tablePrefix <- omopgenerics::tmpPrefix() +getNumberPeopleInCdm <- function(cdm, strata, peopleStrata){ - x <- cdm |> - addStrataToPeopleInObservation(ageGroup, sex, tablePrefix) |> + peopleStrata |> + dplyr::select(-c("observation_period_start_date","observation_period_end_date")) |> + dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 - PatientProfiles::summariseResult( - strata = strata, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = character() - ) |> + PatientProfiles::summariseResult(strata = strata, + includeOverallStrata = TRUE, + counts = TRUE, + estimates = c("")) |> suppressMessages() |> dplyr::filter(.data$variable_name != "number records") - - omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) - - return(x) } addCounts <- function(result, strata, omopTable){ @@ -333,7 +240,7 @@ addCounts <- function(result, strata, omopTable){ rbind( omopTable |> dplyr::select("person_id", dplyr::any_of(c("age_group","sex"))) |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult(strata = strata, includeOverallStrata = TRUE, counts = TRUE, @@ -384,7 +291,7 @@ addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm, people .data$records_per_person )) |> dplyr::distinct() |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( strata = strata, includeOverallStrata = TRUE, @@ -395,7 +302,7 @@ addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm, people ) } -addVariables <- function(x, variables) { +addVariables <- function(x, variables, strata) { name <- omopgenerics::tableName(x) @@ -413,7 +320,7 @@ addVariables <- function(x, variables) { cdm <- omopgenerics::cdmReference(x) x <- x |> - dplyr::select(dplyr::all_of(newNames), dplyr::any_of(c("age_group", "sex"))) + dplyr::select(dplyr::all_of(newNames), "age_group", "sex") # Domain and standard ---- if (any(c("domain_id", "standard") %in% variables)) { @@ -474,9 +381,27 @@ addVariables <- function(x, variables) { } x <- x |> - dplyr::select(dplyr::all_of(variables), dplyr::any_of(c("age_group", "sex"))) |> + dplyr::select(dplyr::all_of(variables), "age_group", "sex") |> dplyr::mutate(dplyr::across(dplyr::everything(), ~as.character(.))) + # Create overall groups - This chunk will need efficiency improvement + if(length(strata) == 3){ + x <- x |> + dplyr::union_all( + x |> + dplyr::mutate(age_group = "overall") + ) |> + dplyr::union_all( + x |> + dplyr::mutate(sex = "overall") + ) |> + dplyr::union_all( + x |> + dplyr::mutate(sex = "overall") |> + dplyr::mutate(age_group = "overall") + ) + + } return(x) } @@ -486,43 +411,38 @@ columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, d )] } -summaryData <- function(x, denominator, strata, cdm) { - - cols <- colnames(x) - +summaryData <- function(x, variables, cdm, denominator, result) { results <- list() # in observation ---- - if ("in_observation" %in% cols) { + if ("in_observation" %in% variables) { results[["obs"]] <- x |> dplyr::mutate("in_observation" = dplyr::if_else( - .data$in_observation == "1", "Yes", "No" + !is.na(.data$in_observation), "Yes", "No" )) |> - formatResults("In observation", "in_observation", denominator, strata) + formatResults("In observation", "in_observation", denominator, result) } # standard ----- - if ("standard" %in% cols) { + if ("standard" %in% variables) { results[["standard"]] <- x |> - formatResults("Standard concept", "standard", denominator, strata) + formatResults("Standard concept", "standard", denominator, result) } # source ---- - if ("source" %in% cols) { - results[["source"]] <- x |> - formatResults("Source vocabulary", "source", denominator, strata) + if ("source" %in% variables) { + results[["source"]] <- x |> formatResults("Source vocabulary", "source", denominator, result) } # domain ---- - if ("domain_id" %in% cols) { - results[["domain"]] <- x |> - formatResults("Domain", "domain_id", denominator, strata) + if ("domain_id" %in% variables) { + results[["domain"]] <- x |> formatResults("Domain", "domain_id", denominator, result) } # type ---- - if ("type" %in% cols) { + if ("type" %in% variables) { xx <- x |> - formatResults("Type concept id", "type", denominator, strata) |> + formatResults("Type concept id", "type", denominator, result) |> dplyr::left_join( conceptTypes |> dplyr::select( @@ -559,56 +479,56 @@ summaryData <- function(x, denominator, strata, cdm) { paste0(.data$new_variable_level, " (", .data$variable_level, ")") )) } - results[["type"]] <- xx |> - dplyr::select(-"new_variable_level") + results[["type"]] <- xx |> dplyr::select(-"new_variable_level") } - results <- dplyr::bind_rows(results) + results <- results |> + dplyr::bind_rows() return(results) } -formatResults <- function(x, variableName, variableLevel, denominator, strata) { +formatResults <- function(x, variableName, variableLevel, denominator, result) { denominator <- denominator |> dplyr::select("strata_name", "strata_level", "denominator" = "estimate_value") |> visOmopResults::splitStrata() - strataCols <- unique(unlist(strata)) + if(!"age_group" %in% colnames(denominator)){ + denominator <- denominator |> + dplyr::mutate("age_group" = "overall") + } - result <- list() - for (strat in strata) { - res <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel, strat)))) |> - dplyr::summarise("count" = sum(.data$n), .groups = "drop") - for (col in strataCols) { - if (!col %in% colnames(res)) { - res <- res |> dplyr::mutate(!!col := "overall") - } - } - result[[paste0(strat, collapse = "_")]] <- res |> - dplyr::inner_join(denominator, by = strataCols) |> - dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - tidyr::pivot_longer( - cols = c("count", "percentage"), - names_to = "estimate_name", - values_to = "estimate_value" - ) |> - dplyr::mutate( - "variable_name" = .env$variableName, - "variable_level" = as.character(.data[[variableLevel]]), - "estimate_type" = dplyr::if_else( - .data$estimate_name == "count", "integer", "percentage" - ) - ) |> - visOmopResults::uniteStrata(cols = strataCols) |> - dplyr::select( - "strata_name", "strata_level", "variable_name", "variable_level", - "estimate_name", "estimate_type", "estimate_value" - ) |> - dplyr::ungroup() + if(!"sex" %in% colnames(denominator)){ + denominator <- denominator |> + dplyr::mutate("sex" = "overall") } - dplyr::bind_rows(result) + x |> + dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel,"age_group","sex")))) |> + dplyr::summarise("count" = sum(.data$n), .groups = "drop") |> + dplyr::inner_join( + denominator, + by = c("age_group","sex") + ) |> + dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + tidyr::pivot_longer( + cols = c("count", "percentage"), + names_to = "estimate_name", + values_to = "estimate_value" + ) |> + dplyr::mutate( + "variable_name" = .env$variableName, + "variable_level" = as.character(.data[[variableLevel]]), + "estimate_type" = dplyr::if_else( + .data$estimate_name == "count", "integer", "percentage" + ) + ) |> + visOmopResults::uniteStrata(cols = c("age_group","sex")) |> + dplyr::select( + "strata_name", "strata_level", "variable_name", "variable_level", + "estimate_name", "estimate_type", "estimate_value" + ) |> + dplyr::ungroup() } diff --git a/R/summariseConceptCounts.R b/R/summariseConceptCounts.R index e6ea43a8..1e8dc898 100644 --- a/R/summariseConceptCounts.R +++ b/R/summariseConceptCounts.R @@ -1,12 +1,12 @@ -#' Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +#' Summarise code use in patient-level data #' #' @param cdm A cdm object #' @param conceptId List of concept IDs to summarise. #' @param countBy Either "record" for record-level counts or "person" for #' person-level counts #' @param concept TRUE or FALSE. If TRUE code use will be summarised by concept. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. #' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. #' @param ageGroup A list of ageGroup vectors of length two. Code use will be #' thus summarised by age groups. @@ -15,7 +15,6 @@ #' @export #' @examples #' \donttest{ -#' library(OmopSketch) #' #' cdm <- mockOmopSketch() #' @@ -26,28 +25,22 @@ #' results #' #' PatientProfiles::mockDisconnect(cdm) -#' #' } summariseConceptCounts <- function(cdm, conceptId, countBy = c("record", "person"), concept = TRUE, - interval = "overall", + year = FALSE, sex = FALSE, ageGroup = NULL){ omopgenerics::validateCdmArgument(cdm) omopgenerics::assertList(conceptId, named = TRUE) checkCountBy(countBy) - omopgenerics::assertChoice(countBy, choices = c("record", "person")) - countBy <- gsub("persons","subjects",paste0("number ",countBy,"s")) - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval - omopgenerics::assertNumeric(unitInterval, length = 1, min = 1, na = TRUE) - omopgenerics::assertLogical(concept, length = 1) - omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + if(!is.null(conceptId) && length(names(conceptId)) != length(conceptId)){ + cli::cli_abort("conceptId must be a named list") + } # Get all concepts in concept table if conceptId is NULL # if(is.null(conceptId)) { @@ -59,78 +52,88 @@ summariseConceptCounts <- function(cdm, # tibble::deframe() # } - codeUse <- list() - cli::cli_progress_bar("Getting use of codes", total = length(conceptId)) - for(i in 1:length(conceptId)) { - cli::cli_alert_info("Getting concept counts of {names(conceptId)[i]}") - codeUse[[i]] <- getCodeUse(conceptId[i], - cdm = cdm, - countBy = countBy, - concept = concept, - interval = interval, - unitInterval = unitInterval, - sex = sex, - ageGroup = ageGroup) - Sys.sleep(i/length(conceptId)) - cli::cli_progress_update() + getAllCodeUse <- function() { + codeUse <- list() + cli::cli_progress_bar("Getting use of codes", total = length(conceptId)) + for(i in 1:length(conceptId)) { + cli::cli_alert_info("Getting use of codes from {names(conceptId)[i]}") + codeUse[[i]] <- getCodeUse(conceptId[i], + cdm = cdm, + cohortTable = NULL, + cohortId = NULL, + timing = "any", + countBy = countBy, + concept = concept, + year = year, + sex = sex, + ageGroup = ageGroup) + Sys.sleep(i/length(conceptId)) + cli::cli_progress_update() + } + codeUse <- codeUse |> + dplyr::bind_rows() + cli::cli_progress_done() + return(codeUse) } - codeUse <- codeUse |> - dplyr::bind_rows() - cli::cli_progress_done() + codeUse <- getAllCodeUse() if(nrow(codeUse) > 0) { codeUse <- codeUse %>% dplyr::mutate( result_id = as.integer(1), cdm_name = omopgenerics::cdmName(cdm) + ) %>% + omopgenerics::newSummarisedResult( + settings = dplyr::tibble( + result_id = as.integer(1), + result_type = "summarise_concept_counts", + package_name = "OmopSketch", + package_version = as.character(utils::packageVersion("OmopSketch")) + ) ) } else { codeUse <- omopgenerics::emptySummarisedResult() } - codeUse <- codeUse %>% - omopgenerics::newSummarisedResult( - settings = dplyr::tibble( - result_id = 1L, - result_type = "summarise_concept_counts", - package_name = "OmopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")) - ) - ) return(codeUse) } getCodeUse <- function(x, cdm, + cohortTable, + cohortId, + timing, countBy, concept, - interval, - unitInterval, + year, sex, ageGroup, - call = parent.frame()){ - - tablePrefix <- omopgenerics::tmpPrefix() + call = parent.frame()) { + omopgenerics::assertCharacter(timing, len = 1) + omopgenerics::assertChoice(timing, choices = c("any", "entry")) + omopgenerics::assertChoice(countBy, choices = c("record", "person")) omopgenerics::assertNumeric(x[[1]], integerish = TRUE) omopgenerics::assertList(x) + omopgenerics::assertLogical(concept) + omopgenerics::assertLogical(year) + omopgenerics::assertLogical(sex) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - # Create code list table - tableCodelist <- paste0(tablePrefix,"codelist") + tableCodelist <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableCodelist, table = dplyr::tibble(concept_id = x[[1]]), overwrite = TRUE, temporary = FALSE) - cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% dplyr::left_join( cdm[["concept"]] %>% dplyr::select("concept_id", "domain_id"), - by = "concept_id" - ) + by = "concept_id") - # Create domains table - tableDomainsData <- paste0(tablePrefix,"domains_data") + tableDomainsData <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableDomainsData, table = tables, @@ -145,102 +148,133 @@ getCodeUse <- function(x, temporary = FALSE, overwrite = TRUE) - # Create records table - intermediateTable <- paste0(tablePrefix,"intermediate_table") + CDMConnector::dropTable(cdm = cdm, name = tableDomainsData) + cdm[[tableDomainsData]] <- NULL + + intermediateTable <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) records <- getRelevantRecords(cdm = cdm, tableCodelist = tableCodelist, - intermediateTable = intermediateTable, - tablePrefix = tablePrefix) - if(is.null(records)){ - cc <- dplyr::tibble() + cohortTable = cohortTable, + cohortId = cohortId, + timing = timing, + intermediateTable = intermediateTable) + + if(!is.null(records) && + (records %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") > 0)) { + if(sex == TRUE | !is.null(ageGroup)){ + records <- records %>% + PatientProfiles::addDemographicsQuery(age = !is.null(ageGroup), + ageGroup = ageGroup, + sex = sex, + priorObservation = FALSE, + futureObservation = FALSE, + indexDate = "date") |> + dplyr::compute(overwrite = TRUE, + name = omopgenerics::tableName(records), + temporary = FALSE) + } + + byAgeGroup <- !is.null(ageGroup) + codeCounts <- getSummaryCounts(records = records, + cdm = cdm, + countBy = countBy, + concept = concept, + year = year, + sex = sex, + byAgeGroup = byAgeGroup) + + if (is.null(cohortTable)) { + cohortName <- NA + } else { + cohortName <- omopgenerics::settings(cdm[[cohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == cohortId) %>% + dplyr::pull("cohort_name") + } + + codeCounts <- codeCounts %>% + dplyr::mutate( + "codelist_name" := !!names(x), + "cohort_name" = .env$cohortName, + "estimate_type" = "integer", + "variable_name" = dplyr::if_else(is.na(.data$standard_concept_name), "overall", .data$standard_concept_name), + "variable_level" = as.character(.data$standard_concept_id) + ) %>% + visOmopResults::uniteGroup(cols = c("cohort_name", "codelist_name")) %>% + visOmopResults::uniteAdditional( + cols = c("source_concept_name", "source_concept_id", "domain_id") + ) %>% + dplyr::select( + "group_name", "group_level", "strata_name", "strata_level", + "variable_name", "variable_level", "estimate_name", "estimate_type", + "estimate_value", "additional_name", "additional_level" + ) + } else { + codeCounts <- dplyr::tibble() cli::cli_inform(c( "i" = "No records found in the cdm for the concepts provided." )) - return(omopgenerics::emptySummarisedResult()) } - records <- addStrataToOmopTable(records, "date", ageGroup, sex) - strata <- getStrataList(sex, ageGroup) - - if(interval != "overall"){ - intervalTibble <- getIntervalTibble(omopTable = records, - start_date_name = "date", - end_date_name = "date", - interval = interval, - unitInterval = unitInterval) - - cdm <- cdm |> omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = intervalTibble) - - records <- splitIncidenceBetweenIntervals(cdm, records, "date", tablePrefix) - - strata <- omopgenerics::combineStrata(c(unique(unlist(getStrataList(sex,ageGroup))), "interval_group")) - } - - if(!"number subjects" %in% c(countBy)){records <- records |> dplyr::select(-"person_id")} - if(concept){ - group <- list("standard_concept_id") - }else{ - group <- list() - records <- records |> - dplyr::mutate("standard_concept_name" = !!names(x)) - } - - cc <- records |> - PatientProfiles::summariseResult(strata = strata, - variable = "standard_concept_name", - group = group, - includeOverallGroup = TRUE, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = as.character()) |> - suppressMessages() |> - dplyr::filter(.data$variable_name %in% .env$countBy) |> - dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> - dplyr::mutate(standard_concept_id = .data$group_level) |> - dplyr::mutate(group_name = "codelist_name") |> - dplyr::mutate(group_level = names(x)) |> - dplyr::mutate(cdm_name = omopgenerics::cdmName(cdm)) |> - dplyr::select(-c("additional_name", "additional_level")) |> - dplyr::left_join( - getConceptsInfo(records), - by = "standard_concept_id" - ) |> - dplyr::select(-"standard_concept_id") - - if(interval != "overall"){ - cc <- cc |> - visOmopResults::splitStrata() |> - dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |> - dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |> - dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |> - dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |> - visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> - dplyr::select(-"interval_group") - } - CDMConnector::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) + CDMConnector::dropTable(cdm = cdm, + name = tableCodelist) + cdm[[tableCodelist]] <- NULL + CDMConnector::dropTable( + cdm = cdm, + name = dplyr::starts_with(intermediateTable) + ) - return(cc) + return(codeCounts) } getRelevantRecords <- function(cdm, tableCodelist, - intermediateTable, - tablePrefix){ + cohortTable, + cohortId, + timing, + intermediateTable){ codes <- cdm[[tableCodelist]] |> dplyr::collect() tableName <- purrr::discard(unique(codes$table_name), is.na) standardConceptIdName <- purrr::discard(unique(codes$standard_concept), is.na) - sourceConceptIdName <- purrr::discard(unique(codes$source_concept), is.na) + sourceConceptIdName <- purrr::discard(unique(codes$source_concept), is.na) dateName <- purrr::discard(unique(codes$start_date), is.na) + if(!is.null(cohortTable)){ + if(is.null(cohortId)){ + cohortSubjects <- cdm[[cohortTable]] %>% + dplyr::select("subject_id", "cohort_start_date") %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::distinct() + } else { + cohortSubjects <- cdm[[cohortTable]] %>% + dplyr::filter(.data$cohort_definition_id %in% cohortId) %>% + dplyr::select("subject_id", "cohort_start_date") %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::distinct() + } + } + if(length(tableName)>0){ codeRecords <- cdm[[tableName[[1]]]] + if(!is.null(cohortTable)){ + # keep only records of those in the cohorts of interest + codeRecords <- codeRecords %>% + dplyr::inner_join(cohortSubjects, + by = "person_id") + if(timing == "entry"){ + codeRecords <- codeRecords %>% + dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[1]])) + } + } - if(is.null(codeRecords)){return(NULL)} - - tableCodes <- paste0(tablePrefix, "table_codes") + if(is.null(codeRecords)){ + return(NULL) + } + tableCodes <- paste0(omopgenerics::uniqueTableName(), + omopgenerics::uniqueId()) cdm <- omopgenerics::insertTable(cdm = cdm, name = tableCodes, table = codes %>% @@ -251,15 +285,15 @@ getRelevantRecords <- function(cdm, codeRecords <- codeRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% + dplyr::mutate(year = clock::get_year(date)) %>% dplyr::select(dplyr::all_of(c("person_id", standardConceptIdName[[1]], sourceConceptIdName[[1]], - "date"))) %>% + "date", "year"))) %>% dplyr::rename("standard_concept_id" = .env$standardConceptIdName[[1]], "source_concept_id" = .env$sourceConceptIdName[[1]]) %>% dplyr::inner_join(cdm[[tableCodes]], by = c("standard_concept_id"="concept_id")) %>% - filterInObservation(indexDate = "date") |> dplyr::compute( name = paste0(intermediateTable,"_grr"), temporary = FALSE, @@ -278,7 +312,16 @@ getRelevantRecords <- function(cdm, if(length(tableName) > 1) { for(i in 1:(length(tableName)-1)) { workingRecords <- cdm[[tableName[[i+1]]]] - + if(!is.null(cohortTable)){ + # keep only records of those in the cohorts of interest + workingRecords <- workingRecords %>% + dplyr::inner_join(cohortSubjects, + by = "person_id") + if(timing == "entry"){ + workingRecords <- workingRecords %>% + dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[i+1]])) + } + } workingRecords <- workingRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% dplyr::mutate(year = clock::get_year(date)) %>% @@ -330,18 +373,181 @@ getRelevantRecords <- function(cdm, return(codeRecords) } -getConceptsInfo <- function(records){ - records |> - dplyr::select("standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id") |> - dplyr::distinct() |> - dplyr::collect() |> - dplyr::mutate("additional_name" = "standard_concept_name &&& standard_concept_id &&& source_concept_name &&& source_concept_id &&& domain_id") |> - dplyr::mutate("additional_level" = paste0(.data$standard_concept_name, " &&& ",.data$standard_concept_id, " &&& ", .data$source_concept_name, " &&& ", .data$source_concept_id, " &&& ", .data$domain_id)) |> - dplyr::select("standard_concept_id","additional_name", "additional_level") |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - dplyr::add_row( - "standard_concept_id" = "overall", - "additional_name" = "overall", - "additional_level" = "overall" +getSummaryCounts <- function(records, + cdm, + countBy, + concept, + year, + sex, + byAgeGroup) { + + if ("record" %in% countBy) { + recordSummary <- records %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + if(isTRUE(concept)) { + recordSummary <- dplyr::bind_rows( + recordSummary, + records %>% + dplyr::group_by( + .data$standard_concept_id, .data$standard_concept_name, + .data$source_concept_id, .data$source_concept_name, .data$domain_id + ) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) + } + recordSummary <- recordSummary %>% + dplyr::mutate( + strata_name = "overall", + strata_level = "overall", + estimate_name = "record_count" + ) + } else { + recordSummary <- dplyr::tibble() + } + + if ("person" %in% countBy) { + personSummary <- records %>% + dplyr::select("person_id") %>% + dplyr::distinct() %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + + if (isTRUE(concept)) { + personSummary <- dplyr::bind_rows( + personSummary, + records %>% + dplyr::select( + "person_id", "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id" + ) %>% + dplyr::distinct() %>% + dplyr::group_by( + .data$standard_concept_id, .data$standard_concept_name, + .data$source_concept_id, .data$source_concept_name, .data$domain_id + ) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) + } + personSummary <- personSummary %>% + dplyr::mutate( + strata_name = "overall", + strata_level = "overall", + estimate_name = "person_count") + } else { + personSummary <- dplyr::tibble() + } + + if ("record" %in% countBy & year == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "year") + ) + } + if ("person" %in% countBy & year == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "year") + ) + } + if ("record" %in% countBy & sex == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "sex") + ) + } + if ("person" %in% countBy & sex == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "sex") + ) + } + if ("record" %in% countBy & byAgeGroup == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = "age_group") + ) + } + if ("person" %in% countBy & byAgeGroup == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = "age_group") ) + } + if ("record" %in% countBy && byAgeGroup == TRUE && sex == TRUE) { + recordSummary <- dplyr::bind_rows( + recordSummary, + getGroupedRecordCount(records = records, cdm = cdm, groupBy = c("age_group", "sex")) + ) + } + if ("person" %in% countBy && byAgeGroup == TRUE && sex == TRUE) { + personSummary <- dplyr::bind_rows( + personSummary, + getGroupedPersonCount(records = records, cdm = cdm, groupBy = c("age_group", "sex")) + ) + } + summary <- dplyr::bind_rows(recordSummary, personSummary) + return(summary) +} + +getGroupedRecordCount <- function(records, + cdm, + groupBy){ + + groupedCounts <- dplyr::bind_rows( + records %>% + dplyr::group_by(dplyr::pick(.env$groupBy)) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect(), + records %>% + dplyr::group_by(dplyr::pick(.env$groupBy, + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", + "domain_id")) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect() + ) %>% + visOmopResults::uniteStrata(cols = groupBy) %>% + dplyr::mutate(estimate_name = "record_count") + + return(groupedCounts) +} + +getGroupedPersonCount <- function(records, + cdm, + groupBy){ + + groupedCounts <- dplyr::bind_rows( + records %>% + dplyr::select(dplyr::all_of(c("person_id", .env$groupBy))) %>% + dplyr::distinct() %>% + dplyr::group_by(dplyr::pick(.env$groupBy)) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect(), + records %>% + dplyr::select(dplyr::all_of(c( + "person_id", "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id", .env$groupBy + ))) %>% + dplyr::distinct() %>% + dplyr::group_by(dplyr::pick( + .env$groupBy, "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id" + )) %>% + dplyr::tally(name = "estimate_value") %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::collect()) %>% + visOmopResults::uniteStrata(cols = groupBy) %>% + dplyr::mutate(estimate_name = "person_count") + + return(groupedCounts) } diff --git a/R/summariseInObservation.R b/R/summariseInObservation.R index bc0e2a52..4345bc63 100644 --- a/R/summariseInObservation.R +++ b/R/summariseInObservation.R @@ -3,7 +3,9 @@ #' #' @param observationPeriod An observation_period omop table. It must be part of #' a cdm_reference object. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param unit Whether to stratify by "year" or by "month". +#' @param unitInterval Number of years or months to include within the time +#' interval. #' @param output Output format. It can be either the number of records #' ("records") that are in observation in the specific interval of time, the #' number of person-days ("person-days"), or both c("records","person-days"). @@ -20,7 +22,8 @@ #' #' result <- summariseInObservation( #' cdm$observation_period, -#' interval = "months", +#' unit = "month", +#' unitInterval = 6, #' output = c("person-days","records"), #' ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), #' sex = TRUE @@ -30,10 +33,10 @@ #' glimpse() #' #' PatientProfiles::mockDisconnect(cdm) -#' #' } summariseInObservation <- function(observationPeriod, - interval = "overall", + unit = "year", + unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE){ @@ -49,13 +52,11 @@ summariseInObservation <- function(observationPeriod, return(omopgenerics::emptySummarisedResult()) } + checkUnit(unit) + omopgenerics::assertNumeric(unitInterval, length = 1, min = 1) checkOutput(output) ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] omopgenerics::assertLogical(sex, length = 1) - original_interval <- interval - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval if(length(output) > 1){output <- "all"} if(missing(ageGroup) | is.null(ageGroup)){ageGroup <- list("overall" = c(0,Inf))}else{ageGroup <- append(ageGroup, list("overall" = c(0, Inf)))} @@ -63,33 +64,30 @@ summariseInObservation <- function(observationPeriod, # Create initial variables ---- cdm <- omopgenerics::cdmReference(observationPeriod) observationPeriod <- addStrataToPeopleInObservation(cdm, ageGroup, sex, tablePrefix) - strata <- getStrataList(sex, ageGroup) - - # Calculate denominator ---- - denominator <- cdm |> getDenominator(output) + # Observation period ---- name <- "observation_period" start_date_name <- startDate(name) end_date_name <- endDate(name) - # Observation period ---- - if(interval != "overall"){ - timeInterval <- getIntervalTibbleForObservation(observationPeriod, start_date_name, end_date_name, interval, unitInterval) + interval <- getIntervalTibbleForObservation(observationPeriod, start_date_name, end_date_name, unit, unitInterval) - # Insert interval table to the cdm ---- - cdm <- cdm |> - omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = timeInterval) - } + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = interval) + + # Calculate denominator ---- + denominator <- cdm |> getDenominator(output) # Count records ---- result <- observationPeriod |> - countRecords(cdm, start_date_name, end_date_name, interval, output, tablePrefix) + countRecords(cdm, start_date_name, end_date_name, unit, output, tablePrefix) # Add category sex overall result <- addSexOverall(result, sex) # Create summarisedResult - result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, original_interval) + result <- createSummarisedResultObservationPeriod(result, observationPeriod, name, denominator, unit, unitInterval) CDMConnector::dropTable(cdm, name = dplyr::starts_with(tablePrefix)) return(result) @@ -136,23 +134,23 @@ getDenominator <- function(cdm, output){ } } -getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date_name, interval, unitInterval){ +getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date_name, unit, unitInterval){ startDate <- getOmopTableStartDate(omopTable, start_date_name) endDate <- getOmopTableEndDate(omopTable, end_date_name) tibble::tibble( - "group" = seq.Date(startDate, endDate, .env$interval) + "group" = seq.Date(startDate, endDate, .env$unit) ) |> dplyr::rowwise() |> dplyr::mutate("interval" = max(which( - .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$interval)) + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) ), na.rm = TRUE)) |> dplyr::ungroup() |> dplyr::group_by(.data$interval) |> dplyr::mutate( "interval_start_date" = min(.data$group), - "interval_end_date" = dplyr::if_else(.env$interval == "year", + "interval_end_date" = dplyr::if_else(.env$unit == "year", clock::add_years(min(.data$group),.env$unitInterval)-1, clock::add_months(min(.data$group),.env$unitInterval)-1) ) |> @@ -168,99 +166,81 @@ getIntervalTibbleForObservation <- function(omopTable, start_date_name, end_date dplyr::distinct() } -countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, interval, output, tablePrefix){ +countRecords <- function(observationPeriod, cdm, start_date_name, end_date_name, unit, output, tablePrefix){ if(output == "person-days" | output == "all"){ - if(interval != "overall"){ - x <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("additional_level" = "interval_group") |> - dplyr::cross_join( - observationPeriod |> - dplyr::select("start_date" = "observation_period_start_date", - "end_date" = "observation_period_end_date", - "age_group", "sex","person_id") - ) |> - dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | - (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) %>% - dplyr::mutate(start_date = pmax(.data$interval_start_date, .data$start_date, na.rm = TRUE)) |> - dplyr::mutate(end_date = pmin(.data$interval_end_date, .data$end_date, na.rm = TRUE)) |> - dplyr::compute(temporary = FALSE, name = tablePrefix) - }else{ - x <- observationPeriod |> - dplyr::rename("start_date" = "observation_period_start_date", - "end_date" = "observation_period_end_date") |> - dplyr::mutate("additional_level" = "overall", - "additional_name" = "overall") - } + x <- cdm[[paste0(tablePrefix, "interval")]] |> + dplyr::cross_join( + observationPeriod |> + dplyr::select("start_date" = "observation_period_start_date", + "end_date" = "observation_period_end_date", + "age_group", "sex","person_id") + ) |> + dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | + (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) %>% + dplyr::mutate(start_date = pmax(.data$interval_start_date, .data$start_date, na.rm = TRUE)) |> + dplyr::mutate(end_date = pmin(.data$interval_end_date, .data$end_date, na.rm = TRUE)) |> + dplyr::compute(temporary = FALSE, name = tablePrefix) personDays <- x %>% dplyr::mutate(estimate_value = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> - dplyr::group_by(dplyr::across(dplyr::any_of(c("additional_level", "sex", "age_group")))) |> + dplyr::group_by(.data$interval_group, .data$sex, .data$age_group) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number person-days", - "additional_name" = "time_interval") |> + dplyr::mutate(variable_name = "Number person-days") |> dplyr::collect() }else{ - personDays <- createEmptyIntervalTable(interval) + personDays <- createEmptyIntervalTable() } if(output == "records" | output == "all"){ - - if(interval != "overall"){ - x <- observationPeriod |> - dplyr::mutate("start_date" = as.Date(paste0(clock::get_year(.data[[start_date_name]]),"/",clock::get_month(.data[[start_date_name]]),"/01"))) |> - dplyr::mutate("end_date" = as.Date(paste0(clock::get_year(.data[[end_date_name]]),"/",clock::get_month(.data[[end_date_name]]),"/01"))) |> - dplyr::group_by(.data$start_date, .data$end_date, .data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> - dplyr::compute(temporary = FALSE, name = tablePrefix) - - records <- cdm[[paste0(tablePrefix, "interval")]] |> - dplyr::rename("additional_level" = "interval_group") |> - dplyr::cross_join(x) |> - dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | - (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |> - dplyr::group_by(.data$additional_level, .data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number records in observation", - "additional_name" = "time_interval") |> - dplyr::collect() - }else{ - records <- observationPeriod |> - dplyr::group_by(.data$age_group, .data$sex) |> - dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> - dplyr::mutate("variable_name" = "Number records in observation", - "additional_level" = "overall", - "additional_name" = "overall") |> - dplyr::collect() - } + x <- observationPeriod |> + dplyr::mutate("start_date" = as.Date(paste0(clock::get_year(.data[[start_date_name]]),"/",clock::get_month(.data[[start_date_name]]),"/01"))) |> + dplyr::mutate("end_date" = as.Date(paste0(clock::get_year(.data[[end_date_name]]),"/",clock::get_month(.data[[end_date_name]]),"/01"))) |> + dplyr::group_by(.data$start_date, .data$end_date, .data$age_group, .data$sex) |> + dplyr::summarise(estimate_value = dplyr::n(), .groups = "drop") |> + dplyr::compute(temporary = FALSE, name = tablePrefix) + + records <- cdm[[paste0(tablePrefix, "interval")]] |> + dplyr::cross_join(x) |> + dplyr::filter((.data$start_date < .data$interval_start_date & .data$end_date >= .data$interval_start_date) | + (.data$start_date >= .data$interval_start_date & .data$start_date <= .data$interval_end_date)) |> + dplyr::group_by(.data$interval_group, .data$age_group, .data$sex) |> + dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> + dplyr::mutate(variable_name = "Number records in observation") |> + dplyr::collect() }else{ - records <- createEmptyIntervalTable(interval) + records <- createEmptyIntervalTable() } x <- personDays |> rbind(records) |> - dplyr::arrange(dplyr::across(dplyr::any_of("additional_level"))) + dplyr::arrange(.data$interval_group) |> + dplyr::rename("time_interval" = "interval_group") + + omopgenerics::dropTable(cdm = cdm, name = c(dplyr::starts_with(tablePrefix))) return(x) } -createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, original_interval){ +createSummarisedResultObservationPeriod <- function(result, observationPeriod, name, denominator, unit, unitInterval){ result <- result |> dplyr::mutate("estimate_value" = as.character(.data$estimate_value)) |> + dplyr::rename("variable_level" = "time_interval") |> visOmopResults::uniteStrata(cols = c("sex", "age_group")) |> dplyr::mutate( "result_id" = as.integer(1), "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(observationPeriod)), "group_name" = "omop_table", "group_level" = name, - "variable_level" = as.character(NA), "estimate_name" = "count", - "estimate_type" = "integer" + "estimate_type" = "integer", + "additional_name" = "overall", + "additional_level" = "overall" ) result <- result |> rbind(result) |> - dplyr::group_by(.data$additional_level, .data$strata_level, .data$variable_name) |> + dplyr::group_by(.data$variable_level, .data$strata_level, .data$variable_name) |> dplyr::mutate(estimate_type = dplyr::if_else(dplyr::row_number() == 2, "percentage", .data$estimate_type)) |> dplyr::inner_join(denominator, by = "variable_name") |> dplyr::mutate(estimate_value = dplyr::if_else(.data$estimate_type == "percentage", as.character(as.numeric(.data$estimate_value)/denominator*100), .data$estimate_value)) |> @@ -271,62 +251,62 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n "result_type" = "summarise_in_observation", "package_name" = "OmopSketch", "package_version" = as.character(utils::packageVersion("OmopSketch")), - "interval" = .env$original_interval + "unit" = .env$unit, + "unitInterval" = .env$unitInterval )) return(result) } -addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix) { - demographics <- cdm |> - CohortConstructor::demographicsCohort( - name = paste0(tablePrefix, "demographics_table"), - sex = NULL, - ageRange = ageGroup, - minPriorObservation = NULL - ) |> - suppressMessages() - - if (sex) { - demographics <- demographics |> - PatientProfiles::addSexQuery() - } else { - demographics <- demographics |> - dplyr::mutate("sex" = "overall") - } +addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix){ + demographics_table <- suppressWarnings(suppressMessages( + cdm |> + CohortConstructor::demographicsCohort(name = paste0(tablePrefix, "demographics_table"), + sex = NULL, + ageRange = ageGroup, + minPriorObservation = NULL) + )) + + if(is.null(ageGroup)){ + demographics <- demographics_table |> + dplyr::rename("observation_period_start_date" = "cohort_start_date", + "observation_period_end_date" = "cohort_end_date", + "person_id" = "subject_id") |> + dplyr::select(-c("cohort_definition_id")) |> + dplyr::mutate("age_group" = "overall") |> + dplyr::compute(temporary = FALSE, name = paste0(tablePrefix, "demographics")) + }else{ + age_tibble <- dplyr::tibble( + "age_range" = gsub(",","_",gsub("\\)","",gsub("c\\(","",gsub(" ","",ageGroup)))), + "age_group" = names(ageGroup) + ) + + settings <- demographics_table |> + CDMConnector::settings() |> + dplyr::inner_join(age_tibble, by = "age_range") |> + dplyr::select("cohort_definition_id","age_group") - if (!is.null(ageGroup)) { - set <- omopgenerics::settings(demographics) |> - dplyr::select("cohort_definition_id", dplyr::any_of("age_range")) - set <- set |> - dplyr::left_join( - dplyr::tibble( - "age_range" = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])), - "age_group" = names(ageGroup) - ), - by = "age_range" + cdm <- cdm |> + omopgenerics::insertTable(name = paste0(tablePrefix, "settings"), table = settings) + + demographics <- demographics_table |> + dplyr::inner_join(cdm[[paste0(tablePrefix,"settings")]], by = "cohort_definition_id") |> + dplyr::rename("observation_period_start_date" = "cohort_start_date", + "observation_period_end_date" = "cohort_end_date", + "person_id" = "subject_id") |> + dplyr::select(-c("cohort_definition_id")) |> + dplyr::inner_join( + cdm[["person"]] |> dplyr::select("person_id"), by = "person_id" ) |> - dplyr::mutate("age_group" = dplyr::if_else( - is.na(.data$age_group), .data$age_range, .data$age_group - )) |> - dplyr::select(!"age_range") - nm <- paste0(tablePrefix, "_settings") - cdm <- omopgenerics::insertTable(cdm = cdm, name = nm, table = set) - demographics <- demographics |> - dplyr::left_join(cdm[[nm]], by = "cohort_definition_id") - } else { - demographics <- demographics |> - dplyr::mutate("age_group" = "overall") + dplyr::compute(name = paste0(tablePrefix, "demographics"), temporary = FALSE) } - nm <- paste0(tablePrefix, "_demographics") - demographics <- demographics |> - dplyr::select( - "observation_period_start_date" = "cohort_start_date", - "observation_period_end_date" = "cohort_end_date", - "person_id" = "subject_id", "age_group", "sex" - ) |> - dplyr::compute(name = nm, temporary = FALSE) + + if(sex){ + demographics <- demographics |> PatientProfiles::addSexQuery() + }else{ + demographics <- demographics |> dplyr::mutate(sex = "overall") + } return(demographics) } @@ -335,30 +315,19 @@ addSexOverall <- function(result, sex){ if(sex){ result <- result |> rbind( result |> - dplyr::group_by(.data$age_group, .data$additional_level, .data$variable_name) |> + dplyr::group_by(.data$age_group, .data$time_interval, .data$variable_name) |> dplyr::summarise(estimate_value = sum(.data$estimate_value, na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(sex = "overall", - additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) + dplyr::mutate(sex = "overall") ) } return(result) } -createEmptyIntervalTable <- function(interval){ - if(interval == "overall"){ - tibble::tibble( - "sex" = as.character(), - "age_group" = as.character(), - "estimate_value" = as.double() - ) - - }else{ - tibble::tibble( - "interval_group" = as.character(), - "sex" = as.character(), - "age_group" = as.character(), - "estimate_value" = as.double() - ) - } - +createEmptyIntervalTable <- function(){ + tibble::tibble( + "interval_group" = as.character(), + "sex" = as.character(), + "age_group" = as.character(), + "estimate_value" = as.double() + ) } diff --git a/R/summariseMissingData.R b/R/summariseMissingData.R deleted file mode 100644 index 7bbc4871..00000000 --- a/R/summariseMissingData.R +++ /dev/null @@ -1,156 +0,0 @@ -#' Summarise missing data in omop tables -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param col A character vector of column names to check for missing values. -#' If `NULL`, all columns in the specified tables are checked. Default is `NULL`. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export - -summariseMissingData <- function(cdm, - omopTableName, - col = NULL, - sex = FALSE, - year = FALSE, - ageGroup = NULL){ - - - omopgenerics::validateCdmArgument(cdm) - - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, ageGroup = ageGroup, year = year) - stratification <- c(list(character()),omopgenerics::combineStrata(strata)) - - result_tables <- purrr::map(omopTableName, function(table) { - - if (omopgenerics::isTableEmpty(cdm[[table]])){ - cli::cli_warn(paste0(table, " omop table is empty.")) - return(NULL) - } - - omopTable <- cdm[[table]] - col_table <- intersect(col, colnames(omopTable)) - if (is.null(col_table) | rlang::is_empty(col_table)){ - col_table<-colnames(omopTable) - } - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - x <- omopTable |> PatientProfiles::addDemographicsQuery(age = FALSE, ageGroup = ageGroup, sex = sex, indexDate = indexDate) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - result_columns <- purrr::map(col_table, function(c) { - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(strata))) |> - dplyr::summarise( - na_count = sum(as.integer(is.na(.data[[c]])), na.rm = TRUE), - total_count = dplyr::n(), - .groups = "drop" - ) |> - dplyr::collect() - - # Group results for each level of stratification - grouped_results <- purrr::map(stratification, function(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise( - na_count = sum(na_count, na.rm = TRUE), - total_count = sum(total_count, na.rm = TRUE), - colName = c, - .groups = "drop" - ) |> - dplyr::mutate(na_percentage = dplyr::if_else(.data$total_count > 0, (.data$na_count / .data$total_count) * 100, 0)) - }) - - return(purrr::reduce(grouped_results, dplyr::bind_rows)) - - }) - - res <- purrr::reduce(result_columns, dplyr::union)|> - dplyr::mutate(omop_table = table) - - warningDataRequire(cdm = cdm, res = res, table = table) - - return(res) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - - result <- purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate( - na_count = as.double(na_count), # Cast na_count to double - na_percentage = as.double(na_percentage) - )|> - tidyr::pivot_longer( - cols = c(na_count, na_percentage), - names_to = "estimate_name", - values_to = "estimate_value" - ) - - - sr <- result |> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm), - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer", - "variable_level" = NA_character_ - ) |> - dplyr::rename("variable_name" = "colName") |> - dplyr::select(!c(total_count)) - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_missing_data" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - - return(sr) - -} - -warningDataRequire <- function(cdm, table, res){ -required_cols <- omopgenerics::omopTableFields(CDMConnector::cdmVersion(cdm))|> - dplyr::filter(.data$cdm_table_name==table)|> - dplyr::filter(.data$is_required==TRUE)|> - dplyr::pull(.data$cdm_field_name) -warning_columns <- res |> - dplyr::filter(.data$colName %in% required_cols)|> - dplyr::filter(.data$na_count>0)|> - dplyr::distinct(.data$colName)|> - dplyr::pull() - -if (length(warning_columns) > 0) { - cli::cli_warn(c( - "These columns contain missing values, which are not permitted:", - "{.val {warning_columns}}" - )) -} -} - - diff --git a/R/summariseObservationPeriod.R b/R/summariseObservationPeriod.R index 98b54773..4aae2672 100644 --- a/R/summariseObservationPeriod.R +++ b/R/summariseObservationPeriod.R @@ -1,6 +1,5 @@ #' Summarise the observation period table getting some overall statistics in a #' summarised_result object. -#' #' @param observationPeriod observation_period omop table. #' @param estimates Estimates to summarise the variables of interest ( #' `records per person`, `duration in days` and @@ -8,11 +7,8 @@ #' @param ageGroup A list of age groups to stratify results by. #' @param sex Boolean variable. Whether to stratify by sex (TRUE) or not #' (FALSE). -#' #' @return A summarised_result object with the summarised data. -#' #' @export -#' #' @examples #' \donttest{ #' library(dplyr, warn.conflicts = FALSE) @@ -32,7 +28,7 @@ summariseObservationPeriod <- function(observationPeriod, "median", "q75", "q95", "max", "density"), ageGroup = NULL, - sex = FALSE) { + sex = FALSE){ # input checks omopgenerics::assertClass(observationPeriod, class = "omop_table") omopgenerics::assertTrue(omopgenerics::tableName(observationPeriod) == "observation_period") @@ -50,7 +46,7 @@ summariseObservationPeriod <- function(observationPeriod, if (omopgenerics::isTableEmpty(observationPeriod)) { obsSr <- observationPeriod |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( variables = NULL, estimates = NULL, counts = TRUE) } else { @@ -74,7 +70,7 @@ summariseObservationPeriod <- function(observationPeriod, dplyr::collect() obsSr <- obs |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( strata = strataId, variables = c("duration", "next_obs"), @@ -86,7 +82,7 @@ summariseObservationPeriod <- function(observationPeriod, dplyr::group_by(.data$person_id, dplyr::across(dplyr::any_of(c("sex","age_group")))) |> dplyr::tally(name = "n") |> dplyr::ungroup() |> - # dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( variables = c("n"), estimates = estimates, @@ -123,8 +119,13 @@ summariseObservationPeriod <- function(observationPeriod, } addOrdinalLevels <- function(x) { - strata_cols <- visOmopResults::strataColumns(x) - strata_cols <- strata_cols[strata_cols != "id"] + strata_cols <- x |> + dplyr::select("strata_name") |> + dplyr::filter(!grepl("&&&", x$strata_name), + .data$strata_name != "overall", + .data$strata_name != "id") |> + dplyr::distinct() |> + dplyr::pull("strata_name") x <- x |> visOmopResults::splitStrata() @@ -144,7 +145,7 @@ addOrdinalLevels <- function(x) { x <- x |> dplyr::mutate("group_level" = .env$val) |> dplyr::select(-c("id")) |> - dplyr::mutate("group_name" = "observation_period_ordinal") |> + dplyr::mutate("group_name" = dplyr::if_else(.data$group_level == "overall", "overall", "observation_period_ordinal")) |> visOmopResults::uniteStrata(cols = strata_cols) return(x) diff --git a/R/summariseOmopSnapshot.R b/R/summariseOmopSnapshot.R index 7e2fa575..c569c186 100644 --- a/R/summariseOmopSnapshot.R +++ b/R/summariseOmopSnapshot.R @@ -19,7 +19,7 @@ summariseOmopSnapshot <- function(cdm) { internalTibble() |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( result_id = unique(summaryTable$result_id), - package_name = "OmopSketch", + package_name = "omopSketch", package_version = as.character(utils::packageVersion("OmopSketch")), result_type = "summarise_omop_snapshot" )) diff --git a/R/summarisePopulationCharacteristics.R b/R/summarisePopulationCharacteristics.R new file mode 100644 index 00000000..aa0e6dbf --- /dev/null +++ b/R/summarisePopulationCharacteristics.R @@ -0,0 +1,82 @@ + +#' Summarise the characteristics of the base population of a cdm_reference +#' object. +#' +#' @param cdm A cdm_reference object. +#' @param studyPeriod Dates to trim the observation period. If NA, +#' min(observation_period_start_date) and/or max(observation_period_end_date) +#' are used. +#' @param sex Whether to stratify the results by sex. +#' @param ageGroup List of age groups to stratify by at index date. +#' @return A summarised_result object. +#' @export +#' @examples +#' \donttest{ +#' cdm <- mockOmopSketch() +#' +#' summarisedPopulation <- summarisePopulationCharacteristics( +#' cdm = cdm, +#' studyPeriod = c("2010-01-01", NA), +#' sex = TRUE, +#' ageGroup = NULL +#' ) +#' +#' summarisedPopulation |> print() +#' +#' PatientProfiles::mockDisconnect(cdm = cdm) +#' } +summarisePopulationCharacteristics <- function(cdm, + studyPeriod = c(NA, NA), + sex = FALSE, + ageGroup = NULL) { + # check inputs + omopgenerics::validateCdmArgument(cdm) + studyPeriod <- validateStudyPeriod(cdm, studyPeriod) + omopgenerics::assertLogical(sex, length = 1) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + + cohort <- CohortConstructor::demographicsCohort(cdm = cdm, + name = omopgenerics::uniqueTableName()) |> + CohortConstructor::trimToDateRange(dateRange = studyPeriod) |> + PatientProfiles::addAgeQuery(indexDate = "cohort_end_date", + ageName = "age_at_end") + + cohort <- cohort |> + PatientProfiles::addDemographicsQuery(ageGroup = ageGroup, sex = sex, + priorObservation = F, + futureObservation = F, + age = F) + if(!is.null(ageGroup)) { + cohort <- cohort |> + dplyr::rename("age_group_at_start" = "age_group") + } + + strata <- switch( + paste(is.null(ageGroup), sex), + "TRUE TRUE" = list("sex"), + "TRUE FALSE" = list(), + "FALSE TRUE" = list("age_group_at_start", "sex", c("age_group_at_start", "sex")), + "FALSE FALSE" = list("age_group_at_start") + ) + + summarisedCohort <- cohort |> + CohortCharacteristics::summariseCharacteristics(strata = strata, + otherVariables = "age_at_end") |> + dplyr::mutate(variable_name = dplyr::if_else(.data$variable_name == "Age", "Age at start", .data$variable_name)) |> + dplyr::mutate(variable_name = factor(.data$variable_name, + levels = c("Number records", "Number subjects", "Cohort start date", "Cohort end date", + "Age at start", "Age at end", "Sex", "Prior observation", "Future observation", + "Days in cohort"))) |> + dplyr::arrange(.data$variable_name) |> + omopgenerics::newSummarisedResult( + settings = dplyr::tibble( + "result_id" = 1L, + "package_name" = "OmopSketch", + "package_version" = as.character(utils::packageVersion( + "OmopSketch" + )), + "result_type" = "summarise_population_characteristics" + )) + + return(summarisedCohort) +} diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index c5cb0f2f..8c3db977 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -4,7 +4,9 @@ #' #' @param cdm A cdm_reference object. #' @param omopTableName A character vector of omop tables from the cdm. -#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param unit Time unit it can either be "year" or "month". +#' @param unitInterval Number of years or months to include within the same +#' interval. #' @param ageGroup A list of age groups to stratify results by. #' @param sex Whether to stratify by sex (TRUE) or not (FALSE). #' @return A summarised_result object. @@ -18,7 +20,8 @@ #' summarisedResult <- summariseRecordCount( #' cdm = cdm, #' omopTableName = c("condition_occurrence", "drug_exposure"), -#' interval = "years", +#' unit = "year", +#' unitInterval = 10, #' ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), #' sex = TRUE #' ) @@ -30,17 +33,16 @@ #' } summariseRecordCount <- function(cdm, omopTableName, - interval = "overall", + unit = "year", + unitInterval = 1, ageGroup = NULL, sex = FALSE) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) omopgenerics::assertCharacter(omopTableName) - original_interval <- interval - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval + checkUnit(unit) + omopgenerics::assertNumeric(unitInterval, length = 1, min = 1) ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] omopgenerics::assertLogical(sex, length = 1) @@ -53,9 +55,8 @@ summariseRecordCount <- function(cdm, } summariseRecordCountInternal(x, cdm = cdm, - interval = interval, + unit = unit, unitInterval = unitInterval, - original_interval, ageGroup = ageGroup, sex = sex) } @@ -66,10 +67,9 @@ summariseRecordCount <- function(cdm, } #' @noRd -summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval, - original_interval, ageGroup, sex) { +summariseRecordCountInternal <- function(omopTableName, cdm, unit, unitInterval, + ageGroup, sex) { - prefix <- omopgenerics::tmpPrefix() omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() # Create initial variables ---- @@ -78,39 +78,37 @@ summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInter result <- omopgenerics::emptySummarisedResult() date <- startDate(omopTableName) - strata <- getStrataList(sex, ageGroup) + # Create strata variable ---- + strata <- c("age_group","sex") # Incidence counts ---- omopTable <- omopTable |> dplyr::select(dplyr::all_of(date), "person_id") - result <- addStrataToOmopTable(omopTable, date, ageGroup, sex) + omopTable <- addStrataToOmopTable(omopTable, date, ageGroup, sex) if(omopTableName != "observation_period") { - result <- result |> + omopTable <- omopTable |> filterInObservation(indexDate = date) } - if(interval != "overall"){ - # interval sequence ---- - timeInterval <- getIntervalTibble(omopTable = omopTable, - start_date_name = date, - end_date_name = date, - interval = interval, - unitInterval = unitInterval) - - # Insert interval table to the cdm ---- - cdm <- cdm |> omopgenerics::insertTable(name = paste0(prefix, "interval"), table = timeInterval) + # interval sequence ---- + interval <- getIntervalTibble(omopTable = omopTable, + start_date_name = date, + end_date_name = date, + unit = unit, + unitInterval = unitInterval) - # Obtain record counts for each interval ---- - result <- splitIncidenceBetweenIntervals(cdm, result, date, prefix) + # Insert interval table to the cdm ---- + cdm <- cdm |> + omopgenerics::insertTable(name = "interval", table = interval) - strata <- omopgenerics::combineStrata(c(unique(unlist(strata)), "interval_group")) - } + # Obtain record counts for each interval ---- + result <- splitIncidenceBetweenIntervals(cdm, omopTable, date, strata) # Create summarised result ---- - result <- createSummarisedResultRecordCount(result, strata, omopTable, omopTableName, original_interval) - omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(prefix)) + result <- createSummarisedResultRecordCount(result, sex, ageGroup, omopTable, omopTableName, unit, unitInterval) + omopgenerics::dropTable(cdm = cdm, name = "interval") return(result) } @@ -134,19 +132,20 @@ filterPersonId <- function(omopTable){ return(omopTable) } -addStrataToOmopTable <- function(omopTable, date, ageGroup, sex) { - omopTable |> - PatientProfiles::addDemographicsQuery( - indexDate = date, - age = FALSE, - ageGroup = ageGroup, - missingAgeGroupValue = "unknown", - sex = sex, - missingSexValue = "unknown", - priorObservation = FALSE, - futureObservation = FALSE, - dateOfBirth = FALSE - ) +addStrataToOmopTable <- function(omopTable, date, ageGroup, sex){ + suppressWarnings(omopTable |> + dplyr::mutate(sex = "overall") |> + dplyr::mutate(age_group = "overall") |> + PatientProfiles::addDemographicsQuery(indexDate = date, + age = FALSE, + ageGroup = ageGroup, + missingAgeGroupValue = "unknown", + sex = sex, + missingSexValue = "unknown", + priorObservation = FALSE, + futureObservation = FALSE, + dateOfBirth = FALSE)) + } filterInObservation <- function(x, indexDate){ @@ -186,7 +185,7 @@ getOmopTableEndDate <- function(omopTable, date){ dplyr::pull("end_date") } -getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interval, unitInterval){ +getIntervalTibble <- function(omopTable, start_date_name, end_date_name, unit, unitInterval){ startDate <- getOmopTableStartDate(omopTable, start_date_name) endDate <- getOmopTableEndDate(omopTable, end_date_name) @@ -195,14 +194,14 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interva ) |> dplyr::rowwise() |> dplyr::mutate("interval" = max(which( - .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$interval)) + .data$group >= seq.Date(from = startDate, to = endDate, by = paste(.env$unitInterval, .env$unit)) ), na.rm = TRUE)) |> dplyr::ungroup() |> dplyr::group_by(.data$interval) |> dplyr::mutate( "interval_start_date" = min(.data$group), - "interval_end_date" = dplyr::if_else(.env$interval == "year", + "interval_end_date" = dplyr::if_else(.env$unit == "year", clock::add_years(min(.data$group),.env$unitInterval)-1, clock::add_months(min(.data$group),.env$unitInterval)-1) ) |> @@ -219,8 +218,8 @@ getIntervalTibble <- function(omopTable, start_date_name, end_date_name, interva dplyr::distinct() } -splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){ - cdm[[paste0(prefix, "interval")]] |> +splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, strata){ + cdm$interval |> dplyr::inner_join( omopTable |> dplyr::rename("incidence_date" = dplyr::all_of(.env$date)) |> @@ -229,45 +228,35 @@ splitIncidenceBetweenIntervals <- function(cdm, omopTable, date, prefix){ ) |> dplyr::select(-c("my")) |> dplyr::relocate("person_id") |> - dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date")) + dplyr::select(-c("interval_start_date", "interval_end_date", "incidence_date", "person_id")) } -createSummarisedResultRecordCount <- function(result, strata, omopTable, omopTableName, original_interval){ +createSummarisedResultRecordCount <- function(result, sex, ageGroup, omopTable, omopTableName, unit, unitInterval){ - result <- result |> - dplyr::mutate(n = 1) |> - dplyr::select(-"person_id") |> + result |> + dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 PatientProfiles::summariseResult( - variables = "n", - strata = strata, + strata = getStrataList(sex, ageGroup), includeOverallStrata = TRUE, - estimates = as.character(), - counts = TRUE, + estimates = "count", + counts = FALSE ) |> - suppressMessages() |> - dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> + dplyr::filter(!.data$variable_name %in% c("sex", "age_group")) |> + dplyr::mutate("variable_name" = "incidence_records") |> dplyr::mutate( + "result_id" = as.integer(1), + "cdm_name" = omopgenerics::cdmName(omopgenerics::cdmReference(omopTable)), "group_name" = "omop_table", - "group_level" = omopTableName - ) - - if(original_interval != "overall"){ - result <- result |> - visOmopResults::splitStrata() |> - dplyr::mutate(additional_level = .data$interval_group) |> - dplyr::mutate(additional_name = dplyr::if_else(.data$additional_level == "overall", "overall", "time_interval")) |> - visOmopResults::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> - dplyr::select(-"interval_group") - } - - result |> - omopgenerics::newSummarisedResult( - settings = dplyr::tibble( - "result_id" = 1L, - "result_type" = "summarise_record_count", - "package_name" = "OmopSketch", - "package_version" = as.character(utils::packageVersion("OmopSketch")), - "interval" = .env$original_interval - ) - ) + "group_level" = omopTableName, + "additional_name" = "time_interval", + "additional_level" = gsub(" to.*","",.data$variable_level) + ) |> + omopgenerics::newSummarisedResult(settings = dplyr::tibble( + "result_id" = 1L, + "result_type" = "summarise_record_count", + "package_name" = "OmopSketch", + "package_version" = as.character(utils::packageVersion("OmopSketch")), + "unit" = .env$unit, + "unitInterval" = .env$unitInterval + )) } diff --git a/R/tablePopulationCharacteristics.R b/R/tablePopulationCharacteristics.R new file mode 100644 index 00000000..886d4de3 --- /dev/null +++ b/R/tablePopulationCharacteristics.R @@ -0,0 +1,57 @@ + +#' Create a visual table from a summarise_population_characteristics result. +#' +#' @param result Output from summarisePopulationCharacteristics(). +#' @param type Type of formatting output table, either "gt" or "flextable". +#' @return A gt or flextable object with the summarised data. +#' @export +#' @examples +#' \donttest{ +#' cdm <- mockOmopSketch() +#' +#' summarisedPopulation <- summarisePopulationCharacteristics( +#' cdm = cdm, +#' studyPeriod = c("2010-01-01", NA), +#' sex = TRUE, +#' ageGroup = list("<=60" = c(0, 60), ">60" = c(61, Inf)) +#' ) +#' +#' summarisedPopulation |> +#' suppress(minCellCount = 5) |> +#' tablePopulationCharacteristics() +#' +#' PatientProfiles::mockDisconnect(cdm = cdm) +#'} +tablePopulationCharacteristics <- function(result, + type = "gt") { + # Initial checks ---- + omopgenerics::validateResultArgument(result) + omopgenerics::assertChoice(type, choicesTables()) + + # subset to result_type of interest + result <- result |> + visOmopResults::filterSettings( + .data$result_type == "summarise_population_characteristics") + + # check if it is empty + if (nrow(result) == 0) { + warnEmpty("summarise_population_characteristics") + return(emptyTable(type)) + } + + # Function + result <- result |> + visOmopResults::visOmopTable( + hide = c("cohort_name"), + estimateName = c( + "N%" = " ()", + "N" = "", + "Median [Q25 - Q75]" = " [ - ]", + "Mean (SD)" = " ()", + "Range" = " to "), + rename = c("Database name" = "cdm_name"), + header = c("cdm_name"), + groupColumn = visOmopResults::strataColumns(result)) + + return(result) +} diff --git a/R/utilities.R b/R/utilities.R index 007e8582..aa23797f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,23 +1,18 @@ startDate <- function(name) { tables$start_date[tables$table_name == name] } - endDate <- function(name) { tables$end_date[tables$table_name == name] } - standardConcept <- function(name) { tables$standard_concept[tables$table_name == name] } - sourceConcept <- function(name) { tables$source_concept[tables$table_name == name] } - typeConcept <- function(name) { tables$type_concept[tables$table_name == name] } - tableId <- function(name) { tables$id[tables$table_name == name] } @@ -26,7 +21,7 @@ warnFacetColour <- function(result, cols) { colsToWarn <- result |> dplyr::select( "cdm_name", "group_name", "group_level", "strata_name", "strata_level", - "variable_name", "variable_level" + "variable_name", "variable_level", "additional_name", "additional_level" ) |> dplyr::distinct() |> visOmopResults::splitAll() |> @@ -41,14 +36,12 @@ warnFacetColour <- function(result, cols) { } invisible(NULL) } - collapseStr <- function(x, sep) { x <- x[x != ""] if (length(x) == 1) return(x) len <- length(x) paste0(paste0(x[-len], collapse = ", "), " ", sep, " ", x[len]) } - asCharacterFacet <- function(facet) { if (rlang::is_formula(facet)) { facet <- as.character(facet) diff --git a/_pkgdown.yml b/_pkgdown.yml index 1e9f648a..1c5f743d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -28,6 +28,11 @@ reference: - contents: - summariseConceptCounts - plotConceptCounts +- subtitle: Individual Characteristics + desc: Summarise characteristics of the individuals in the OMOP Common Data Model +- contents: + - summarisePopulationCharacteristics + - tablePopulationCharacteristics - subtitle: Mock Database desc: Create a mock database to test the OmopSketch package - contents: diff --git a/man/OmopSketch-package.Rd b/man/OmopSketch-package.Rd index 459a2b9e..0bc315f4 100644 --- a/man/OmopSketch-package.Rd +++ b/man/OmopSketch-package.Rd @@ -25,7 +25,6 @@ Authors: \itemize{ \item Kim Lopez-Guell \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) \item Elin Rowlands \email{elin.rowlands@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0005-5166-0417}{ORCID}) - \item Cecilia Campanile \email{cecilia.campanile@ndorms.ox.ac.uk} \item Edward Burn \email{edward.burn@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-9286-1128}{ORCID}) \item Martí Català \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) } diff --git a/man/mockOmopSketch.Rd b/man/mockOmopSketch.Rd index 11424ae4..2433e7a1 100644 --- a/man/mockOmopSketch.Rd +++ b/man/mockOmopSketch.Rd @@ -34,5 +34,8 @@ A mock cdm_reference object. Creates a mock database to test OmopSketch package. } \examples{ +\donttest{ +library(OmopSketch) mockOmopSketch(numberIndividuals = 100) } +} diff --git a/man/plotConceptCounts.Rd b/man/plotConceptCounts.Rd index 2492a3d5..dc5387f8 100644 --- a/man/plotConceptCounts.Rd +++ b/man/plotConceptCounts.Rd @@ -23,7 +23,7 @@ Plot the concept counts of a summariseConceptCounts output. } \examples{ \donttest{ -library(dplyr) +library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() @@ -36,8 +36,8 @@ result <- cdm |> ) result |> - filter(variable_name == "Number subjects") |> - plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") + filter(estimate_name == "person_count", variable_name == "overall") |> + plotConceptCounts(facet = "codelist_name", colour = "codelist_name") PatientProfiles::mockDisconnect(cdm) } diff --git a/man/plotInObservation.Rd b/man/plotInObservation.Rd index f0d322ae..a16a84c6 100644 --- a/man/plotInObservation.Rd +++ b/man/plotInObservation.Rd @@ -23,7 +23,7 @@ Create a ggplot2 plot from the output of summariseInObservation(). } \examples{ \donttest{ -library(dplyr) +library(dplyr, warn.conflicts = FALSE) cdm <- mockOmopSketch() diff --git a/man/summariseConceptCounts.Rd b/man/summariseConceptCounts.Rd index 4e826319..3132736d 100644 --- a/man/summariseConceptCounts.Rd +++ b/man/summariseConceptCounts.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/summariseConceptCounts.R \name{summariseConceptCounts} \alias{summariseConceptCounts} -\title{Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted.} +\title{Summarise code use in patient-level data} \usage{ summariseConceptCounts( cdm, conceptId, countBy = c("record", "person"), concept = TRUE, - interval = "overall", + year = FALSE, sex = FALSE, ageGroup = NULL ) @@ -24,7 +24,7 @@ person-level counts} \item{concept}{TRUE or FALSE. If TRUE code use will be summarised by concept.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} \item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} @@ -36,11 +36,10 @@ A summarised_result object with results overall and, if specified, by strata. } \description{ -Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +Summarise code use in patient-level data } \examples{ \donttest{ -library(OmopSketch) cdm <- mockOmopSketch() @@ -51,6 +50,5 @@ results <- summariseConceptCounts(cdm, conceptId = cs) results PatientProfiles::mockDisconnect(cdm) - } } diff --git a/man/summariseInObservation.Rd b/man/summariseInObservation.Rd index 59c4e224..db3b8c2d 100644 --- a/man/summariseInObservation.Rd +++ b/man/summariseInObservation.Rd @@ -7,7 +7,8 @@ time.} \usage{ summariseInObservation( observationPeriod, - interval = "overall", + unit = "year", + unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE @@ -17,7 +18,10 @@ summariseInObservation( \item{observationPeriod}{An observation_period omop table. It must be part of a cdm_reference object.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{unit}{Whether to stratify by "year" or by "month".} + +\item{unitInterval}{Number of years or months to include within the time +interval.} \item{output}{Output format. It can be either the number of records ("records") that are in observation in the specific interval of time, the @@ -43,7 +47,8 @@ cdm <- mockOmopSketch() result <- summariseInObservation( cdm$observation_period, - interval = "months", + unit = "month", + unitInterval = 6, output = c("person-days","records"), ageGroup = list("<=60" = c(0,60), ">60" = c(61, Inf)), sex = TRUE @@ -53,6 +58,5 @@ result |> glimpse() PatientProfiles::mockDisconnect(cdm) - } } diff --git a/man/summarisePopulationCharacteristics.Rd b/man/summarisePopulationCharacteristics.Rd new file mode 100644 index 00000000..528473f3 --- /dev/null +++ b/man/summarisePopulationCharacteristics.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarisePopulationCharacteristics.R +\name{summarisePopulationCharacteristics} +\alias{summarisePopulationCharacteristics} +\title{Summarise the characteristics of the base population of a cdm_reference +object.} +\usage{ +summarisePopulationCharacteristics( + cdm, + studyPeriod = c(NA, NA), + sex = FALSE, + ageGroup = NULL +) +} +\arguments{ +\item{cdm}{A cdm_reference object.} + +\item{studyPeriod}{Dates to trim the observation period. If NA, +min(observation_period_start_date) and/or max(observation_period_end_date) +are used.} + +\item{sex}{Whether to stratify the results by sex.} + +\item{ageGroup}{List of age groups to stratify by at index date.} +} +\value{ +A summarised_result object. +} +\description{ +Summarise the characteristics of the base population of a cdm_reference +object. +} +\examples{ +\donttest{ +cdm <- mockOmopSketch() + +summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("2010-01-01", NA), + sex = TRUE, + ageGroup = NULL +) + +summarisedPopulation |> print() + +PatientProfiles::mockDisconnect(cdm = cdm) +} +} diff --git a/man/summariseRecordCount.Rd b/man/summariseRecordCount.Rd index 07d4c507..b502d76c 100644 --- a/man/summariseRecordCount.Rd +++ b/man/summariseRecordCount.Rd @@ -8,7 +8,8 @@ records that fall within the observation period are considered.} summariseRecordCount( cdm, omopTableName, - interval = "overall", + unit = "year", + unitInterval = 1, ageGroup = NULL, sex = FALSE ) @@ -18,7 +19,10 @@ summariseRecordCount( \item{omopTableName}{A character vector of omop tables from the cdm.} -\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} +\item{unit}{Time unit it can either be "year" or "month".} + +\item{unitInterval}{Number of years or months to include within the same +interval.} \item{ageGroup}{A list of age groups to stratify results by.} @@ -40,7 +44,8 @@ cdm <- mockOmopSketch() summarisedResult <- summariseRecordCount( cdm = cdm, omopTableName = c("condition_occurrence", "drug_exposure"), - interval = "years", + unit = "year", + unitInterval = 10, ageGroup = list("<=20" = c(0,20), ">20" = c(21, Inf)), sex = TRUE ) diff --git a/man/tablePopulationCharacteristics.Rd b/man/tablePopulationCharacteristics.Rd new file mode 100644 index 00000000..c395f68b --- /dev/null +++ b/man/tablePopulationCharacteristics.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tablePopulationCharacteristics.R +\name{tablePopulationCharacteristics} +\alias{tablePopulationCharacteristics} +\title{Create a visual table from a summarise_population_characteristics result.} +\usage{ +tablePopulationCharacteristics(result, type = "gt") +} +\arguments{ +\item{result}{Output from summarisePopulationCharacteristics().} + +\item{type}{Type of formatting output table, either "gt" or "flextable".} +} +\value{ +A gt or flextable object with the summarised data. +} +\description{ +Create a visual table from a summarise_population_characteristics result. +} +\examples{ +\donttest{ +cdm <- mockOmopSketch() + +summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("2010-01-01", NA), + sex = TRUE, + ageGroup = list("<=60" = c(0, 60), ">60" = c(61, Inf)) +) + +summarisedPopulation |> + suppress(minCellCount = 5) |> + tablePopulationCharacteristics() + +PatientProfiles::mockDisconnect(cdm = cdm) +} +} diff --git a/tests/testthat/test-plotInObservation.R b/tests/testthat/test-plotInObservation.R index 69a06126..b937eeeb 100644 --- a/tests/testthat/test-plotInObservation.R +++ b/tests/testthat/test-plotInObservation.R @@ -4,17 +4,17 @@ test_that("plotInObservation works", { cdm <- cdmEunomia() # summariseInObservationPlot plot ---- - x <- summariseInObservation(cdm$observation_period, interval = "years") + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8) expect_no_error(inherits(plotInObservation(x), "ggplot")) x <- x |> dplyr::filter(result_id == -1) expect_error(plotInObservation(x)) - expect_error(plotInObservation(summariseInObservation(cdm$observation_period, interval = "years", output = c("person-days", "records"), ageGroup = NULL, sex = FALSE))) + expect_error(plotInObservation(summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = c("person-days", "records"), ageGroup = NULL, sex = FALSE))) - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", ageGroup = NULL, sex = FALSE) + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = "person-days", ageGroup = NULL, sex = FALSE) expect_true(inherits(plotInObservation(x), "ggplot")) - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "records", ageGroup = NULL, sex = FALSE) + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1, output = "records", ageGroup = NULL, sex = FALSE) expect_true(inherits(plotInObservation(x), "ggplot")) result <- cdm$observation_period |> diff --git a/tests/testthat/test-summariseAllConceptCounts.R b/tests/testthat/test-summariseAllConceptCounts.R deleted file mode 100644 index f90bbc62..00000000 --- a/tests/testthat/test-summariseAllConceptCounts.R +++ /dev/null @@ -1,59 +0,0 @@ -test_that("summariseAllConceptCount works", { - skip_on_cran() - - cdm <- cdmEunomia() - - expect_true(inherits(summariseAllConceptCounts(cdm, "drug_exposure"), "summarised_result")) - expect_warning(summariseAllConceptCounts(cdm, "observation_period")) - expect_no_error(x <- summariseAllConceptCounts(cdm, "visit_occurrence")) - expect_no_error(summariseAllConceptCounts(cdm, "condition_occurrence", countBy = c("record", "person"))) - expect_no_error(summariseAllConceptCounts(cdm, "drug_exposure")) - expect_no_error(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "person")) - expect_warning(summariseAllConceptCounts(cdm, "device_exposure")) - expect_no_error(y <- summariseAllConceptCounts(cdm, "measurement")) - expect_no_error(summariseAllConceptCounts(cdm, "observation", year = TRUE)) - expect_warning(summariseAllConceptCounts(cdm, "death")) - - expect_no_error(all <- summariseAllConceptCounts(cdm, c("visit_occurrence", "measurement"))) - expect_equal(all, dplyr::bind_rows(x, y)) - expect_equal(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "record"), summariseAllConceptCounts(cdm, "procedure_occurrence")) - - expect_error(summariseAllConceptCounts(cdm, omopTableName = "")) - expect_error(summariseAllConceptCounts(cdm, omopTableName = "visit_occurrence", countBy = "dd")) - - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> - dplyr::distinct(.data$strata_level) |> - dplyr::tally() |> - dplyr::pull() == 9) - - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", ageGroup = list(c(0, 50))) |> - dplyr::distinct(.data$strata_level) |> - dplyr::tally() |> - dplyr::pull() == 3) - - s <- summariseAllConceptCounts(cdm, "procedure_occurrence") - z <- summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, year = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) - - x <- z |> - dplyr::filter(strata_level == "overall") |> - dplyr::select(variable_level, estimate_value) - s <- s |> - dplyr::select(variable_level, estimate_value) - expect_equal(x, s) - - x <- z |> - dplyr::filter(strata_name == "age_group") |> - dplyr::group_by(variable_level) |> - dplyr::summarise(estimate_value = sum(as.numeric(estimate_value), na.rm = TRUE), .groups = "drop") |> - dplyr::mutate(estimate_value = as.character(estimate_value)) - - p <- s |> - dplyr::select(variable_level, estimate_value) - - expect_true(all.equal( - as.data.frame(x) |> dplyr::arrange(variable_level), - as.data.frame(p) |> dplyr::arrange(variable_level), - check.attributes = FALSE - )) - -}) diff --git a/tests/testthat/test-summariseClinicalRecords.R b/tests/testthat/test-summariseClinicalRecords.R index f2b76f4c..82b65cf3 100644 --- a/tests/testthat/test-summariseClinicalRecords.R +++ b/tests/testthat/test-summariseClinicalRecords.R @@ -193,17 +193,14 @@ test_that("summariseClinicalRecords() sex and ageGroup argument work", { cdm <- CDMConnector::copyCdmTo( con = connection(), cdm = cdm, schema = schema()) - result <- summariseClinicalRecords( - cdm = cdm, - omopTableName = "observation_period", - inObservation = FALSE, - standardConcept = FALSE, - sourceVocabulary = FALSE, - domainId = FALSE, - typeConcept = FALSE, - sex = TRUE, - ageGroup = list("old" = c(10, Inf), "young" = c(0, 9)) - ) + result <- summariseClinicalRecords(cdm, "observation_period", + inObservation = FALSE, + standardConcept = FALSE, + sourceVocabulary = FALSE, + domainId = FALSE, + typeConcept = FALSE, + sex = TRUE, + ageGroup = list("old" = c(10, Inf), "young" = c(0, 9))) # Check num records records <- result |> diff --git a/tests/testthat/test-summariseConceptCounts.R b/tests/testthat/test-summariseConceptCounts.R index 6e63e46e..c64300bc 100644 --- a/tests/testthat/test-summariseConceptCounts.R +++ b/tests/testthat/test-summariseConceptCounts.R @@ -9,9 +9,7 @@ test_that("summarise code use - eunomia", { startNames <- CDMConnector::listSourceTables(cdm) results <- summariseConceptCounts(cdm = cdm, conceptId = cs, - interval = "years", - countBy = c("record", "person"), - concept = TRUE, + year = TRUE, sex = TRUE, ageGroup = list(c(0,17), c(18,65), @@ -23,17 +21,16 @@ test_that("summarise code use - eunomia", { checkResultType(results, "summarise_concept_counts") # min cell counts: - expect_equal( - omopgenerics::suppress(results, 5) |> - visOmopResults::splitAdditional() |> + expect_true( + all(is.na( + omopgenerics::suppress(results) |> dplyr::filter( - strata_level == "overall", - variable_name == "Number records", - standard_concept_id == "overall", - time_interval == "1909-01-01 to 1909-12-31", - group_level == "acetiminophen") |> - dplyr::pull("estimate_value"), - as.character(NA) + variable_name == "overall", + strata_level == "1909", + group_level == "acetiminophen" + ) |> + dplyr::pull("estimate_value") + )) ) # check is a summarised result @@ -43,17 +40,16 @@ test_that("summarise code use - eunomia", { # overall record count expect_true(results %>% - dplyr::filter(group_name == "codelist_name", - strata_name == "overall", - strata_level == "overall", - additional_level == "overall", - group_level == "acetiminophen", - variable_name == "Number records") %>% + dplyr::filter(group_name == "codelist_name" & + strata_name == "overall" & + strata_level == "overall" & + group_level == "acetiminophen" & + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% - dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> - dplyr::filter(drug_concept_id %in% c(acetiminophen)) %>% + dplyr::filter(drug_concept_id %in% acetiminophen) %>% dplyr::tally() %>% dplyr::pull("n")) @@ -63,8 +59,8 @@ test_that("summarise code use - eunomia", { strata_name == "overall" & strata_level == "overall" & group_level == "acetiminophen" & - variable_name == "Number subjects", - additional_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -77,13 +73,12 @@ test_that("summarise code use - eunomia", { # by year # overall record count expect_true(results %>% - visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & - strata_name == "overall" & - time_interval == "2008-01-01 to 2008-12-31" & + strata_name == "year" & + strata_level == "2008" & group_level == "acetiminophen" & - variable_name == "Number records", - standard_concept_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -94,13 +89,12 @@ test_that("summarise code use - eunomia", { # overall person count expect_true(results %>% - visOmopResults::splitAdditional() |> dplyr::filter(group_name == "codelist_name" & - strata_name == "overall" & - time_interval == "2008-01-01 to 2008-12-31" & + strata_name == "year" & + strata_level == "2008" & group_level == "acetiminophen" & - variable_name == "Number subjects", - standard_concept_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -118,8 +112,8 @@ test_that("summarise code use - eunomia", { strata_name == "sex" & strata_level == "Male" & group_level == "acetiminophen" & - variable_name == "Number records" & - additional_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -134,8 +128,8 @@ test_that("summarise code use - eunomia", { strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & group_level == "acetiminophen" & - variable_name == "Number records", - additional_name == "overall") %>% + estimate_name == "record_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -154,8 +148,8 @@ test_that("summarise code use - eunomia", { strata_name == "age_group &&& sex" & strata_level == "18 to 65 &&& Male" & group_level == "acetiminophen" & - variable_name == "Number subjects", - additional_name == "overall") %>% + estimate_name == "person_count", + variable_name == "overall") %>% dplyr::pull("estimate_value") |> as.numeric() == cdm$drug_exposure %>% @@ -170,140 +164,92 @@ test_that("summarise code use - eunomia", { dplyr::tally() %>% dplyr::pull("n")) - results1 <- summariseConceptCounts(cdm = cdm, - conceptId = cs, - interval = "years", - concept = FALSE, - sex = TRUE, - ageGroup = list(c(0,17), - c(18,65), - c(66, 100))) - - expect_equal( - results1 |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number records") |> - dplyr::arrange(dplyr::across(dplyr::everything())), - results |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number records", standard_concept_name == "overall") |> - dplyr::select(-c(starts_with("standard_"), starts_with("source_"), "domain_id")) |> - dplyr::arrange(dplyr::across(dplyr::everything())) - ) - expect_true(results1 |> - visOmopResults::splitAdditional() |> - dplyr::filter(variable_name == "Number subjects", - group_level == "acetiminophen", - time_interval == "1909-01-01 to 1909-12-31", - strata_level == "0 to 17") |> - dplyr::pull("estimate_value") |> - as.numeric() == - cdm$drug_exposure %>% - dplyr::filter(drug_concept_id %in% acetiminophen) %>% - PatientProfiles::addAge(indexDate = "drug_exposure_start_date") %>% - PatientProfiles::addSex() %>% - dplyr::filter(age >= "0", age <= "17", clock::get_year(drug_exposure_start_date) == 1909) |> - dplyr::select("person_id") %>% - dplyr::distinct() %>% - dplyr::tally() %>% - dplyr::pull("n")) - expect_true(results1$group_level |> unique() |> length() == 2) - results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "person", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number subjects")) > 0) + dplyr::filter(estimate_name == "person_count")) > 0) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number records")) == 0) - + dplyr::filter(estimate_name == "record_count")) == 0) results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "record", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number subjects")) == 0) + dplyr::filter(estimate_name == "person_count")) == 0) expect_true(nrow(results %>% - dplyr::filter(variable_name == "Number records")) > 0) + dplyr::filter(estimate_name == "record_count")) > 0) # domains covered # condition expect_true(nrow(summariseConceptCounts(list(cs= c(4112343)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # visit expect_true(nrow(summariseConceptCounts(list(cs= c(9201)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # drug expect_true(nrow(summariseConceptCounts(list(cs= c(40213160)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # measurement expect_true(nrow(summariseConceptCounts(list(cs= c(3006322)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # procedure and condition expect_true(nrow(summariseConceptCounts(list(cs= c(4107731,4112343)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL))>1) # no records expect_message(results <- summariseConceptCounts(list(cs= c(999999)), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_true(nrow(results) == 0) # conceptId NULL (but reduce the computational time by filtering concepts first) - # cdm$concept <- cdm$concept |> - # dplyr::filter(grepl("k", concept_name)) - # - # skip("conceptId = NULL not supported yet") - # results <- summariseConceptCounts(cdm = cdm, - # year = FALSE, - # sex = FALSE, - # ageGroup = NULL) - # - # results_concepts <- results |> - # dplyr::select(variable_name) |> - # dplyr::distinct() |> - # dplyr::pull() - # concepts <- cdm$concept |> - # dplyr::select(concept_name) |> - # dplyr::distinct() |> - # dplyr::pull() - # - # expect_true(all(results_concepts %in% c("overall",concepts))) + cdm$concept <- cdm$concept |> + dplyr::filter(grepl("k", concept_name)) - # check attributes + skip("conceptId = NULL not supported yet") results <- summariseConceptCounts(cdm = cdm, - conceptId = cs, - interval = "years", - sex = TRUE, - ageGroup = list(c(0,17), - c(18,65), - c(66, 100))) + year = FALSE, + sex = FALSE, + ageGroup = NULL) + + results_concepts <- results |> + dplyr::select(variable_name) |> + dplyr::distinct() |> + dplyr::pull() + concepts <- cdm$concept |> + dplyr::select(concept_name) |> + dplyr::distinct() |> + dplyr::pull() + expect_true(all(results_concepts %in% c("overall",concepts))) + + # check attributes expect_true(omopgenerics::settings(results)$package_name == "OmopSketch") expect_true(omopgenerics::settings(results)$result_type == "summarise_concept_counts") expect_true(omopgenerics::settings(results)$package_version == packageVersion("OmopSketch")) @@ -311,42 +257,42 @@ test_that("summarise code use - eunomia", { # expected errors# expected errors expect_error(summariseConceptCounts("not a concept", cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts("123", cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list("123"), # not named cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = "not a cdm", - interval = "years", + year = FALSE, sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "Maybe", + year = "Maybe", sex = FALSE, ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = "Maybe", ageGroup = NULL)) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = list(c(18,17)))) expect_error(summariseConceptCounts(list(a = 123), cdm = cdm, - interval = "years", + year = FALSE, sex = FALSE, ageGroup = list(c(0,17), c(15,20)))) @@ -355,153 +301,129 @@ test_that("summarise code use - eunomia", { }) test_that("summarise code use - mock data", { - skip_on_cran() - person <- tibble::tibble( - person_id = c(1L,2L), - gender_concept_id = c(8532L,8507L), - year_of_birth = c(1997L,1963L), - month_of_birth = c(8L,1L), - day_of_birth = c(22L,27L), - race_concept_id = c(1L,1L), - ethnicity_concept_id = c(1L,1L) - ) - observation_period <- tibble::tibble( - person_id = c(1L,2L), - observation_period_id = c(1L,2L), - observation_period_start_date = c(as.Date("2000-06-03"), as.Date("1999-05-04")), - observation_period_end_date = c(as.Date("2013-08-03"), as.Date("2004-01-04")), - period_type_concept_id = c(1L,1L) - ) - condition_occurrence <- tibble::tibble( - person_id = c(1L,1L,1L,2L,2L,2L,2L,2L), - condition_concept_id = c(1L,3L,5L,1L,5L,5L,17L,17L), - condition_start_date = c(as.Date("2002-06-30"), as.Date("2004-05-29"), as.Date("2001-12-20"), - as.Date("2000-03-10"), as.Date("2000-02-25"), as.Date("1999-07-15"), - as.Date("1999-06-06"), as.Date("2000-07-17")), - condition_end_date = c(as.Date("2004-09-30"), as.Date("2009-05-29"), as.Date("2008-12-20"), - as.Date("2001-03-10"), as.Date("2001-12-25"), as.Date("2001-07-15"), - as.Date("2002-06-06"), as.Date("2000-11-17")), - condition_occurrence_id = c(1L,2L,3L,4L,5L,6L,7L,8L), - condition_type_concept_id = c(1L), - condition_source_concept_id = c(as.integer(NA)) - ) - concept <- tibble::tibble( - concept_id = c(1L,3L,5L,17L), - concept_name = c("Musculoskeletal disorder", "Arthritis", "Osteoarthritis of hip", "Arthritis"), - domain_id = c("Condition"), - standard_concept = c("S","S","S",NA), - vocabulary_id = c("SNOMED", "SNOMED", "SNOMED", "ICD10"), - concept_class_id = c("Clinical Finding", "Clinical Finding", "Clinical Finding", "ICD Code"), - concept_code = c("1234"), - valid_start_date = c(as.Date(NA)), - valid_end_date = c(as.Date(NA)) - ) - - cdm <- omopgenerics::cdmFromTables( - tables = list( - person = person, - observation_period = observation_period, - condition_occurrence = condition_occurrence, - concept = concept - ), - cdmName = "mock data" - ) - cdm <- CDMConnector::copyCdmTo( - con = connection(), cdm = cdm, schema = schema()) - - conceptId <- list( - "Arthritis" = c(17,3), - "Musculoskeletal disorder" = c(1), - "Osteoarthritis of hip" = c(5) - ) - - result <- summariseConceptCounts(cdm, conceptId) - - # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2", "1", "1", "1")) - - # Osteoarthritis (code 5), two records ind 2, one record ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Osteoarthritis of hip", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("3","2")) - - # Musculoskeletal disorder (code 1), one record each ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder", - strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) - - result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) - # Individuals belong to the same age group but to different sex groups - - # Arthritis (codes 3 and 17), one record of each per ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis" & strata_level == "Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name =="Arthritis" & strata_level == "3 to 150 &&& Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Arthritis" & strata_level == "3 to 150") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","1","1","1")) - - # Osteoarthritis of hip (code 5), two records ind 2 and one ind 1 - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Osteoarthritis of hip" & strata_level == "Female") |> - dplyr::tally() |> - dplyr::pull(), - 2) - - # Musculoskeletal disorder (code 1), one record each ind - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("1","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("1","1")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) - expect_equal(result |> - visOmopResults::splitAdditional() |> - dplyr::filter(standard_concept_name == "Musculoskeletal disorder" & strata_level == "overall") |> - dplyr::arrange(standard_concept_id, variable_name) |> - dplyr::pull(estimate_value), - c("2","2")) - - PatientProfiles::mockDisconnect(cdm) + # skip_on_cran() + # + # person <- tibble::tibble( + # person_id = c(1L,2L), + # gender_concept_id = c(8532L,8507L), + # year_of_birth = c(1997L,1963L), + # month_of_birth = c(8L,1L), + # day_of_birth = c(22L,27L), + # race_concept_id = c(1L,1L), + # ethnicity_concept_id = c(1L,1L) + # ) + # observation_period <- tibble::tibble( + # person_id = c(1L,2L), + # observation_period_id = c(1L,2L), + # observation_period_start_date = c(as.Date("2000-06-03"), as.Date("1999-05-04")), + # observation_period_end_date = c(as.Date("2013-08-03"), as.Date("2004-01-04")), + # period_type_concept_id = c(1L,1L) + # ) + # condition_occurrence <- tibble::tibble( + # person_id = c(1L,1L,1L,2L,2L,2L,2L,2L), + # condition_concept_id = c(1L,3L,5L,1L,5L,5L,17L,17L), + # condition_start_date = c(as.Date("2002-06-30"), as.Date("2004-05-29"), as.Date("2001-12-20"), + # as.Date("2000-03-10"), as.Date("2000-02-25"), as.Date("1999-07-15"), + # as.Date("1999-06-06"), as.Date("2000-07-17")), + # condition_end_date = c(as.Date("2004-09-30"), as.Date("2009-05-29"), as.Date("2008-12-20"), + # as.Date("2001-03-10"), as.Date("2001-12-25"), as.Date("2001-07-15"), + # as.Date("2002-06-06"), as.Date("2000-11-17")), + # condition_occurrence_id = c(1L,2L,3L,4L,5L,6L,7L,8L), + # condition_type_concept_id = c(1L), + # condition_source_concept_id = c(as.integer(NA)) + # ) + # concept <- tibble::tibble( + # concept_id = c(1L,3L,5L,17L), + # concept_name = c("Musculoskeletal disorder", "Arthritis", "Osteoarthritis of hip", "Arthritis"), + # domain_id = c("Condition"), + # standard_concept = c("S","S","S",NA), + # concept_class_id = c("Clinical Finding", "Clinical Finding", "Clinical Finding", "ICD Code"), + # concept_code = c("1234"), + # valid_start_date = c(as.Date(NA)), + # valid_end_date = c(as.Date(NA)), + # vocabulary_id = as.character(NA) + # ) + # + # cdm <- omopgenerics::cdmFromTables( + # tables = list( + # person = person, + # observation_period = observation_period, + # condition_occurrence = condition_occurrence, + # concept = concept + # ), + # cdmName = "mock data" + # ) + # cdm <- CDMConnector::copyCdmTo( + # con = connection(), cdm = cdm, schema = schema()) + # + # conceptId <- list( + # "Arthritis" = c(17,3), + # "Musculoskeletal disorder" = c(1), + # "Osteoarthritis of hip" = c(5) + # ) + # + # result <- summariseConceptCounts(cdm, conceptId) + # + # # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 + # expect_true(all(result |> + # dplyr::filter(variable_name == "Arthritis") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c("1", "2", "1", "1"))) + # + # # Osteoarthritis (code 5), two records ind 2, one record ind 1 + # expect_true(all(result |> + # dplyr::filter(variable_name == "Osteoarthritis of hip") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(2,3))) + # + # # Musculoskeletal disorder (code 1), one record each ind + # expect_true(all(result |> + # dplyr::filter(variable_name == "Musculoskeletal disorder") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(2,2))) + # + # result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) + # # Individuals belong to the same age group but to different sex groups + # + # # Arthritis (codes 3 and 17), one record of each per ind + # expect_true(all(result |> + # dplyr::filter(variable_name == "Arthritis" & strata_level == "Male") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(1,2))) + # expect_true(all(result |> + # dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150 &&& Male") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(1,2))) + # expect_true(all(result |> + # dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(1,2,1,1))) + # + # # Osteoarthritis of hip (code 5), two records ind 2 and one ind 1 + # expect_true(all(result |> + # dplyr::filter(variable_name == "Osteoarthritis of hip" & strata_level == "Female") |> + # dplyr::tally() |> + # dplyr::pull() == 2)) + # + # # Musculoskeletal disorder (code 1), one record each ind + # expect_true(all(result |> + # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(1,1))) + # expect_true(all(result |> + # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(1,1))) + # expect_true(all(result |> + # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(2,2))) + # expect_true(all(result |> + # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "overall") |> + # dplyr::arrange(variable_level, estimate_name) |> + # dplyr::pull(estimate_value) == c(2,2))) + # + # PatientProfiles::mockDisconnect(cdm) }) test_that("plot concept counts works", { @@ -512,7 +434,7 @@ test_that("plot concept counts works", { # summariseInObservationPlot plot ---- x <- summariseConceptCounts(cdm, conceptId = list(codes = c(40213160))) expect_error(plotConceptCounts(x)) - x <- x |> dplyr::filter(variable_name == "Number records") + x <- x |> dplyr::filter(estimate_name == "record_count") expect_no_error(plotConceptCounts(x)) expect_true(inherits(plotConceptCounts(x), "ggplot")) @@ -520,7 +442,7 @@ test_that("plot concept counts works", { conceptId = list("polio" = c(40213160), "acetaminophen" = c(1125315, 1127433, 40229134, 40231925, 40162522, 19133768, 1127078))) expect_error(plotConceptCounts(x)) - x <- x |> dplyr::filter(variable_name == "Number records") + x <- x |> dplyr::filter(estimate_name == "record_count") expect_no_error(plotConceptCounts(x)) expect_message(plotConceptCounts(x)) expect_no_error(plotConceptCounts(x, facet = "codelist_name")) @@ -529,5 +451,6 @@ test_that("plot concept counts works", { x <- x |> dplyr::filter(result_id == -1) expect_error(plotInObservation(x)) + PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/tests/testthat/test-summariseInObservation.R b/tests/testthat/test-summariseInObservation.R index d2c7d105..024597b4 100644 --- a/tests/testthat/test-summariseInObservation.R +++ b/tests/testthat/test-summariseInObservation.R @@ -5,14 +5,14 @@ test_that("check summariseInObservation works", { # Check all tables work ---- expect_true(inherits(summariseInObservation(cdm$observation_period),"summarised_result")) - expect_true(inherits(summariseInObservation(cdm$observation_period, interval = "months"),"summarised_result")) - expect_true(inherits(summariseInObservation(cdm$observation_period, interval = "years"),"summarised_result")) + expect_true(inherits(summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 10),"summarised_result")) + expect_true(inherits(summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10),"summarised_result")) expect_error(summariseInObservation(cdm$death)) # Check inputs ---- - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 1) |> + dplyr::filter(variable_level == "1909-01-01 to 1909-12-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -24,13 +24,8 @@ test_that("check summariseInObservation works", { dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years") - expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") - x <- summariseInObservation(cdm$observation_period, interval = "overall") - expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") - - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == c("1936-01-01 to 1936-12-31"), estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 2) |> + dplyr::filter(variable_level == c("1936-01-01 to 1937-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -38,13 +33,13 @@ test_that("check summariseInObservation works", { dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% dplyr::filter((.data$start < 1936 & .data$end >= 1936) | - (.data$start >= 1936 & .data$start <= 1936)) |> + (.data$start >= 1936 & .data$start <= 1937)) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years") |> - dplyr::filter(additional_level == c("1998-01-01 to 1998-12-31"), estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10) |> + dplyr::filter(variable_level == c("1998-01-01 to 2007-12-31"), estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -52,14 +47,14 @@ test_that("check summariseInObservation works", { dplyr::mutate(start = !!CDMConnector::datepart("observation_period_start_date", "year")) %>% dplyr::mutate(end = !!CDMConnector::datepart("observation_period_end_date", "year")) %>% dplyr::filter((.data$start < 1998 & .data$end >= 1998) | - (.data$start >= 1998 & .data$start <= 1998)) |> + (.data$start >= 1998 & .data$start <= 2007)) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) # Check inputs ---- - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 1) |> + dplyr::filter(variable_level == "1942-03-01 to 1942-03-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% @@ -71,26 +66,26 @@ test_that("check summariseInObservation works", { expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "2015-09-01 to 2015-09-30", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 2) |> + dplyr::filter(variable_level == "2015-09-01 to 2015-10-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter( (observation_period_start_date < as.Date("2015-09-01") & observation_period_end_date >= as.Date("2015-09-01")) | - (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-09-30")) + (observation_period_start_date >= as.Date("2015-09-01") & observation_period_start_date <= as.Date("2015-10-31")) ) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "months") |> - dplyr::filter(additional_level == "1982-03-01 to 1982-03-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "month", unitInterval = 10) |> + dplyr::filter(variable_level == "1982-03-01 to 1982-12-31", estimate_name == "count") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period %>% dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1982-03-01") & observation_period_end_date >= as.Date("1982-03-01") | - (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-03-31"))) |> + (observation_period_start_date >= as.Date("1982-03-01") & observation_period_start_date <= as.Date("1982-12-31"))) |> dplyr::tally() |> dplyr::pull("n") |> as.numeric() expect_equal(x,y) @@ -103,50 +98,45 @@ test_that("check sex argument works", { cdm <- cdmEunomia() # Check overall - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) - expect_equal(x |> dplyr::filter(additional_level != "overall") |> dplyr::pull("additional_name") |> unique(), "time_interval") - x <- summariseInObservation(cdm$observation_period, interval = "overall") - expect_equal(x |> dplyr::filter(additional_level == "overall") |> dplyr::pull("additional_name") |> unique(), "overall") - - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("Male","Female"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level %in% c("Male","Female"), variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() |> sum() - y <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level %in% c("overall"), additional_level == "1908-01-01 to 1908-12-31", estimate_name == "count") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level %in% c("overall"), variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() expect_equal(x,y) y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | - (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1908-12-31"))) |> + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull() |> as.numeric() expect_equal(x,y) # Check a random group - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "count") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "count") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1915-01-01") & observation_period_end_date >= as.Date("1915-01-01") | - (observation_period_start_date >= as.Date("1915-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull() |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1915-01-01 to 1915-12-31", estimate_name == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1915-01-01") & observation_period_end_date >= as.Date("1915-01-01") | - (observation_period_start_date >= as.Date("1915-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull())/(cdm[["person"]] |> dplyr::tally() |> dplyr::pull() |> as.numeric())*100 expect_equal(x,y) @@ -159,16 +149,16 @@ test_that("check ageGroup argument works", { # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(summariseInObservation(cdm$observation_period, ageGroup = list(c(0,20), c(21, Inf)))) + expect_no_error(summariseClinicalRecords(cdm, "condition_occurrence", ageGroup = list(c(0,20), c(21, Inf)))) - x <- summariseInObservation(cdm$observation_period, interval = "years", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(additional_level == "1928-01-01 to 1928-12-31", estimate_name == "count", strata_level == "<=20") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 10, ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> + dplyr::filter(variable_level == "1928-01-01 to 1937-12-31", estimate_name == "count", strata_level == "<=20") |> dplyr::pull(estimate_value) |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1928-01-01") & observation_period_end_date >= as.Date("1928-01-01") | - (observation_period_start_date >= as.Date("1928-01-01") & observation_period_start_date <= as.Date("1928-12-31"))) |> - dplyr::mutate("start" = as.Date("1928-01-01"), "end" = as.Date("1928-12-31")) |> + (observation_period_start_date >= as.Date("1928-01-01") & observation_period_start_date <= as.Date("1937-12-31"))) |> + dplyr::mutate("start" = as.Date("1928-01-01"), "end" = as.Date("1937-12-31")) |> PatientProfiles::addAgeQuery(indexDate = "start", ageName = "age_start") %>% dplyr::mutate(age_end = age_start+10) |> dplyr::filter((age_end <= 20 & age_end >= 0) | (age_start >= 0 & age_start <= 20)) |> @@ -176,15 +166,15 @@ test_that("check ageGroup argument works", { dplyr::pull() |> as.numeric() expect_equal(x,y) - x <- summariseInObservation(cdm$observation_period, interval = "years", sex = TRUE) |> - dplyr::filter(strata_level == "Male", additional_level == "1918-01-01 to 1918-12-31", estimate_name == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 8, sex = TRUE) |> + dplyr::filter(strata_level == "Male", variable_level == "1908-01-01 to 1915-12-31", estimate_name == "percentage") |> dplyr::pull(estimate_value) |> as.numeric() y <- (cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% PatientProfiles::addSexQuery() |> dplyr::filter(sex == "Male") |> - dplyr::filter(observation_period_start_date < as.Date("1918-01-01") & observation_period_end_date >= as.Date("1918-01-01") | - (observation_period_start_date >= as.Date("1918-01-01") & observation_period_start_date <= as.Date("1918-12-31"))) |> + dplyr::filter(observation_period_start_date < as.Date("1908-01-01") & observation_period_end_date >= as.Date("1908-01-01") | + (observation_period_start_date >= as.Date("1908-01-01") & observation_period_start_date <= as.Date("1915-12-31"))) |> dplyr::tally() |> dplyr::pull())/(cdm[["person"]] |> dplyr::tally() |> dplyr::pull() |> as.numeric())*100 expect_equal(x,y) @@ -198,14 +188,14 @@ test_that("check output argument works", { cdm <- cdmEunomia() # check value - x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1970-01-01 to 1970-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% - dplyr::filter(observation_period_start_date < as.Date("1970-01-01") & observation_period_end_date >= as.Date("1970-01-01") | - (observation_period_start_date >= as.Date("1970-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> - dplyr::mutate("start_date" = as.Date("1970-01-01"), "end_date" = as.Date("1970-12-31")) %>% + dplyr::filter(observation_period_start_date < as.Date("1964-01-01") & observation_period_end_date >= as.Date("1964-01-01") | + (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> + dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1970-12-31")) %>% dplyr::mutate("start_date" = pmax(start_date, observation_period_start_date, na.rm = TRUE), "end_date" = pmin(end_date, observation_period_end_date, na.rm = TRUE)) %>% dplyr::mutate(days = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> @@ -217,14 +207,14 @@ test_that("check output argument works", { dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::mutate(days = !!CDMConnector::datediff("observation_period_start_date","observation_period_end_date", interval = "day")+1) |> dplyr::summarise(n = sum(days, na.rm = TRUE)) |> dplyr::pull("n") - x <- summariseInObservation(cdm$observation_period, interval = "years", output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "percentage") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = c("records","person-days"), ageGroup = NULL, sex = FALSE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "percentage") |> dplyr::pull("estimate_value") |> as.numeric() y <- cdm$observation_period |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") %>% dplyr::filter(observation_period_start_date < as.Date("1964-01-01") & observation_period_end_date >= as.Date("1964-01-01") | - (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1964-12-31"))) |> - dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1964-12-31")) %>% + (observation_period_start_date >= as.Date("1964-01-01") & observation_period_start_date <= as.Date("1970-12-31"))) |> + dplyr::mutate("start_date" = as.Date("1964-01-01"), "end_date" = as.Date("1970-12-31")) %>% dplyr::mutate("start_date" = pmax(start_date, observation_period_start_date, na.rm = TRUE), "end_date" = pmin(end_date, observation_period_end_date, na.rm = TRUE)) %>% dplyr::mutate(days = !!CDMConnector::datediff("start_date","end_date", interval = "day")+1) |> @@ -232,20 +222,20 @@ test_that("check output argument works", { expect_equal(x,y) # Check sex stratified - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() - y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "1964-01-01 to 1964-12-31", estimate_type == "integer") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) # Check age stratified - x <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + x <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", ageGroup = list("<=20" = c(0,20), ">20" = c(21,Inf))) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level == "overall") |> dplyr::pull("estimate_value") |> as.numeric() - y <- summariseInObservation(cdm$observation_period, interval = "years", output = "person-days", sex = TRUE) |> - dplyr::filter(variable_name == "Number person-days", additional_level == "2000-01-01 to 2000-12-31", estimate_type == "integer") |> + y <- summariseInObservation(cdm$observation_period, unit = "year", unitInterval = 7, output = "person-days", sex = TRUE) |> + dplyr::filter(variable_name == "Number person-days", variable_level == "1964-01-01 to 1970-12-31", estimate_type == "integer") |> dplyr::filter(strata_level != "overall") |> dplyr::pull("estimate_value") |> as.numeric() |> sum() expect_equal(x,y) diff --git a/tests/testthat/test-summariseMissingData.R b/tests/testthat/test-summariseMissingData.R deleted file mode 100644 index 765d749c..00000000 --- a/tests/testthat/test-summariseMissingData.R +++ /dev/null @@ -1,52 +0,0 @@ -test_that("summariseMissingData() works", { - skip_on_cran() - # Load mock database ---- - cdm <- cdmEunomia() - - # Check all tables work ---- - expect_true(inherits(summariseMissingData(cdm, "drug_exposure"),"summarised_result")) - expect_no_error(y<-summariseMissingData(cdm, "observation_period")) - expect_no_error(x<-summariseMissingData(cdm, "visit_occurrence")) - expect_no_error(summariseMissingData(cdm, "condition_occurrence")) - expect_no_error(summariseMissingData(cdm, "drug_exposure")) - expect_no_error(summariseMissingData(cdm, "procedure_occurrence", year = TRUE)) - expect_warning(summariseMissingData(cdm, "device_exposure")) - expect_no_error(z<-summariseMissingData(cdm, "measurement")) - expect_no_error(s<-summariseMissingData(cdm, "observation")) - expect_warning(summariseMissingData(cdm, "death")) - - - expect_no_error(all <- summariseMissingData(cdm, c("observation_period", "visit_occurrence", "measurement"))) - expect_equal(all, dplyr::bind_rows(y, x, z)) - expect_equal(summariseMissingData(cdm, "observation"), summariseMissingData(cdm, "observation", col = colnames(cdm[['observation']]))) - x<-summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date") - - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = "procedure_date"), dplyr::bind_rows(x,s)) - y<-summariseMissingData(cdm, "observation",col = "observation_date") - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = c("procedure_date", "observation_date")), dplyr::bind_rows(x,y)) - - # Check inputs ---- - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id")|> - dplyr::select(estimate_value)|> - dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> - dplyr::summarise(sum = sum(estimate_value)) |> - dplyr::pull() == 0) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", sex = TRUE, ageGroup = list(c(0,50), c(51,Inf)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==9) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", ageGroup = list(c(0,50)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==3) - - cdm$procedure_occurrence <- cdm$procedure_occurrence |> - dplyr::mutate(procedure_concept_id = NA_integer_) |> - dplyr::compute(name = "procedure_occurrence", temporary = FALSE) - - expect_warning(summariseMissingData(cdm, "procedure_occurrence", col="procedure_concept_id", ageGroup = list(c(0,50)))) - - PatientProfiles::mockDisconnect(cdm = cdm) -}) diff --git a/tests/testthat/test-summariseObservationPeriod.R b/tests/testthat/test-summariseObservationPeriod.R index 2d8b53c1..8ba1e920 100644 --- a/tests/testthat/test-summariseObservationPeriod.R +++ b/tests/testthat/test-summariseObservationPeriod.R @@ -1,274 +1,275 @@ test_that("check summariseObservationPeriod works", { - skip_on_cran() - # helper function - removeSettings <- function(x) { - attr(x, "settings") <- NULL - return(x) - } - nPoints <- 512 - - # Load mock database - cdm <- omopgenerics::cdmFromTables( - tables = list( - person = dplyr::tibble( - person_id = as.integer(1:4), - gender_concept_id = c(8507L, 8532L, 8532L, 8507L), - year_of_birth = 2010L, - month_of_birth = 1L, - day_of_birth = 1L, - race_concept_id = 0L, - ethnicity_concept_id = 0L - ), - observation_period = dplyr::tibble( - observation_period_id = as.integer(1:8), - person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), - observation_period_start_date = as.Date(c( - "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", - "2020-03-01", "2020-04-10", "2020-03-10" - )), - observation_period_end_date = as.Date(c( - "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", - "2020-03-09", "2020-05-08", "2020-12-10" - )), - period_type_concept_id = 0L - ) - ), - cdmName = "mock data" - ) - cdm <- CDMConnector::copyCdmTo( - con = connection(), cdm = cdm, schema = schema()) - - # simple run - expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period)) - expect_no_error( - resAllD <- summariseObservationPeriod(cdm$observation_period, estimates = "density")) - expect_no_error( - resAllN <- summariseObservationPeriod(cdm$observation_period, - estimates = c( - "mean", "sd", "min", "q05", "q25", - "median", "q75", "q95", "max"))) - expect_equal( - resAllD |> dplyr::filter(!is.na(variable_level)) |> - dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings(), - resAll |> dplyr::filter(!is.na(variable_level)) |> - dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings() - ) - - # test estimates - expect_no_error( - resEst <- cdm$observation_period |> - summariseObservationPeriod(estimates = c("mean", "median"))) - expect_true(all( - resEst |> - dplyr::filter(!.data$variable_name %in% c("number records", "number subjects")) |> - dplyr::pull("estimate_name") |> - unique() %in% c("mean", "median") - )) - - # counts - expect_identical(resAll$estimate_value[resAll$variable_name == "number records"], "8") - x <- dplyr::tibble( - group_level = c("overall", "1st", "2nd", "3rd"), - variable_name = "number subjects", - estimate_value = c("4", "4", "3", "1")) - expect_identical(nrow(x), resAll |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) - - # records per person - expect_identical( - resAll |> - dplyr::filter( - variable_name == "records per person", estimate_name == "mean") |> - dplyr::pull("estimate_value"), - "2" - ) - - # duration - expect_identical( - resAll |> - dplyr::filter(variable_name == "duration in days", estimate_name == "mean") |> - dplyr::pull("estimate_value"), - as.character(c( - mean(c(20, 6, 113, 144, 18, 9, 29, 276)), mean(c(20, 18, 9, 276)), - mean(c(6, 29, 144)), 113 - )) - ) - - # days to next observation period - expect_identical( - resAll |> - dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |> - dplyr::pull("estimate_value"), - as.character(c( - mean(c(5, 32, 136, 26)), mean(c(5, 32, 136)), 26, NA - )) - ) - - # duration - density - xx <- resAllD |> - dplyr::filter(variable_name == "duration in days", !is.na(variable_level)) |> - dplyr::group_by(group_level) |> - dplyr::summarise( - n = dplyr::n(), - area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( - max(as.numeric(estimate_value[estimate_name == "density_x"])) - - min(as.numeric(estimate_value[estimate_name == "density_x"])) - )/(nPoints - 1) - ) - expect_identical(xx$n |> unique() |> sort(decreasing = TRUE), c(as.integer(nPoints*2L),6L)) - expect_identical(xx$area |> round(2) |> unique() |> sort(decreasing = TRUE), c(1,0)) - - # days to next observation period - density - xx <- resAll |> - dplyr::filter(variable_name == "days to next observation period", - !is.na(variable_level)) |> - dplyr::group_by(group_level) |> - dplyr::summarise( - n = dplyr::n(), - area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( - max(as.numeric(estimate_value[estimate_name == "density_x"])) - - min(as.numeric(estimate_value[estimate_name == "density_x"])) - )/(nPoints - 1) - ) - expect_identical(xx$n |> unique() |> sort(decreasing = TRUE) , c(as.integer(nPoints*2L),6L)) - expect_identical(xx$area[xx$group_level != "2nd"] |> round(2) |> unique(), 1) - - # only one exposure per individual - cdm$observation_period <- cdm$observation_period |> - dplyr::group_by(person_id) |> - dplyr::filter(observation_period_id == min(observation_period_id, na.rm = TRUE)) |> - dplyr::ungroup() |> - dplyr::compute(name = "observation_period", temporary = FALSE) - - expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period)) - - # counts - expect_identical(resOne$estimate_value[resOne$variable_name == "number records"], "4") - x <- dplyr::tibble( - group_level = c("overall", "1st"), - variable_name = "number subjects", - estimate_value = c("4", "4")) - expect_identical(nrow(x), resOne |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) - - # Check result type - checkResultType(resOne, "summarise_observation_period") - - # empty observation period - cdm$observation_period <- cdm$observation_period |> - dplyr::filter(person_id == 0) |> - dplyr::compute(name = "observation_period", temporary = FALSE) - - expect_no_error(resEmpty <- summariseObservationPeriod(cdm$observation_period)) - expect_true(nrow(resEmpty) == 2) - expect_identical(unique(resEmpty$estimate_value), "0") - - # table works - expect_no_error(tableObservationPeriod(resAll)) - expect_no_error(tableObservationPeriod(resOne)) - expect_no_error(tableObservationPeriod(resEmpty)) - - # plot works - expect_no_error(plotObservationPeriod(resAll)) - expect_no_error(plotObservationPeriod(resOne)) - # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION - - # check all plots combinations - expect_no_error( - resAll |> - plotObservationPeriod( - variableName = "number subjects", plotType = "barplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "number subjects", plotType = "boxplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "number subjects", plotType = "densityplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "number subjects", plotType = "random") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "duration in days", plotType = "barplot") - ) - expect_no_error( - resAll |> - plotObservationPeriod( - variableName = "duration in days", plotType = "boxplot") - ) - expect_error( - resAllN |> - plotObservationPeriod( - variableName = "duration in days", plotType = "densityplot") - ) - expect_no_error( - resAllD |> - plotObservationPeriod( - variableName = "duration in days", plotType = "densityplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "duration in days", plotType = "random") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "records per person", plotType = "barplot") - ) - expect_no_error( - resAll |> - plotObservationPeriod( - variableName = "records per person", plotType = "boxplot") - ) - expect_error( - resAllN |> - plotObservationPeriod( - variableName = "records per person", plotType = "densityplot") - ) - expect_no_error( - resAllD |> - plotObservationPeriod( - variableName = "records per person", plotType = "densityplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "records per person", plotType = "random") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "days to next observation period", plotType = "barplot") - ) - expect_no_error( - resAll |> - plotObservationPeriod( - variableName = "days to next observation period", plotType = "boxplot") - ) - expect_error( - resAllN |> - plotObservationPeriod( - variableName = "days to next observation period", plotType = "densityplot") - ) - expect_no_error( - resAllD |> - plotObservationPeriod( - variableName = "days to next observation period", plotType = "densityplot") - ) - expect_error( - resAll |> - plotObservationPeriod( - variableName = "days to next observation period", plotType = "random") - ) - - PatientProfiles::mockDisconnect(cdm = cdm) + # skip_on_cran() + # + # # helper function + # removeSettings <- function(x) { + # attr(x, "settings") <- NULL + # return(x) + # } + # nPoints <- 512 + # + # # Load mock database + # cdm <- omopgenerics::cdmFromTables( + # tables = list( + # person = dplyr::tibble( + # person_id = as.integer(1:4), + # gender_concept_id = c(8507L, 8532L, 8532L, 8507L), + # year_of_birth = 2010L, + # month_of_birth = 1L, + # day_of_birth = 1L, + # race_concept_id = 0L, + # ethnicity_concept_id = 0L + # ), + # observation_period = dplyr::tibble( + # observation_period_id = as.integer(1:8), + # person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), + # observation_period_start_date = as.Date(c( + # "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", + # "2020-03-01", "2020-04-10", "2020-03-10" + # )), + # observation_period_end_date = as.Date(c( + # "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", + # "2020-03-09", "2020-05-08", "2020-12-10" + # )), + # period_type_concept_id = 0L + # ) + # ), + # cdmName = "mock data" + # ) + # cdm <- CDMConnector::copyCdmTo( + # con = connection(), cdm = cdm, schema = schema()) + # + # # simple run + # expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period)) + # expect_no_error( + # resAllD <- summariseObservationPeriod(cdm$observation_period, estimates = "density")) + # expect_no_error( + # resAllN <- summariseObservationPeriod(cdm$observation_period, + # estimates = c( + # "mean", "sd", "min", "q05", "q25", + # "median", "q75", "q95", "max"))) + # expect_equal( + # resAllD |> dplyr::filter(!is.na(variable_level)) |> + # dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings(), + # resAll |> dplyr::filter(!is.na(variable_level)) |> + # dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings() + # ) + # + # # test estimates + # expect_no_error( + # resEst <- cdm$observation_period |> + # summariseObservationPeriod(estimates = c("mean", "median"))) + # expect_true(all( + # resEst |> + # dplyr::filter(!.data$variable_name %in% c("number records", "number subjects")) |> + # dplyr::pull("estimate_name") |> + # unique() %in% c("mean", "median") + # )) + # + # # counts + # expect_identical(resAll$estimate_value[resAll$variable_name == "number records"], "8") + # x <- dplyr::tibble( + # group_level = c("overall", "1st", "2nd", "3rd"), + # variable_name = "number subjects", + # estimate_value = c("4", "4", "3", "1")) + # expect_identical(nrow(x), resAll |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) + # + # # records per person + # expect_identical( + # resAll |> + # dplyr::filter( + # variable_name == "records per person", estimate_name == "mean") |> + # dplyr::pull("estimate_value"), + # "2" + # ) + # + # # duration + # expect_identical( + # resAll |> + # dplyr::filter(variable_name == "duration in days", estimate_name == "mean") |> + # dplyr::pull("estimate_value"), + # as.character(c( + # mean(c(20, 6, 113, 144, 18, 9, 29, 276)), mean(c(20, 18, 9, 276)), + # mean(c(6, 29, 144)), 113 + # )) + # ) + # + # # days to next observation period + # expect_identical( + # resAll |> + # dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |> + # dplyr::pull("estimate_value"), + # as.character(c( + # mean(c(5, 32, 136, 26)), mean(c(5, 32, 136)), 26, NA + # )) + # ) + # + # # duration - density + # xx <- resAllD |> + # dplyr::filter(variable_name == "duration in days", !is.na(variable_level)) |> + # dplyr::group_by(group_level) |> + # dplyr::summarise( + # n = dplyr::n(), + # area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( + # max(as.numeric(estimate_value[estimate_name == "density_x"])) - + # min(as.numeric(estimate_value[estimate_name == "density_x"])) + # )/(nPoints - 1) + # ) + # expect_identical(xx$n |> unique() |> sort(decreasing = TRUE), c(as.integer(nPoints*2L),6L)) + # expect_identical(xx$area |> round(2) |> unique() |> sort(decreasing = TRUE), c(1,0)) + # + # # days to next observation period - density + # xx <- resAll |> + # dplyr::filter(variable_name == "days to next observation period", + # !is.na(variable_level)) |> + # dplyr::group_by(group_level) |> + # dplyr::summarise( + # n = dplyr::n(), + # area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( + # max(as.numeric(estimate_value[estimate_name == "density_x"])) - + # min(as.numeric(estimate_value[estimate_name == "density_x"])) + # )/(nPoints - 1) + # ) + # expect_identical(xx$n |> unique() |> sort(decreasing = TRUE) , c(as.integer(nPoints*2L),6L)) + # expect_identical(xx$area[xx$group_level != "2nd"] |> round(2) |> unique(), 1) + # + # # only one exposure per individual + # cdm$observation_period <- cdm$observation_period |> + # dplyr::group_by(person_id) |> + # dplyr::filter(observation_period_id == min(observation_period_id, na.rm = TRUE)) |> + # dplyr::ungroup() |> + # dplyr::compute(name = "observation_period", temporary = FALSE) + # + # expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period)) + # + # # counts + # expect_identical(resOne$estimate_value[resOne$variable_name == "number records"], "4") + # x <- dplyr::tibble( + # group_level = c("overall", "1st"), + # variable_name = "number subjects", + # estimate_value = c("4", "4")) + # expect_identical(nrow(x), resOne |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) + # + # # Check result type + # checkResultType(resOne, "summarise_observation_period") + # + # # empty observation period + # cdm$observation_period <- cdm$observation_period |> + # dplyr::filter(person_id == 0) |> + # dplyr::compute(name = "observation_period", temporary = FALSE) + # + # expect_no_error(resEmpty <- summariseObservationPeriod(cdm$observation_period)) + # expect_true(nrow(resEmpty) == 2) + # expect_identical(unique(resEmpty$estimate_value), "0") + # + # # table works + # expect_no_error(tableObservationPeriod(resAll)) + # expect_no_error(tableObservationPeriod(resOne)) + # expect_no_error(tableObservationPeriod(resEmpty)) + # + # # plot works + # expect_no_error(plotObservationPeriod(resAll)) + # expect_no_error(plotObservationPeriod(resOne)) + # # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION + # + # # check all plots combinations + # expect_no_error( + # resAll |> + # plotObservationPeriod( + # variableName = "number subjects", plotType = "barplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "number subjects", plotType = "boxplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "number subjects", plotType = "densityplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "number subjects", plotType = "random") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "duration in days", plotType = "barplot") + # ) + # expect_no_error( + # resAll |> + # plotObservationPeriod( + # variableName = "duration in days", plotType = "boxplot") + # ) + # expect_error( + # resAllN |> + # plotObservationPeriod( + # variableName = "duration in days", plotType = "densityplot") + # ) + # expect_no_error( + # resAllD |> + # plotObservationPeriod( + # variableName = "duration in days", plotType = "densityplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "duration in days", plotType = "random") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "records per person", plotType = "barplot") + # ) + # expect_no_error( + # resAll |> + # plotObservationPeriod( + # variableName = "records per person", plotType = "boxplot") + # ) + # expect_error( + # resAllN |> + # plotObservationPeriod( + # variableName = "records per person", plotType = "densityplot") + # ) + # expect_no_error( + # resAllD |> + # plotObservationPeriod( + # variableName = "records per person", plotType = "densityplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "records per person", plotType = "random") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "days to next observation period", plotType = "barplot") + # ) + # expect_no_error( + # resAll |> + # plotObservationPeriod( + # variableName = "days to next observation period", plotType = "boxplot") + # ) + # expect_error( + # resAllN |> + # plotObservationPeriod( + # variableName = "days to next observation period", plotType = "densityplot") + # ) + # expect_no_error( + # resAllD |> + # plotObservationPeriod( + # variableName = "days to next observation period", plotType = "densityplot") + # ) + # expect_error( + # resAll |> + # plotObservationPeriod( + # variableName = "days to next observation period", plotType = "random") + # ) + # + # PatientProfiles::mockDisconnect(cdm = cdm) }) test_that("check it works with mockOmopSketch", { @@ -332,106 +333,107 @@ test_that("check it works with mockOmopSketch", { }) test_that("check summariseObservationPeriod strata works", { - skip_on_cran() - # helper function - removeSettings <- function(x) { - attr(x, "settings") <- NULL - return(x) - } - nPoints <- 512 - - # Load mock database - cdm <- omopgenerics::cdmFromTables( - tables = list( - person = dplyr::tibble( - person_id = as.integer(1:4), - gender_concept_id = c(8507L, 8532L, 8532L, 8507L), - year_of_birth = c(2010L, 2010L, 2011L, 2012L), - month_of_birth = 1L, - day_of_birth = 1L, - race_concept_id = 0L, - ethnicity_concept_id = 0L - ), - observation_period = dplyr::tibble( - observation_period_id = as.integer(1:8), - person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), - observation_period_start_date = as.Date(c( - "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", - "2020-03-01", "2020-04-10", "2020-03-10" - )), - observation_period_end_date = as.Date(c( - "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", - "2020-03-09", "2020-05-08", "2020-12-10" - )), - period_type_concept_id = 0L - ) - ), - cdmName = "mock data" - ) - cdm <- CDMConnector::copyCdmTo( - con = connection(), cdm = cdm, schema = schema()) - - # simple run - expect_no_error(summariseObservationPeriod(cdm$observation_period, - estimates = c("mean"), - ageGroup = list(c(0,9), c(10, Inf)))) - - expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period, - estimates = c("mean", "sd", "min", "max", "median", "density"))) - expect_no_error(resStrata <- summariseObservationPeriod(cdm$observation_period, - estimates = c("mean", "sd", "min", "max", "median", "density"), - ageGroup = list("<10" = c(0,9), ">=10" = c(10, Inf)), - sex = TRUE)) - # test overall - x <- resStrata |> - dplyr::filter(strata_name == "overall", strata_level == "overall") |> - dplyr::rename("strata" = "estimate_value") |> - dplyr::inner_join( - resAll |> - dplyr::rename("all" = "estimate_value") - ) - expect_identical(x$strata, x$all) - - # check strata groups have the expected value - expect_identical(resStrata |> - dplyr::filter(variable_name == "number subjects", - strata_level == "Female", - group_level == "2nd") |> - dplyr::pull("estimate_value"),"2") - - expect_identical(resStrata |> - dplyr::filter(variable_name == "number subjects", - strata_level == ">=10 &&& Male", - group_level == "3rd") |> - dplyr::pull("estimate_value"),"1") - - # duration - expect_identical( - resStrata |> - dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == ">=10") |> - dplyr::pull("estimate_value"), - as.character(c( - mean(c(20, 18)), - mean(c(6, 144)), - mean(113))) - ) - - expect_identical( - resStrata |> - dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == "<10") |> - dplyr::pull("estimate_value"), - as.character(c( - mean(c(9, 276)), - mean(c(29)))) - ) - - # days to next observation period - expect_identical( - resStrata |> - dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean", - strata_level == "<10 &&& Female", group_level == "1st") |> - dplyr::pull("estimate_value"), "32" - ) - - PatientProfiles::mockDisconnect(cdm = cdm) + # skip_on_cran() + # + # # helper function + # removeSettings <- function(x) { + # attr(x, "settings") <- NULL + # return(x) + # } + # nPoints <- 512 + # + # # Load mock database + # cdm <- omopgenerics::cdmFromTables( + # tables = list( + # person = dplyr::tibble( + # person_id = as.integer(1:4), + # gender_concept_id = c(8507L, 8532L, 8532L, 8507L), + # year_of_birth = c(2010L, 2010L, 2011L, 2012L), + # month_of_birth = 1L, + # day_of_birth = 1L, + # race_concept_id = 0L, + # ethnicity_concept_id = 0L + # ), + # observation_period = dplyr::tibble( + # observation_period_id = as.integer(1:8), + # person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), + # observation_period_start_date = as.Date(c( + # "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", + # "2020-03-01", "2020-04-10", "2020-03-10" + # )), + # observation_period_end_date = as.Date(c( + # "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", + # "2020-03-09", "2020-05-08", "2020-12-10" + # )), + # period_type_concept_id = 0L + # ) + # ), + # cdmName = "mock data" + # ) + # cdm <- CDMConnector::copyCdmTo( + # con = connection(), cdm = cdm, schema = schema()) + # + # # simple run + # expect_no_error(summariseObservationPeriod(cdm$observation_period, + # estimates = c("mean"), + # ageGroup = list(c(0,9), c(10, Inf)))) + # + # expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period, + # estimates = c("mean", "sd", "min", "max", "median", "density"))) + # expect_no_error(resStrata <- summariseObservationPeriod(cdm$observation_period, + # estimates = c("mean", "sd", "min", "max", "median", "density"), + # ageGroup = list("<10" = c(0,9), ">=10" = c(10, Inf)), + # sex = TRUE)) + # # test overall + # x <- resStrata |> + # dplyr::filter(strata_name == "overall", strata_level == "overall") |> + # dplyr::rename("strata" = "estimate_value") |> + # dplyr::inner_join( + # resAll |> + # dplyr::rename("all" = "estimate_value") + # ) + # expect_identical(x$strata, x$all) + # + # # check strata groups have the expected value + # expect_identical(resStrata |> + # dplyr::filter(variable_name == "number subjects", + # strata_level == "Female", + # group_level == "2nd") |> + # dplyr::pull("estimate_value"),"2") + # + # expect_identical(resStrata |> + # dplyr::filter(variable_name == "number subjects", + # strata_level == ">=10 &&& Male", + # group_level == "3rd") |> + # dplyr::pull("estimate_value"),"1") + # + # # duration + # expect_identical( + # resStrata |> + # dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == ">=10") |> + # dplyr::pull("estimate_value"), + # as.character(c( + # mean(c(20, 18)), + # mean(c(6, 144)), + # mean(113))) + # ) + # + # expect_identical( + # resStrata |> + # dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == "<10") |> + # dplyr::pull("estimate_value"), + # as.character(c( + # mean(c(9, 276)), + # mean(c(29)))) + # ) + # + # # days to next observation period + # expect_identical( + # resStrata |> + # dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean", + # strata_level == "<10 &&& Female", group_level == "1st") |> + # dplyr::pull("estimate_value"), "32" + # ) + # + # PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/tests/testthat/test-summarisePopulationCharacteristics.R b/tests/testthat/test-summarisePopulationCharacteristics.R new file mode 100644 index 00000000..f510cc83 --- /dev/null +++ b/tests/testthat/test-summarisePopulationCharacteristics.R @@ -0,0 +1,229 @@ +test_that("summarisePopulationCharacteristics() works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + # Check that works ---- + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics(cdm = cdm)) + + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2694)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "1908-09-22")) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end") |> + dplyr::tally() |> + dplyr::pull() != + 0) + + expect_no_error(summarisedPopulationEqual <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = NULL) + ) + expect_equal(summarisedPopulation, summarisedPopulationEqual) + + # Add date range + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("1900-01-01", "2010-01-01")) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2694)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "2010-01-01")) + + # Add sex and age group strata + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + studyPeriod = c("1950-01-01", NA), + sex = TRUE, + ageGroup = list(c(0,20),c(21,150))) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() |> + sort() == + c(101,1250,1271,1321,1372,172,2521,2693,71))) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end" & strata_level == "0 to 20" & estimate_name == "min") |> + dplyr::pull("estimate_value") < + summarisedPopulation |> + dplyr::filter(variable_name == "Age at end" & strata_level == "21 to 150" & estimate_name == "min") |> + dplyr::pull("estimate_value")) + + # Only sex + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + sex = TRUE + )) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("overall", "sex"))) + + # Only age group + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + ageGroup = list(c(0,1), c(2,Inf)) + )) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "overall"))) + + # Check result type + checkResultType(summarisedPopulation, "summarise_population_characteristics") + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() strata works", { + # skip_on_cran() + # # Load mock database ---- + # cdm <- omock::mockCdmReference() |> + # omock::mockPerson(seed = 1L) |> + # omock::mockObservationPeriod(seed = 1L) |> + # copyCdm() + # + # # Add sex and age group strata + # expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + # cdm = cdm, + # sex = TRUE, + # ageGroup = list(c(0,20),c(21,150))) + # ) + # expect_true(inherits(summarisedPopulation,"summarised_result")) + # expect_true(all(summarisedPopulation |> + # dplyr::select("strata_name") |> + # dplyr::distinct() |> + # dplyr::pull() |> + # sort() == + # c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) + # + # expect_true(all(summarisedPopulation |> + # dplyr::filter(variable_name == "Number records") |> + # dplyr::arrange(strata_name, strata_level) |> + # dplyr::select("estimate_value") |> + # dplyr::pull() == + # c(4,6,1,3,4,2,10,5,5))) + # + # PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() expected errors", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_error(summarisePopulationCharacteristics("cdm")) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("2000-01-01", "1990-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c(NA, "1990-51-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("1990-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = c("01/31/1990", "2000-01-01"))) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = NULL, sex = "Female")) + expect_error(summarisePopulationCharacteristics(cdm, studyPeriod = NULL, ageGroup = c(0,20,40))) + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("tablePopulationCharacteristics() works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + # Check that works ---- + x <- summarisePopulationCharacteristics(cdm) + expect_no_error(y <- tablePopulationCharacteristics(x)) + expect_true(inherits(y,"gt_tbl")) + + x <- x |> dplyr::filter(.data$result_id == -1) + expect_warning(tablePopulationCharacteristics(x)) + expect_warning(inherits(tablePopulationCharacteristics(x),"gt_tbl")) + + PatientProfiles::mockDisconnect(cdm = cdm) +}) + +test_that("summarisePopulationCharacteristics() works with mockOmopSKetch", { +# skip_on_cran() +# cdm <- mockOmopSketch(numberIndividuals = 2, seed = 1) +# expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( +# cdm = cdm) +# ) +# expect_true(inherits(summarisedPopulation,"summarised_result")) +# expect_true(all(summarisedPopulation |> +# dplyr::select("strata_name") |> +# dplyr::distinct() |> +# dplyr::pull() == +# c("overall"))) +# expect_true(all(summarisedPopulation |> +# dplyr::filter(variable_name == "Number records") |> +# dplyr::select("estimate_value") |> +# dplyr::pull() == +# 2)) +# expect_true(all(summarisedPopulation |> +# dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> +# dplyr::select("estimate_value") |> +# dplyr::pull() == +# "1999-04-05")) +# expect_true(summarisedPopulation |> +# dplyr::filter(variable_name == "Age at end", estimate_name == "median") |> +# dplyr::pull("estimate_value") == +# as.character(mean(c(40,16)))) +# expect_true(all(summarisedPopulation |> +# dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> +# dplyr::select("estimate_value") |> +# dplyr::pull() == +# "2013-06-29")) +# expect_true(all(summarisedPopulation |> +# dplyr::filter(variable_name == "Sex", estimate_name == "percentage") |> +# dplyr::select("estimate_value") |> +# dplyr::pull() == +# c(50,50))) +# expect_true(all(summarisedPopulation |> +# dplyr::filter(variable_name == "Age at start", estimate_name %in% c("min","max")) |> +# dplyr::pull("estimate_value") |> +# sort() == +# cdm$observation_period |> +# PatientProfiles::addAge(indexDate = "observation_period_start_date") |> +# dplyr::pull("age") |> +# sort())) +# PatientProfiles::mockDisconnect(cdm = cdm) + +}) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index 2c0e7ebd..fd5689b1 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -4,28 +4,28 @@ test_that("summariseRecordCount() works", { cdm <- cdmEunomia() # Check inputs ---- - expect_warning(inherits(summariseRecordCount(cdm, "observation_period", interval = "months"),"summarised_result")) - expect_warning(inherits(summariseRecordCount(cdm, "observation_period"),"summarised_result")) + expect_warning(inherits(summariseRecordCount(cdm, "observation_period", unit = "month"),"summarised_result")) + expect_warning(inherits(summariseRecordCount(cdm, "observation_period", unitInterval = 5),"summarised_result")) expect_warning(summariseRecordCount(cdm, "observation_period")) expect_no_error(summariseRecordCount(cdm, "visit_occurrence")) + expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence")) expect_no_error(summariseRecordCount(cdm, "drug_exposure")) expect_no_error(summariseRecordCount(cdm, "procedure_occurrence")) + expect_warning(de <- summariseRecordCount(cdm, "device_exposure")) expect_no_error(summariseRecordCount(cdm, "measurement")) + expect_no_error(o <- summariseRecordCount(cdm, "observation")) expect_warning(summariseRecordCount(cdm, "death")) - expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence")) - expect_warning(de <- summariseRecordCount(cdm, "device_exposure")) - expect_no_error(o <- summariseRecordCount(cdm, "observation")) - expect_no_error(all <- summariseRecordCount(cdm, c("condition_occurrence", "device_exposure","observation"))) expect_equal(all, dplyr::bind_rows(co,de,o)) + # Check inputs ---- expect_true( - (summariseRecordCount(cdm, "observation_period", interval = "years") |> - dplyr::filter(additional_level == "1963-01-01 to 1963-12-31") |> + (summariseRecordCount(cdm, "observation_period") |> + dplyr::filter(variable_level == "1963-01-01 to 1963-12-31") |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$observation_period |> @@ -38,8 +38,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(additional_level == "1961-02-01 to 1961-02-28") |> + summariseRecordCount(cdm, "condition_occurrence", unit = "month") |> + dplyr::filter(variable_level == "1961-02-01 to 1961-02-28") |> dplyr::pull("estimate_value") |> as.numeric() == (cdm$condition_occurrence |> @@ -53,9 +53,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - (summariseRecordCount(cdm, "condition_occurrence", interval = "months") |> - dplyr::filter(additional_level %in% c("1984-01-01 to 1984-01-31", "1984-02-01 to 1984-02-29", "1984-03-01 to 1984-03-31")) |> - dplyr::summarise("estimate_value" = sum(as.numeric(estimate_value), na.rm = TRUE)) |> + (summariseRecordCount(cdm, "condition_occurrence", unit = "month", unitInterval = 3) |> + dplyr::filter(variable_level %in% c("1984-01-01 to 1984-03-31")) |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$condition_occurrence |> @@ -69,11 +68,8 @@ test_that("summariseRecordCount() works", { ) expect_true( - (summariseRecordCount(cdm, "drug_exposure", interval = "years") |> - dplyr::filter(additional_level %in% c("1981-01-01 to 1981-12-31", "1982-01-01 to 1982-12-31", "1983-01-01 to 1983-12-31", - "1984-01-01 to 1984-12-31", "1985-01-01 to 1985-12-31", "1986-01-01 to 1986-12-31", - "1987-01-01 to 1987-12-31", "1988-01-01 to 1988-12-31")) |> - dplyr::summarise("estimate_value" = sum(as.numeric(.data$estimate_value), na.rm = TRUE)) |> + (summariseRecordCount(cdm, "drug_exposure", unitInterval = 8) |> + dplyr::filter(variable_level == "1981-01-01 to 1988-12-31") |> dplyr::pull("estimate_value") |> as.numeric()) == (cdm$drug_exposure |> @@ -86,7 +82,7 @@ test_that("summariseRecordCount() works", { ) # Check result type - result <- summariseRecordCount(cdm, "observation_period", interval = "months") + result <- summariseRecordCount(cdm, "observation_period", unit = "month") checkResultType(result, "summarise_record_count") PatientProfiles::mockDisconnect(cdm = cdm) @@ -97,12 +93,12 @@ test_that("plotRecordCount() works", { # Load mock database ---- cdm <- cdmEunomia() - p <- summariseRecordCount(cdm, "drug_exposure", interval = "years") |> + p <- summariseRecordCount(cdm, "drug_exposure", unitInterval = 8) |> plotRecordCount() expect_true(inherits(p,"ggplot")) - p2 <- summariseRecordCount(cdm, c("condition_occurrence","drug_exposure"), interval = "years") |> + p2 <- summariseRecordCount(cdm, c("condition_occurrence","drug_exposure"), unitInterval = 8) |> plotRecordCount(facet = "omop_table") expect_true(inherits(p2,"ggplot")) @@ -123,16 +119,16 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list(">=65" = c(65, Inf), "<65" = c(0,64)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) @@ -140,25 +136,25 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", - ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) - x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", + ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60), ">60" = c(61, Inf)))) + x <- t |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") y <- cdm$condition_occurrence |> @@ -171,13 +167,14 @@ test_that("summariseRecordCount() ageGroup argument works", { expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", ageGroup = list("<=20" = c(0,20), "21 to 40" = c(21,40), "41 to 60" = c(41,60)))) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "<=20" & additional_level == "1920-01-01 to 1920-12-31") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "<=20" & variable_level == "1920-01-01 to 1920-12-31") |> dplyr::summarise(n = sum(as.numeric(estimate_value))) |> dplyr::pull("n") |> as.numeric() + x y <- cdm$condition_occurrence |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> PatientProfiles::addAgeQuery(indexDate = "condition_start_date", ageGroup = list("<=20" = c(0,20))) |> @@ -198,40 +195,40 @@ test_that("summariseRecordCount() sex argument works", { # Check that works ---- expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) expect_warning(t <- summariseRecordCount(cdm, "observation_period", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level != "overall") |> - dplyr::group_by(additional_level) |> + dplyr::group_by(variable_level) |> dplyr::summarise(estimate_value = sum(as.numeric(estimate_value))) |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::pull("estimate_value") y <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> + dplyr::select("strata_level", "variable_level", "estimate_value") |> dplyr::filter(strata_level == "overall") |> - dplyr::arrange(additional_level) |> + dplyr::arrange(variable_level) |> dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> dplyr::pull("estimate_value") expect_equal(x,y) - expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, interval = "years")) + expect_no_error(t <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE)) x <- t |> - dplyr::select("strata_level", "additional_level", "estimate_value") |> - dplyr::filter(strata_level == "Male", additional_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) + dplyr::select("strata_level", "variable_level", "estimate_value") |> + dplyr::filter(strata_level == "Male", variable_level == "1937-01-01 to 1937-12-31") |> dplyr::pull(estimate_value) y <- cdm$condition_occurrence |> dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> @@ -248,26 +245,26 @@ test_that("summariseRecordCount() sex argument works", { }) test_that("summariseRecordCount() works with mockOmopSketch", { - skip_on_cran() - cdm <- mockOmopSketch(numberIndividuals = 3, seed = 1) - conditionpp <- cdm$condition_occurrence |> - PatientProfiles::addDemographics(indexDate = "condition_start_date",ageGroup = list(c(0,20),c(21,150))) |> - dplyr::mutate(year = clock::get_year(condition_start_date)) |> - dplyr::group_by(year, age_group, sex) |> - dplyr::summarise(n = n()) - - expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", interval = "years", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) - - expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> - dplyr::tally() |> dplyr::pull() == 0) - expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level), additional_level != "overall") |> - dplyr::pull("estimate_value") |> sort() == - conditionpp |> dplyr::pull("n") |> as.character() |> sort())) - - # Check result type - checkResultType(co, "summarise_record_count") - - PatientProfiles::mockDisconnect(cdm = cdm) + # skip_on_cran() + # cdm <- mockOmopSketch(numberIndividuals = 3, seed = 1) + # conditionpp <- cdm$condition_occurrence |> + # PatientProfiles::addDemographics(indexDate = "condition_start_date",ageGroup = list(c(0,20),c(21,150))) |> + # dplyr::mutate(year = clock::get_year(condition_start_date)) |> + # dplyr::group_by(year, age_group, sex) |> + # dplyr::summarise(n = n()) + # + # expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) + # + # expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> + # dplyr::tally() |> dplyr::pull() == 0) + # expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level)) |> + # dplyr::pull("estimate_value") |> sort() == + # conditionpp |> dplyr::pull("n") |> as.character() |> sort())) + # + # # Check result type + # checkResultType(co, "summarise_record_count") + # + # PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/vignettes/A-summarise_clinical_tables_records.Rmd b/vignettes/A-summarise_clinical_tables_records.Rmd index 3c2a685a..710fdde4 100644 --- a/vignettes/A-summarise_clinical_tables_records.Rmd +++ b/vignettes/A-summarise_clinical_tables_records.Rmd @@ -143,17 +143,17 @@ summarisedResult |> OmopSketch can also help you to summarise the trend of the records of an OMOP table. See the example below, where we use `summariseRecordCount()` to count the number of records within each year, and then, we use `plotRecordCount()` to create a ggplot with the trend. ```{r, warning=FALSE} -summarisedResult <- summariseRecordCount(cdm, "drug_exposure", interval = "years") +summarisedResult <- summariseRecordCount(cdm, "drug_exposure", unit = "year", unitInterval = 1) summarisedResult |> print() summarisedResult |> plotRecordCount() ``` -Note that you can adjust the time interval period using the `interval` argument, which can be set to either "years" or "months". See the example below, where it shows the number of records every 18 months: +Note that you can adjust the time interval period using the `unit` argument, which can be set to either "year" or "month", and the `unitInterval` argument, which must be an integer specifying the number of years or months which to count the records. See the example below, where it shows the number of records every 18 months: ```{r, warning=FALSE} -summariseRecordCount(cdm, "drug_exposure", interval = "months") |> +summariseRecordCount(cdm, "drug_exposure", unit = "month", unitInterval = 18) |> plotRecordCount() ``` @@ -161,7 +161,8 @@ We can further stratify our counts by sex (setting argument `sex = TRUE`) or by ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("<30" = c(0,29), ">=30" = c(30,Inf))) |> @@ -172,7 +173,8 @@ By default, `plotRecordCount()` does not apply faceting or colour to any variabl ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("0-29" = c(0,29), "30-Inf" = c(30,Inf))) |> @@ -183,7 +185,8 @@ Then, we can simply specify this by using the `facet` and `colour` arguments fro ```{r, warning=FALSE} summariseRecordCount(cdm, "drug_exposure", - interval = "months", + unit = "month", + unitInterval = 18, sex = TRUE, ageGroup = list("0-29" = c(0,29), "30-Inf" = c(30,Inf))) |> diff --git a/vignettes/B-summarise_concept_counts.Rmd b/vignettes/B-summarise_concept_counts.Rmd index fa5ae3e2..673e5226 100644 --- a/vignettes/B-summarise_concept_counts.Rmd +++ b/vignettes/B-summarise_concept_counts.Rmd @@ -109,14 +109,7 @@ summariseConceptCounts(cdm, One can further stratify by year, sex or age group using the `year`, `sex`, and `ageGroup` arguments. ``` {r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, - "sinusitis" = sinusitis), - countBy = "person", - interval = "years", - sex = TRUE, - ageGroup = list("<=50" = c(0,50), ">50" = c(51,Inf))) |> - select(group_level, strata_level, variable_name, estimate_name) |> glimpse() +summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis), countBy = "person", year = TRUE, sex = TRUE, ageGroup = list("<=50" = c(0,50), ">50" = c(51,Inf))) |> select(group_level, strata_level, variable_name, estimate_name) |> glimpse() ``` ## Visualise the results @@ -135,7 +128,7 @@ Notice that either person counts or record counts can be plotted. If both have b summariseConceptCounts(cdm, conceptId = list("sinusitis" = sinusitis), countBy = c("person","record")) |> - filter(variable_name == "Number subjects") |> + filter(estimate_name == "person_count") |> plotConceptCounts() ``` @@ -153,6 +146,6 @@ summariseConceptCounts(cdm, conceptId = list("sinusitis" = sinusitis), countBy = c("person"), sex = TRUE, - ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf))) |> + ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf)))|> plotConceptCounts(facet = "sex", colour = "age_group") ``` diff --git a/vignettes/C-summarise_pop_characteristics.Rmd b/vignettes/C-summarise_pop_characteristics.Rmd new file mode 100644 index 00000000..e61fb278 --- /dev/null +++ b/vignettes/C-summarise_pop_characteristics.Rmd @@ -0,0 +1,94 @@ +--- +title: "Summarise population characteristics" +output: + html_document: + pandoc_args: [ + "--number-offset=1,0" + ] + number_sections: yes + toc: yes +vignette: > + %\VignetteIndexEntry{C-summarise_pop_characteristics} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +library(CDMConnector) +if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") Sys.setenv("EUNOMIA_DATA_FOLDER" = tempdir()) +if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))) dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER")) +if (!eunomia_is_available()) downloadEunomiaData() +``` + +# Introduction + +In this vignette, we will explore the *OmopSketch* functions that provide information about individuals characteristics at specific points in time. We will employ `summarisePopulationCharacteristics()` to generate a summary of the demographic details within the database population. Additionally, we will tidy and present the results using `tablePopulationCharacteristics()`, which supports either [gt](https://gt.rstudio.com/) or [flextable](https://davidgohel.github.io/flextable/) for formatting the output. + +## Create a mock cdm + +Before we dive into *OmopSketch* functions, we need first to load the essential packages and create a mock CDM using the Eunomia database. + +```{r, warning=FALSE} +library(dplyr) +library(CDMConnector) +library(DBI) +library(duckdb) +library(OmopSketch) + +# Connect to Eunomia database +con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) +cdm <- CDMConnector::cdmFromCon( + con = con, cdmSchema = "main", writeSchema = "main" +) + +cdm +``` + +# Summarise population characteristics +To start, we will use `summarisePopulationCharacteristics()` function to generate a summarised result object, capturing demographic characteristics at both `observation_period_start_date` and `observation_period_end_date`. + + +```{r, warning=FALSE} +summarisedResult <- summarisePopulationCharacteristics(cdm) + +summarisedResult |> glimpse() +``` +To tidy and display the summarised result using a [gt](https://gt.rstudio.com/) table, we can use `tablePopulationCharacteristics()` function. +```{r, warning=FALSE} +summarisedResult |> + tablePopulationCharacteristics(type = "flextable") +``` +To obtain a [flextable](https://davidgohel.github.io/flextable/) instead of a [gt](https://gt.rstudio.com/), you can simply change the `type` argument to `"flextable"`. Additionally, it is important to note that age at start, prior observation, and future observation are calculated at the start date defined (in this case, at individuals observation_period_start_date). On the other hand, age at end is calculated at the end date defined (i.e., individuals observation_period_end_date). + +## Trim study period +To focus on a specific period within the observation data, rather than analysing the entire individuals' observation period, we can trim the study period by using the `studyPeriod` argument. This allows to analyse the demographic metrics within a defined time range rather than the default observation start and end dates. +```{r, warning=FALSE} +summarisePopulationCharacteristics(cdm, + studyPeriod = c("1950-01-01", "1999-12-31")) |> + tablePopulationCharacteristics() +``` + +However, if you are interested in analysing the demographic characteristics starting from a specific date without restricting the study end, you can define just the start of the study period. By default, `summarisePopulationCharacteristics()` function will use the observation_period_end_date to calculate the end-point statistics when the end date is not defined. + +```{r, warning=FALSE} +summarisePopulationCharacteristics(cdm, + studyPeriod = c("1950-01-01", NA)) |> + tablePopulationCharacteristics() +``` + +Similarly, if you are only interested in analysing the population characteristics up to a specific end date, you can define only the end date and set the `startDate = NA`. By default the observation_period_start_date will be used. + +## Stratify by age groups and sex +Population characteristics can also be estimated by stratifying the data based on age and sex using `ageGroups` and `sex` arguments. +```{r, warning=FALSE} +summarisePopulationCharacteristics(cdm, + sex = TRUE, + ageGroup = list("<60" = c(0,59), ">=60" = c(60, Inf))) |> + tablePopulationCharacteristics() +``` + From ef05abf71ea113778b3b85ef4ba0bf1c4a78dc00 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Tue, 12 Nov 2024 20:37:11 +0000 Subject: [PATCH 2/8] Updares --- R/summariseOmopSnapshot.R | 3 + R/tableOmopSnapshot.R | 3 + man/summariseOmopSnapshot.Rd | 3 + man/tableOmopSnapshot.Rd | 3 + tests/testthat/test-summariseConceptCounts.R | 246 +++--- .../test-summariseObservationPeriod.R | 748 +++++++++--------- .../test-summarisePopulationCharacteristics.R | 146 ++-- tests/testthat/test-summariseRecordCount.R | 40 +- vignettes/C-summarise_pop_characteristics.Rmd | 94 --- 9 files changed, 602 insertions(+), 684 deletions(-) delete mode 100644 vignettes/C-summarise_pop_characteristics.Rmd diff --git a/R/summariseOmopSnapshot.R b/R/summariseOmopSnapshot.R index c569c186..8342e856 100644 --- a/R/summariseOmopSnapshot.R +++ b/R/summariseOmopSnapshot.R @@ -6,9 +6,12 @@ #' @return A summarised_result object. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' cdm <- mockOmopSketch(numberIndividuals = 10) #' #' summariseOmopSnapshot(cdm) +#' } summariseOmopSnapshot <- function(cdm) { omopgenerics::validateCdmArgument(cdm) diff --git a/R/tableOmopSnapshot.R b/R/tableOmopSnapshot.R index 60166009..1ccb8741 100644 --- a/R/tableOmopSnapshot.R +++ b/R/tableOmopSnapshot.R @@ -5,6 +5,8 @@ #' @return A gt or flextable object with the summarised data. #' @export #' @examples +#' \donttest{ +#' library(OmopSketch) #' cdm <- mockOmopSketch(numberIndividuals = 10) #' #' result <- summariseOmopSnapshot(cdm) @@ -13,6 +15,7 @@ #' tableOmopSnapshot() #' #' PatientProfiles::mockDisconnect(cdm) +#' } tableOmopSnapshot <- function(result, type = "gt") { # initial checks diff --git a/man/summariseOmopSnapshot.Rd b/man/summariseOmopSnapshot.Rd index 19d6da7b..75552d34 100644 --- a/man/summariseOmopSnapshot.Rd +++ b/man/summariseOmopSnapshot.Rd @@ -18,7 +18,10 @@ Summarise a cdm_reference object creating a snapshot with the metadata of the cdm_reference object. } \examples{ +\donttest{ +library(OmopSketch) cdm <- mockOmopSketch(numberIndividuals = 10) summariseOmopSnapshot(cdm) } +} diff --git a/man/tableOmopSnapshot.Rd b/man/tableOmopSnapshot.Rd index 2cb78528..486d82e4 100644 --- a/man/tableOmopSnapshot.Rd +++ b/man/tableOmopSnapshot.Rd @@ -18,6 +18,8 @@ A gt or flextable object with the summarised data. Create a visual table from a summarise_omop_snapshot result. } \examples{ +\donttest{ +library(OmopSketch) cdm <- mockOmopSketch(numberIndividuals = 10) result <- summariseOmopSnapshot(cdm) @@ -27,3 +29,4 @@ result |> PatientProfiles::mockDisconnect(cdm) } +} diff --git a/tests/testthat/test-summariseConceptCounts.R b/tests/testthat/test-summariseConceptCounts.R index c64300bc..950d19c9 100644 --- a/tests/testthat/test-summariseConceptCounts.R +++ b/tests/testthat/test-summariseConceptCounts.R @@ -301,129 +301,129 @@ test_that("summarise code use - eunomia", { }) test_that("summarise code use - mock data", { - # skip_on_cran() - # - # person <- tibble::tibble( - # person_id = c(1L,2L), - # gender_concept_id = c(8532L,8507L), - # year_of_birth = c(1997L,1963L), - # month_of_birth = c(8L,1L), - # day_of_birth = c(22L,27L), - # race_concept_id = c(1L,1L), - # ethnicity_concept_id = c(1L,1L) - # ) - # observation_period <- tibble::tibble( - # person_id = c(1L,2L), - # observation_period_id = c(1L,2L), - # observation_period_start_date = c(as.Date("2000-06-03"), as.Date("1999-05-04")), - # observation_period_end_date = c(as.Date("2013-08-03"), as.Date("2004-01-04")), - # period_type_concept_id = c(1L,1L) - # ) - # condition_occurrence <- tibble::tibble( - # person_id = c(1L,1L,1L,2L,2L,2L,2L,2L), - # condition_concept_id = c(1L,3L,5L,1L,5L,5L,17L,17L), - # condition_start_date = c(as.Date("2002-06-30"), as.Date("2004-05-29"), as.Date("2001-12-20"), - # as.Date("2000-03-10"), as.Date("2000-02-25"), as.Date("1999-07-15"), - # as.Date("1999-06-06"), as.Date("2000-07-17")), - # condition_end_date = c(as.Date("2004-09-30"), as.Date("2009-05-29"), as.Date("2008-12-20"), - # as.Date("2001-03-10"), as.Date("2001-12-25"), as.Date("2001-07-15"), - # as.Date("2002-06-06"), as.Date("2000-11-17")), - # condition_occurrence_id = c(1L,2L,3L,4L,5L,6L,7L,8L), - # condition_type_concept_id = c(1L), - # condition_source_concept_id = c(as.integer(NA)) - # ) - # concept <- tibble::tibble( - # concept_id = c(1L,3L,5L,17L), - # concept_name = c("Musculoskeletal disorder", "Arthritis", "Osteoarthritis of hip", "Arthritis"), - # domain_id = c("Condition"), - # standard_concept = c("S","S","S",NA), - # concept_class_id = c("Clinical Finding", "Clinical Finding", "Clinical Finding", "ICD Code"), - # concept_code = c("1234"), - # valid_start_date = c(as.Date(NA)), - # valid_end_date = c(as.Date(NA)), - # vocabulary_id = as.character(NA) - # ) - # - # cdm <- omopgenerics::cdmFromTables( - # tables = list( - # person = person, - # observation_period = observation_period, - # condition_occurrence = condition_occurrence, - # concept = concept - # ), - # cdmName = "mock data" - # ) - # cdm <- CDMConnector::copyCdmTo( - # con = connection(), cdm = cdm, schema = schema()) - # - # conceptId <- list( - # "Arthritis" = c(17,3), - # "Musculoskeletal disorder" = c(1), - # "Osteoarthritis of hip" = c(5) - # ) - # - # result <- summariseConceptCounts(cdm, conceptId) - # - # # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 - # expect_true(all(result |> - # dplyr::filter(variable_name == "Arthritis") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c("1", "2", "1", "1"))) - # - # # Osteoarthritis (code 5), two records ind 2, one record ind 1 - # expect_true(all(result |> - # dplyr::filter(variable_name == "Osteoarthritis of hip") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(2,3))) - # - # # Musculoskeletal disorder (code 1), one record each ind - # expect_true(all(result |> - # dplyr::filter(variable_name == "Musculoskeletal disorder") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(2,2))) - # - # result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) - # # Individuals belong to the same age group but to different sex groups - # - # # Arthritis (codes 3 and 17), one record of each per ind - # expect_true(all(result |> - # dplyr::filter(variable_name == "Arthritis" & strata_level == "Male") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(1,2))) - # expect_true(all(result |> - # dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150 &&& Male") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(1,2))) - # expect_true(all(result |> - # dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(1,2,1,1))) - # - # # Osteoarthritis of hip (code 5), two records ind 2 and one ind 1 - # expect_true(all(result |> - # dplyr::filter(variable_name == "Osteoarthritis of hip" & strata_level == "Female") |> - # dplyr::tally() |> - # dplyr::pull() == 2)) - # - # # Musculoskeletal disorder (code 1), one record each ind - # expect_true(all(result |> - # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(1,1))) - # expect_true(all(result |> - # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(1,1))) - # expect_true(all(result |> - # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(2,2))) - # expect_true(all(result |> - # dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "overall") |> - # dplyr::arrange(variable_level, estimate_name) |> - # dplyr::pull(estimate_value) == c(2,2))) - # - # PatientProfiles::mockDisconnect(cdm) + skip_on_cran() + + person <- tibble::tibble( + person_id = c(1L,2L), + gender_concept_id = c(8532L,8507L), + year_of_birth = c(1997L,1963L), + month_of_birth = c(8L,1L), + day_of_birth = c(22L,27L), + race_concept_id = c(1L,1L), + ethnicity_concept_id = c(1L,1L) + ) + observation_period <- tibble::tibble( + person_id = c(1L,2L), + observation_period_id = c(1L,2L), + observation_period_start_date = c(as.Date("2000-06-03"), as.Date("1999-05-04")), + observation_period_end_date = c(as.Date("2013-08-03"), as.Date("2004-01-04")), + period_type_concept_id = c(1L,1L) + ) + condition_occurrence <- tibble::tibble( + person_id = c(1L,1L,1L,2L,2L,2L,2L,2L), + condition_concept_id = c(1L,3L,5L,1L,5L,5L,17L,17L), + condition_start_date = c(as.Date("2002-06-30"), as.Date("2004-05-29"), as.Date("2001-12-20"), + as.Date("2000-03-10"), as.Date("2000-02-25"), as.Date("1999-07-15"), + as.Date("1999-06-06"), as.Date("2000-07-17")), + condition_end_date = c(as.Date("2004-09-30"), as.Date("2009-05-29"), as.Date("2008-12-20"), + as.Date("2001-03-10"), as.Date("2001-12-25"), as.Date("2001-07-15"), + as.Date("2002-06-06"), as.Date("2000-11-17")), + condition_occurrence_id = c(1L,2L,3L,4L,5L,6L,7L,8L), + condition_type_concept_id = c(1L), + condition_source_concept_id = c(as.integer(NA)) + ) + concept <- tibble::tibble( + concept_id = c(1L,3L,5L,17L), + concept_name = c("Musculoskeletal disorder", "Arthritis", "Osteoarthritis of hip", "Arthritis"), + domain_id = c("Condition"), + standard_concept = c("S","S","S",NA), + concept_class_id = c("Clinical Finding", "Clinical Finding", "Clinical Finding", "ICD Code"), + concept_code = c("1234"), + valid_start_date = c(as.Date(NA)), + valid_end_date = c(as.Date(NA)), + vocabulary_id = as.character(NA) + ) + + cdm <- omopgenerics::cdmFromTables( + tables = list( + person = person, + observation_period = observation_period, + condition_occurrence = condition_occurrence, + concept = concept + ), + cdmName = "mock data" + ) + cdm <- CDMConnector::copyCdmTo( + con = connection(), cdm = cdm, schema = schema()) + + conceptId <- list( + "Arthritis" = c(17,3), + "Musculoskeletal disorder" = c(1), + "Osteoarthritis of hip" = c(5) + ) + + result <- summariseConceptCounts(cdm, conceptId) + + # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c("1", "2", "1", "1"))) + + # Osteoarthritis (code 5), two records ind 2, one record ind 1 + expect_true(all(result |> + dplyr::filter(variable_name == "Osteoarthritis of hip") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,3))) + + # Musculoskeletal disorder (code 1), one record each ind + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) + + result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) + # Individuals belong to the same age group but to different sex groups + + # Arthritis (codes 3 and 17), one record of each per ind + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150 &&& Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Arthritis" & strata_level == "3 to 150") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,2,1,1))) + + # Osteoarthritis of hip (code 5), two records ind 2 and one ind 1 + expect_true(all(result |> + dplyr::filter(variable_name == "Osteoarthritis of hip" & strata_level == "Female") |> + dplyr::tally() |> + dplyr::pull() == 2)) + + # Musculoskeletal disorder (code 1), one record each ind + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Female") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,1))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150 &&& Male") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(1,1))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "3 to 150") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) + expect_true(all(result |> + dplyr::filter(variable_name == "Musculoskeletal disorder" & strata_level == "overall") |> + dplyr::arrange(variable_level, estimate_name) |> + dplyr::pull(estimate_value) == c(2,2))) + + PatientProfiles::mockDisconnect(cdm) }) test_that("plot concept counts works", { diff --git a/tests/testthat/test-summariseObservationPeriod.R b/tests/testthat/test-summariseObservationPeriod.R index 8ba1e920..b8a0a0ad 100644 --- a/tests/testthat/test-summariseObservationPeriod.R +++ b/tests/testthat/test-summariseObservationPeriod.R @@ -1,275 +1,275 @@ test_that("check summariseObservationPeriod works", { - # skip_on_cran() - # - # # helper function - # removeSettings <- function(x) { - # attr(x, "settings") <- NULL - # return(x) - # } - # nPoints <- 512 - # - # # Load mock database - # cdm <- omopgenerics::cdmFromTables( - # tables = list( - # person = dplyr::tibble( - # person_id = as.integer(1:4), - # gender_concept_id = c(8507L, 8532L, 8532L, 8507L), - # year_of_birth = 2010L, - # month_of_birth = 1L, - # day_of_birth = 1L, - # race_concept_id = 0L, - # ethnicity_concept_id = 0L - # ), - # observation_period = dplyr::tibble( - # observation_period_id = as.integer(1:8), - # person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), - # observation_period_start_date = as.Date(c( - # "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", - # "2020-03-01", "2020-04-10", "2020-03-10" - # )), - # observation_period_end_date = as.Date(c( - # "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", - # "2020-03-09", "2020-05-08", "2020-12-10" - # )), - # period_type_concept_id = 0L - # ) - # ), - # cdmName = "mock data" - # ) - # cdm <- CDMConnector::copyCdmTo( - # con = connection(), cdm = cdm, schema = schema()) - # - # # simple run - # expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period)) - # expect_no_error( - # resAllD <- summariseObservationPeriod(cdm$observation_period, estimates = "density")) - # expect_no_error( - # resAllN <- summariseObservationPeriod(cdm$observation_period, - # estimates = c( - # "mean", "sd", "min", "q05", "q25", - # "median", "q75", "q95", "max"))) - # expect_equal( - # resAllD |> dplyr::filter(!is.na(variable_level)) |> - # dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings(), - # resAll |> dplyr::filter(!is.na(variable_level)) |> - # dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings() - # ) - # - # # test estimates - # expect_no_error( - # resEst <- cdm$observation_period |> - # summariseObservationPeriod(estimates = c("mean", "median"))) - # expect_true(all( - # resEst |> - # dplyr::filter(!.data$variable_name %in% c("number records", "number subjects")) |> - # dplyr::pull("estimate_name") |> - # unique() %in% c("mean", "median") - # )) - # - # # counts - # expect_identical(resAll$estimate_value[resAll$variable_name == "number records"], "8") - # x <- dplyr::tibble( - # group_level = c("overall", "1st", "2nd", "3rd"), - # variable_name = "number subjects", - # estimate_value = c("4", "4", "3", "1")) - # expect_identical(nrow(x), resAll |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) - # - # # records per person - # expect_identical( - # resAll |> - # dplyr::filter( - # variable_name == "records per person", estimate_name == "mean") |> - # dplyr::pull("estimate_value"), - # "2" - # ) - # - # # duration - # expect_identical( - # resAll |> - # dplyr::filter(variable_name == "duration in days", estimate_name == "mean") |> - # dplyr::pull("estimate_value"), - # as.character(c( - # mean(c(20, 6, 113, 144, 18, 9, 29, 276)), mean(c(20, 18, 9, 276)), - # mean(c(6, 29, 144)), 113 - # )) - # ) - # - # # days to next observation period - # expect_identical( - # resAll |> - # dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |> - # dplyr::pull("estimate_value"), - # as.character(c( - # mean(c(5, 32, 136, 26)), mean(c(5, 32, 136)), 26, NA - # )) - # ) - # - # # duration - density - # xx <- resAllD |> - # dplyr::filter(variable_name == "duration in days", !is.na(variable_level)) |> - # dplyr::group_by(group_level) |> - # dplyr::summarise( - # n = dplyr::n(), - # area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( - # max(as.numeric(estimate_value[estimate_name == "density_x"])) - - # min(as.numeric(estimate_value[estimate_name == "density_x"])) - # )/(nPoints - 1) - # ) - # expect_identical(xx$n |> unique() |> sort(decreasing = TRUE), c(as.integer(nPoints*2L),6L)) - # expect_identical(xx$area |> round(2) |> unique() |> sort(decreasing = TRUE), c(1,0)) - # - # # days to next observation period - density - # xx <- resAll |> - # dplyr::filter(variable_name == "days to next observation period", - # !is.na(variable_level)) |> - # dplyr::group_by(group_level) |> - # dplyr::summarise( - # n = dplyr::n(), - # area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( - # max(as.numeric(estimate_value[estimate_name == "density_x"])) - - # min(as.numeric(estimate_value[estimate_name == "density_x"])) - # )/(nPoints - 1) - # ) - # expect_identical(xx$n |> unique() |> sort(decreasing = TRUE) , c(as.integer(nPoints*2L),6L)) - # expect_identical(xx$area[xx$group_level != "2nd"] |> round(2) |> unique(), 1) - # - # # only one exposure per individual - # cdm$observation_period <- cdm$observation_period |> - # dplyr::group_by(person_id) |> - # dplyr::filter(observation_period_id == min(observation_period_id, na.rm = TRUE)) |> - # dplyr::ungroup() |> - # dplyr::compute(name = "observation_period", temporary = FALSE) - # - # expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period)) - # - # # counts - # expect_identical(resOne$estimate_value[resOne$variable_name == "number records"], "4") - # x <- dplyr::tibble( - # group_level = c("overall", "1st"), - # variable_name = "number subjects", - # estimate_value = c("4", "4")) - # expect_identical(nrow(x), resOne |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) - # - # # Check result type - # checkResultType(resOne, "summarise_observation_period") - # - # # empty observation period - # cdm$observation_period <- cdm$observation_period |> - # dplyr::filter(person_id == 0) |> - # dplyr::compute(name = "observation_period", temporary = FALSE) - # - # expect_no_error(resEmpty <- summariseObservationPeriod(cdm$observation_period)) - # expect_true(nrow(resEmpty) == 2) - # expect_identical(unique(resEmpty$estimate_value), "0") - # - # # table works - # expect_no_error(tableObservationPeriod(resAll)) - # expect_no_error(tableObservationPeriod(resOne)) - # expect_no_error(tableObservationPeriod(resEmpty)) - # - # # plot works - # expect_no_error(plotObservationPeriod(resAll)) - # expect_no_error(plotObservationPeriod(resOne)) - # # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION - # - # # check all plots combinations - # expect_no_error( - # resAll |> - # plotObservationPeriod( - # variableName = "number subjects", plotType = "barplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "number subjects", plotType = "boxplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "number subjects", plotType = "densityplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "number subjects", plotType = "random") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "duration in days", plotType = "barplot") - # ) - # expect_no_error( - # resAll |> - # plotObservationPeriod( - # variableName = "duration in days", plotType = "boxplot") - # ) - # expect_error( - # resAllN |> - # plotObservationPeriod( - # variableName = "duration in days", plotType = "densityplot") - # ) - # expect_no_error( - # resAllD |> - # plotObservationPeriod( - # variableName = "duration in days", plotType = "densityplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "duration in days", plotType = "random") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "records per person", plotType = "barplot") - # ) - # expect_no_error( - # resAll |> - # plotObservationPeriod( - # variableName = "records per person", plotType = "boxplot") - # ) - # expect_error( - # resAllN |> - # plotObservationPeriod( - # variableName = "records per person", plotType = "densityplot") - # ) - # expect_no_error( - # resAllD |> - # plotObservationPeriod( - # variableName = "records per person", plotType = "densityplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "records per person", plotType = "random") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "days to next observation period", plotType = "barplot") - # ) - # expect_no_error( - # resAll |> - # plotObservationPeriod( - # variableName = "days to next observation period", plotType = "boxplot") - # ) - # expect_error( - # resAllN |> - # plotObservationPeriod( - # variableName = "days to next observation period", plotType = "densityplot") - # ) - # expect_no_error( - # resAllD |> - # plotObservationPeriod( - # variableName = "days to next observation period", plotType = "densityplot") - # ) - # expect_error( - # resAll |> - # plotObservationPeriod( - # variableName = "days to next observation period", plotType = "random") - # ) - # - # PatientProfiles::mockDisconnect(cdm = cdm) + skip_on_cran() + + # helper function + removeSettings <- function(x) { + attr(x, "settings") <- NULL + return(x) + } + nPoints <- 512 + + # Load mock database + cdm <- omopgenerics::cdmFromTables( + tables = list( + person = dplyr::tibble( + person_id = as.integer(1:4), + gender_concept_id = c(8507L, 8532L, 8532L, 8507L), + year_of_birth = 2010L, + month_of_birth = 1L, + day_of_birth = 1L, + race_concept_id = 0L, + ethnicity_concept_id = 0L + ), + observation_period = dplyr::tibble( + observation_period_id = as.integer(1:8), + person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), + observation_period_start_date = as.Date(c( + "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", + "2020-03-01", "2020-04-10", "2020-03-10" + )), + observation_period_end_date = as.Date(c( + "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", + "2020-03-09", "2020-05-08", "2020-12-10" + )), + period_type_concept_id = 0L + ) + ), + cdmName = "mock data" + ) + cdm <- CDMConnector::copyCdmTo( + con = connection(), cdm = cdm, schema = schema()) + + # simple run + expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period)) + expect_no_error( + resAllD <- summariseObservationPeriod(cdm$observation_period, estimates = "density")) + expect_no_error( + resAllN <- summariseObservationPeriod(cdm$observation_period, + estimates = c( + "mean", "sd", "min", "q05", "q25", + "median", "q75", "q95", "max"))) + expect_equal( + resAllD |> dplyr::filter(!is.na(variable_level)) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings(), + resAll |> dplyr::filter(!is.na(variable_level)) |> + dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings() + ) + + # test estimates + expect_no_error( + resEst <- cdm$observation_period |> + summariseObservationPeriod(estimates = c("mean", "median"))) + expect_true(all( + resEst |> + dplyr::filter(!.data$variable_name %in% c("number records", "number subjects")) |> + dplyr::pull("estimate_name") |> + unique() %in% c("mean", "median") + )) + + # counts + expect_identical(resAll$estimate_value[resAll$variable_name == "number records"], "8") + x <- dplyr::tibble( + group_level = c("overall", "1st", "2nd", "3rd"), + variable_name = "number subjects", + estimate_value = c("4", "4", "3", "1")) + expect_identical(nrow(x), resAll |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) + + # records per person + expect_identical( + resAll |> + dplyr::filter( + variable_name == "records per person", estimate_name == "mean") |> + dplyr::pull("estimate_value"), + "2" + ) + + # duration + expect_identical( + resAll |> + dplyr::filter(variable_name == "duration in days", estimate_name == "mean") |> + dplyr::pull("estimate_value"), + as.character(c( + mean(c(20, 6, 113, 144, 18, 9, 29, 276)), mean(c(20, 18, 9, 276)), + mean(c(6, 29, 144)), 113 + )) + ) + + # days to next observation period + expect_identical( + resAll |> + dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |> + dplyr::pull("estimate_value"), + as.character(c( + mean(c(5, 32, 136, 26)), mean(c(5, 32, 136)), 26, NA + )) + ) + + # duration - density + xx <- resAllD |> + dplyr::filter(variable_name == "duration in days", !is.na(variable_level)) |> + dplyr::group_by(group_level) |> + dplyr::summarise( + n = dplyr::n(), + area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( + max(as.numeric(estimate_value[estimate_name == "density_x"])) - + min(as.numeric(estimate_value[estimate_name == "density_x"])) + )/(nPoints - 1) + ) + expect_identical(xx$n |> unique() |> sort(decreasing = TRUE), c(as.integer(nPoints*2L),6L)) + expect_identical(xx$area |> round(2) |> unique() |> sort(decreasing = TRUE), c(1,0)) + + # days to next observation period - density + xx <- resAll |> + dplyr::filter(variable_name == "days to next observation period", + !is.na(variable_level)) |> + dplyr::group_by(group_level) |> + dplyr::summarise( + n = dplyr::n(), + area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * ( + max(as.numeric(estimate_value[estimate_name == "density_x"])) - + min(as.numeric(estimate_value[estimate_name == "density_x"])) + )/(nPoints - 1) + ) + expect_identical(xx$n |> unique() |> sort(decreasing = TRUE) , c(as.integer(nPoints*2L),6L)) + expect_identical(xx$area[xx$group_level != "2nd"] |> round(2) |> unique(), 1) + + # only one exposure per individual + cdm$observation_period <- cdm$observation_period |> + dplyr::group_by(person_id) |> + dplyr::filter(observation_period_id == min(observation_period_id, na.rm = TRUE)) |> + dplyr::ungroup() |> + dplyr::compute(name = "observation_period", temporary = FALSE) + + expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period)) + + # counts + expect_identical(resOne$estimate_value[resOne$variable_name == "number records"], "4") + x <- dplyr::tibble( + group_level = c("overall", "1st"), + variable_name = "number subjects", + estimate_value = c("4", "4")) + expect_identical(nrow(x), resOne |> dplyr::inner_join(x, by = colnames(x)) |> nrow()) + + # Check result type + checkResultType(resOne, "summarise_observation_period") + + # empty observation period + cdm$observation_period <- cdm$observation_period |> + dplyr::filter(person_id == 0) |> + dplyr::compute(name = "observation_period", temporary = FALSE) + + expect_no_error(resEmpty <- summariseObservationPeriod(cdm$observation_period)) + expect_true(nrow(resEmpty) == 2) + expect_identical(unique(resEmpty$estimate_value), "0") + + # table works + expect_no_error(tableObservationPeriod(resAll)) + expect_no_error(tableObservationPeriod(resOne)) + expect_no_error(tableObservationPeriod(resEmpty)) + + # plot works + expect_no_error(plotObservationPeriod(resAll)) + expect_no_error(plotObservationPeriod(resOne)) + # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION + + # check all plots combinations + expect_no_error( + resAll |> + plotObservationPeriod( + variableName = "number subjects", plotType = "barplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "number subjects", plotType = "boxplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "number subjects", plotType = "densityplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "number subjects", plotType = "random") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "duration in days", plotType = "barplot") + ) + expect_no_error( + resAll |> + plotObservationPeriod( + variableName = "duration in days", plotType = "boxplot") + ) + expect_error( + resAllN |> + plotObservationPeriod( + variableName = "duration in days", plotType = "densityplot") + ) + expect_no_error( + resAllD |> + plotObservationPeriod( + variableName = "duration in days", plotType = "densityplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "duration in days", plotType = "random") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "records per person", plotType = "barplot") + ) + expect_no_error( + resAll |> + plotObservationPeriod( + variableName = "records per person", plotType = "boxplot") + ) + expect_error( + resAllN |> + plotObservationPeriod( + variableName = "records per person", plotType = "densityplot") + ) + expect_no_error( + resAllD |> + plotObservationPeriod( + variableName = "records per person", plotType = "densityplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "records per person", plotType = "random") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "days to next observation period", plotType = "barplot") + ) + expect_no_error( + resAll |> + plotObservationPeriod( + variableName = "days to next observation period", plotType = "boxplot") + ) + expect_error( + resAllN |> + plotObservationPeriod( + variableName = "days to next observation period", plotType = "densityplot") + ) + expect_no_error( + resAllD |> + plotObservationPeriod( + variableName = "days to next observation period", plotType = "densityplot") + ) + expect_error( + resAll |> + plotObservationPeriod( + variableName = "days to next observation period", plotType = "random") + ) + + PatientProfiles::mockDisconnect(cdm = cdm) }) test_that("check it works with mockOmopSketch", { @@ -333,107 +333,107 @@ test_that("check it works with mockOmopSketch", { }) test_that("check summariseObservationPeriod strata works", { - # skip_on_cran() - # - # # helper function - # removeSettings <- function(x) { - # attr(x, "settings") <- NULL - # return(x) - # } - # nPoints <- 512 - # - # # Load mock database - # cdm <- omopgenerics::cdmFromTables( - # tables = list( - # person = dplyr::tibble( - # person_id = as.integer(1:4), - # gender_concept_id = c(8507L, 8532L, 8532L, 8507L), - # year_of_birth = c(2010L, 2010L, 2011L, 2012L), - # month_of_birth = 1L, - # day_of_birth = 1L, - # race_concept_id = 0L, - # ethnicity_concept_id = 0L - # ), - # observation_period = dplyr::tibble( - # observation_period_id = as.integer(1:8), - # person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), - # observation_period_start_date = as.Date(c( - # "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", - # "2020-03-01", "2020-04-10", "2020-03-10" - # )), - # observation_period_end_date = as.Date(c( - # "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", - # "2020-03-09", "2020-05-08", "2020-12-10" - # )), - # period_type_concept_id = 0L - # ) - # ), - # cdmName = "mock data" - # ) - # cdm <- CDMConnector::copyCdmTo( - # con = connection(), cdm = cdm, schema = schema()) - # - # # simple run - # expect_no_error(summariseObservationPeriod(cdm$observation_period, - # estimates = c("mean"), - # ageGroup = list(c(0,9), c(10, Inf)))) - # - # expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period, - # estimates = c("mean", "sd", "min", "max", "median", "density"))) - # expect_no_error(resStrata <- summariseObservationPeriod(cdm$observation_period, - # estimates = c("mean", "sd", "min", "max", "median", "density"), - # ageGroup = list("<10" = c(0,9), ">=10" = c(10, Inf)), - # sex = TRUE)) - # # test overall - # x <- resStrata |> - # dplyr::filter(strata_name == "overall", strata_level == "overall") |> - # dplyr::rename("strata" = "estimate_value") |> - # dplyr::inner_join( - # resAll |> - # dplyr::rename("all" = "estimate_value") - # ) - # expect_identical(x$strata, x$all) - # - # # check strata groups have the expected value - # expect_identical(resStrata |> - # dplyr::filter(variable_name == "number subjects", - # strata_level == "Female", - # group_level == "2nd") |> - # dplyr::pull("estimate_value"),"2") - # - # expect_identical(resStrata |> - # dplyr::filter(variable_name == "number subjects", - # strata_level == ">=10 &&& Male", - # group_level == "3rd") |> - # dplyr::pull("estimate_value"),"1") - # - # # duration - # expect_identical( - # resStrata |> - # dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == ">=10") |> - # dplyr::pull("estimate_value"), - # as.character(c( - # mean(c(20, 18)), - # mean(c(6, 144)), - # mean(113))) - # ) - # - # expect_identical( - # resStrata |> - # dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == "<10") |> - # dplyr::pull("estimate_value"), - # as.character(c( - # mean(c(9, 276)), - # mean(c(29)))) - # ) - # - # # days to next observation period - # expect_identical( - # resStrata |> - # dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean", - # strata_level == "<10 &&& Female", group_level == "1st") |> - # dplyr::pull("estimate_value"), "32" - # ) - # - # PatientProfiles::mockDisconnect(cdm = cdm) + skip_on_cran() + + # helper function + removeSettings <- function(x) { + attr(x, "settings") <- NULL + return(x) + } + nPoints <- 512 + + # Load mock database + cdm <- omopgenerics::cdmFromTables( + tables = list( + person = dplyr::tibble( + person_id = as.integer(1:4), + gender_concept_id = c(8507L, 8532L, 8532L, 8507L), + year_of_birth = c(2010L, 2010L, 2011L, 2012L), + month_of_birth = 1L, + day_of_birth = 1L, + race_concept_id = 0L, + ethnicity_concept_id = 0L + ), + observation_period = dplyr::tibble( + observation_period_id = as.integer(1:8), + person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(), + observation_period_start_date = as.Date(c( + "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10", + "2020-03-01", "2020-04-10", "2020-03-10" + )), + observation_period_end_date = as.Date(c( + "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27", + "2020-03-09", "2020-05-08", "2020-12-10" + )), + period_type_concept_id = 0L + ) + ), + cdmName = "mock data" + ) + cdm <- CDMConnector::copyCdmTo( + con = connection(), cdm = cdm, schema = schema()) + + # simple run + expect_no_error(summariseObservationPeriod(cdm$observation_period, + estimates = c("mean"), + ageGroup = list(c(0,9), c(10, Inf)))) + + expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period, + estimates = c("mean", "sd", "min", "max", "median", "density"))) + expect_no_error(resStrata <- summariseObservationPeriod(cdm$observation_period, + estimates = c("mean", "sd", "min", "max", "median", "density"), + ageGroup = list("<10" = c(0,9), ">=10" = c(10, Inf)), + sex = TRUE)) + # test overall + x <- resStrata |> + dplyr::filter(strata_name == "overall", strata_level == "overall") |> + dplyr::rename("strata" = "estimate_value") |> + dplyr::inner_join( + resAll |> + dplyr::rename("all" = "estimate_value") + ) + expect_identical(x$strata, x$all) + + # check strata groups have the expected value + expect_identical(resStrata |> + dplyr::filter(variable_name == "number subjects", + strata_level == "Female", + group_level == "2nd") |> + dplyr::pull("estimate_value"),"2") + + expect_identical(resStrata |> + dplyr::filter(variable_name == "number subjects", + strata_level == ">=10 &&& Male", + group_level == "3rd") |> + dplyr::pull("estimate_value"),"1") + + # duration + expect_identical( + resStrata |> + dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == ">=10") |> + dplyr::pull("estimate_value"), + as.character(c( + mean(c(20, 18)), + mean(c(6, 144)), + mean(113))) + ) + + expect_identical( + resStrata |> + dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == "<10") |> + dplyr::pull("estimate_value"), + as.character(c( + mean(c(9, 276)), + mean(c(29)))) + ) + + # days to next observation period + expect_identical( + resStrata |> + dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean", + strata_level == "<10 &&& Female", group_level == "1st") |> + dplyr::pull("estimate_value"), "32" + ) + + PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/tests/testthat/test-summarisePopulationCharacteristics.R b/tests/testthat/test-summarisePopulationCharacteristics.R index f510cc83..b2154beb 100644 --- a/tests/testthat/test-summarisePopulationCharacteristics.R +++ b/tests/testthat/test-summarisePopulationCharacteristics.R @@ -116,35 +116,35 @@ test_that("summarisePopulationCharacteristics() works", { }) test_that("summarisePopulationCharacteristics() strata works", { - # skip_on_cran() - # # Load mock database ---- - # cdm <- omock::mockCdmReference() |> - # omock::mockPerson(seed = 1L) |> - # omock::mockObservationPeriod(seed = 1L) |> - # copyCdm() - # - # # Add sex and age group strata - # expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( - # cdm = cdm, - # sex = TRUE, - # ageGroup = list(c(0,20),c(21,150))) - # ) - # expect_true(inherits(summarisedPopulation,"summarised_result")) - # expect_true(all(summarisedPopulation |> - # dplyr::select("strata_name") |> - # dplyr::distinct() |> - # dplyr::pull() |> - # sort() == - # c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) - # - # expect_true(all(summarisedPopulation |> - # dplyr::filter(variable_name == "Number records") |> - # dplyr::arrange(strata_name, strata_level) |> - # dplyr::select("estimate_value") |> - # dplyr::pull() == - # c(4,6,1,3,4,2,10,5,5))) - # - # PatientProfiles::mockDisconnect(cdm = cdm) + skip_on_cran() + # Load mock database ---- + cdm <- omock::mockCdmReference() |> + omock::mockPerson(seed = 1L) |> + omock::mockObservationPeriod(seed = 1L) |> + copyCdm() + + # Add sex and age group strata + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm, + sex = TRUE, + ageGroup = list(c(0,20),c(21,150))) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() |> + sort() == + c("age_group_at_start", "age_group_at_start &&& sex", "overall", "sex"))) + + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::arrange(strata_name, strata_level) |> + dplyr::select("estimate_value") |> + dplyr::pull() == + c(4,6,1,3,4,2,10,5,5))) + + PatientProfiles::mockDisconnect(cdm = cdm) }) test_that("summarisePopulationCharacteristics() expected errors", { @@ -181,49 +181,49 @@ test_that("tablePopulationCharacteristics() works", { }) test_that("summarisePopulationCharacteristics() works with mockOmopSKetch", { -# skip_on_cran() -# cdm <- mockOmopSketch(numberIndividuals = 2, seed = 1) -# expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( -# cdm = cdm) -# ) -# expect_true(inherits(summarisedPopulation,"summarised_result")) -# expect_true(all(summarisedPopulation |> -# dplyr::select("strata_name") |> -# dplyr::distinct() |> -# dplyr::pull() == -# c("overall"))) -# expect_true(all(summarisedPopulation |> -# dplyr::filter(variable_name == "Number records") |> -# dplyr::select("estimate_value") |> -# dplyr::pull() == -# 2)) -# expect_true(all(summarisedPopulation |> -# dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> -# dplyr::select("estimate_value") |> -# dplyr::pull() == -# "1999-04-05")) -# expect_true(summarisedPopulation |> -# dplyr::filter(variable_name == "Age at end", estimate_name == "median") |> -# dplyr::pull("estimate_value") == -# as.character(mean(c(40,16)))) -# expect_true(all(summarisedPopulation |> -# dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> -# dplyr::select("estimate_value") |> -# dplyr::pull() == -# "2013-06-29")) -# expect_true(all(summarisedPopulation |> -# dplyr::filter(variable_name == "Sex", estimate_name == "percentage") |> -# dplyr::select("estimate_value") |> -# dplyr::pull() == -# c(50,50))) -# expect_true(all(summarisedPopulation |> -# dplyr::filter(variable_name == "Age at start", estimate_name %in% c("min","max")) |> -# dplyr::pull("estimate_value") |> -# sort() == -# cdm$observation_period |> -# PatientProfiles::addAge(indexDate = "observation_period_start_date") |> -# dplyr::pull("age") |> -# sort())) -# PatientProfiles::mockDisconnect(cdm = cdm) + skip_on_cran() + cdm <- mockOmopSketch(numberIndividuals = 2, seed = 1) + expect_no_error(summarisedPopulation <- summarisePopulationCharacteristics( + cdm = cdm) + ) + expect_true(inherits(summarisedPopulation,"summarised_result")) + expect_true(all(summarisedPopulation |> + dplyr::select("strata_name") |> + dplyr::distinct() |> + dplyr::pull() == + c("overall"))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Number records") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + 2)) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort start date" & estimate_name == "min") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "1999-04-05")) + expect_true(summarisedPopulation |> + dplyr::filter(variable_name == "Age at end", estimate_name == "median") |> + dplyr::pull("estimate_value") == + as.character(mean(c(40,16)))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Cohort end date" & estimate_name == "max") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + "2013-06-29")) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Sex", estimate_name == "percentage") |> + dplyr::select("estimate_value") |> + dplyr::pull() == + c(50,50))) + expect_true(all(summarisedPopulation |> + dplyr::filter(variable_name == "Age at start", estimate_name %in% c("min","max")) |> + dplyr::pull("estimate_value") |> + sort() == + cdm$observation_period |> + PatientProfiles::addAge(indexDate = "observation_period_start_date") |> + dplyr::pull("age") |> + sort())) + PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index fd5689b1..e36579e7 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -245,26 +245,26 @@ test_that("summariseRecordCount() sex argument works", { }) test_that("summariseRecordCount() works with mockOmopSketch", { - # skip_on_cran() - # cdm <- mockOmopSketch(numberIndividuals = 3, seed = 1) - # conditionpp <- cdm$condition_occurrence |> - # PatientProfiles::addDemographics(indexDate = "condition_start_date",ageGroup = list(c(0,20),c(21,150))) |> - # dplyr::mutate(year = clock::get_year(condition_start_date)) |> - # dplyr::group_by(year, age_group, sex) |> - # dplyr::summarise(n = n()) - # - # expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) - # - # expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> - # dplyr::tally() |> dplyr::pull() == 0) - # expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level)) |> - # dplyr::pull("estimate_value") |> sort() == - # conditionpp |> dplyr::pull("n") |> as.character() |> sort())) - # - # # Check result type - # checkResultType(co, "summarise_record_count") - # - # PatientProfiles::mockDisconnect(cdm = cdm) + skip_on_cran() + cdm <- mockOmopSketch(numberIndividuals = 3, seed = 1) + conditionpp <- cdm$condition_occurrence |> + PatientProfiles::addDemographics(indexDate = "condition_start_date",ageGroup = list(c(0,20),c(21,150))) |> + dplyr::mutate(year = clock::get_year(condition_start_date)) |> + dplyr::group_by(year, age_group, sex) |> + dplyr::summarise(n = n()) + + expect_no_error(co <- summariseRecordCount(cdm, "condition_occurrence", sex = TRUE, ageGroup = list(c(0,20),c(21,150)))) + + expect_true(co |> dplyr::filter(grepl("Male",strata_level)) |> + dplyr::tally() |> dplyr::pull() == 0) + expect_true(all(co |> dplyr::filter(grepl("&&&",strata_level)) |> + dplyr::pull("estimate_value") |> sort() == + conditionpp |> dplyr::pull("n") |> as.character() |> sort())) + + # Check result type + checkResultType(co, "summarise_record_count") + + PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/vignettes/C-summarise_pop_characteristics.Rmd b/vignettes/C-summarise_pop_characteristics.Rmd deleted file mode 100644 index e61fb278..00000000 --- a/vignettes/C-summarise_pop_characteristics.Rmd +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: "Summarise population characteristics" -output: - html_document: - pandoc_args: [ - "--number-offset=1,0" - ] - number_sections: yes - toc: yes -vignette: > - %\VignetteIndexEntry{C-summarise_pop_characteristics} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -library(CDMConnector) -if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") Sys.setenv("EUNOMIA_DATA_FOLDER" = tempdir()) -if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))) dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER")) -if (!eunomia_is_available()) downloadEunomiaData() -``` - -# Introduction - -In this vignette, we will explore the *OmopSketch* functions that provide information about individuals characteristics at specific points in time. We will employ `summarisePopulationCharacteristics()` to generate a summary of the demographic details within the database population. Additionally, we will tidy and present the results using `tablePopulationCharacteristics()`, which supports either [gt](https://gt.rstudio.com/) or [flextable](https://davidgohel.github.io/flextable/) for formatting the output. - -## Create a mock cdm - -Before we dive into *OmopSketch* functions, we need first to load the essential packages and create a mock CDM using the Eunomia database. - -```{r, warning=FALSE} -library(dplyr) -library(CDMConnector) -library(DBI) -library(duckdb) -library(OmopSketch) - -# Connect to Eunomia database -con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) -cdm <- CDMConnector::cdmFromCon( - con = con, cdmSchema = "main", writeSchema = "main" -) - -cdm -``` - -# Summarise population characteristics -To start, we will use `summarisePopulationCharacteristics()` function to generate a summarised result object, capturing demographic characteristics at both `observation_period_start_date` and `observation_period_end_date`. - - -```{r, warning=FALSE} -summarisedResult <- summarisePopulationCharacteristics(cdm) - -summarisedResult |> glimpse() -``` -To tidy and display the summarised result using a [gt](https://gt.rstudio.com/) table, we can use `tablePopulationCharacteristics()` function. -```{r, warning=FALSE} -summarisedResult |> - tablePopulationCharacteristics(type = "flextable") -``` -To obtain a [flextable](https://davidgohel.github.io/flextable/) instead of a [gt](https://gt.rstudio.com/), you can simply change the `type` argument to `"flextable"`. Additionally, it is important to note that age at start, prior observation, and future observation are calculated at the start date defined (in this case, at individuals observation_period_start_date). On the other hand, age at end is calculated at the end date defined (i.e., individuals observation_period_end_date). - -## Trim study period -To focus on a specific period within the observation data, rather than analysing the entire individuals' observation period, we can trim the study period by using the `studyPeriod` argument. This allows to analyse the demographic metrics within a defined time range rather than the default observation start and end dates. -```{r, warning=FALSE} -summarisePopulationCharacteristics(cdm, - studyPeriod = c("1950-01-01", "1999-12-31")) |> - tablePopulationCharacteristics() -``` - -However, if you are interested in analysing the demographic characteristics starting from a specific date without restricting the study end, you can define just the start of the study period. By default, `summarisePopulationCharacteristics()` function will use the observation_period_end_date to calculate the end-point statistics when the end date is not defined. - -```{r, warning=FALSE} -summarisePopulationCharacteristics(cdm, - studyPeriod = c("1950-01-01", NA)) |> - tablePopulationCharacteristics() -``` - -Similarly, if you are only interested in analysing the population characteristics up to a specific end date, you can define only the end date and set the `startDate = NA`. By default the observation_period_start_date will be used. - -## Stratify by age groups and sex -Population characteristics can also be estimated by stratifying the data based on age and sex using `ageGroups` and `sex` arguments. -```{r, warning=FALSE} -summarisePopulationCharacteristics(cdm, - sex = TRUE, - ageGroup = list("<60" = c(0,59), ">=60" = c(60, Inf))) |> - tablePopulationCharacteristics() -``` - From f7b509a93c473be9cd3af54e71844e8451a52fd2 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Tue, 12 Nov 2024 21:09:23 +0000 Subject: [PATCH 3/8] Update B-summarise_concept_counts.Rmd --- vignettes/B-summarise_concept_counts.Rmd | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/vignettes/B-summarise_concept_counts.Rmd b/vignettes/B-summarise_concept_counts.Rmd index 673e5226..81d66a51 100644 --- a/vignettes/B-summarise_concept_counts.Rmd +++ b/vignettes/B-summarise_concept_counts.Rmd @@ -39,7 +39,6 @@ library(CDMConnector) library(DBI) library(duckdb) library(OmopSketch) -library(CodelistGenerator) # Connect to Eunomia database con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir()) @@ -52,25 +51,12 @@ cdm # Summarise concept counts -First, let's generate a list of codes for the concept `dementia` using [CodelistGenerator](https://darwin-eu.github.io/CodelistGenerator/index.html) package. +First, let's generate a list of codes for the concept `acetaminophen` and `sinusitis. ```{r, warning=FALSE} -acetaminophen <- getCandidateCodes( - cdm = cdm, - keywords = "acetaminophen", - domains = "Drug", - includeDescendants = TRUE -) |> - dplyr::pull("concept_id") - -sinusitis <- getCandidateCodes( - cdm = cdm, - keywords = "sinusitis", - domains = "Condition", - includeDescendants = TRUE -) |> - dplyr::pull("concept_id") +acetaminophen <- c(1125315,1127078, 1127433, 19133768, 40229134, 40231925, 40162522) +sinusitis <- c(4294548, 40481087, 4283893, 257012) ``` Now we want to explore the occurrence of these concepts within the database. For that, we can use `summariseConceptCounts()` from OmopSketch: From 1c84bee78109dd61ba96c74eee31c7d6dfd34ee1 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 13 Nov 2024 10:28:20 +0000 Subject: [PATCH 4/8] V012 cran --- DESCRIPTION | 52 +++++++----------- NAMESPACE | 4 -- README.Rmd | 111 -------------------------------------- cran-comments.md | 8 --- man/OmopSketch-package.Rd | 1 - 5 files changed, 18 insertions(+), 158 deletions(-) delete mode 100644 README.Rmd delete mode 100644 cran-comments.md diff --git a/DESCRIPTION b/DESCRIPTION index 5f459c8c..5063cff4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,43 +30,27 @@ Description: Summarises key information in data mapped to the Observational to obtain feasibility counts and trends. License: Apache License (>= 2) Encoding: UTF-8 -Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Suggests: - CodelistGenerator, - DBI, - here, - knitr, - odbc, - remotes, - rmarkdown, - RPostgres, - testthat (>= 3.0.0), - withr +Suggests: CodelistGenerator, DBI, here, knitr, odbc, remotes, + rmarkdown, RPostgres, testthat (>= 3.0.0), withr Config/testthat/edition: 3 Config/testthat/parallel: true -Imports: - CDMConnector (>= 1.3.0), - cli, - clock, - gt, - flextable, - CohortCharacteristics (>= 0.3.0), - CohortConstructor (>= 0.3.1), - dplyr, - ggplot2, - omock (>= 0.3.0), - omopgenerics (>= 0.3.1), - PatientProfiles (>= 1.2.0), - purrr, - rlang, - stringr, - tibble, - tidyr, - visOmopResults (>= 0.4.0), - duckdb -Depends: - R (>= 2.10) +Imports: CDMConnector (>= 1.3.0), cli, clock, gt, flextable, + CohortCharacteristics (>= 0.3.0), CohortConstructor (>= 0.3.1), + dplyr, ggplot2, omock (>= 0.3.0), omopgenerics (>= 0.3.1), + PatientProfiles (>= 1.2.0), purrr, rlang, stringr, tibble, + tidyr, visOmopResults (>= 0.4.0), duckdb +Depends: R (>= 2.10) URL: https://OHDSI.github.io/OmopSketch/ BugReports: https://github.com/OHDSI/OmopSketch/issues VignetteBuilder: knitr +NeedsCompilation: no +Packaged: 2024-11-12 21:14:36 UTC; martaa +Author: Marta Alcalde-Herraiz [aut, cre] + (), + Kim Lopez-Guell [aut] (), + Elin Rowlands [aut] (), + Edward Burn [aut] (), + Martí Català [aut] () +Repository: CRAN +Date/Publication: 2024-11-12 21:40:07 UTC diff --git a/NAMESPACE b/NAMESPACE index cfce03e4..072eb118 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,19 +9,15 @@ export(plotInObservation) export(plotObservationPeriod) export(plotRecordCount) export(settings) -export(summariseAllConceptCounts) export(summariseClinicalRecords) export(summariseConceptCounts) export(summariseInObservation) -export(summariseMissingData) export(summariseObservationPeriod) export(summariseOmopSnapshot) export(summarisePopulationCharacteristics) export(summariseRecordCount) export(suppress) -export(tableAllConceptCounts) export(tableClinicalRecords) -export(tableMissingData) export(tableObservationPeriod) export(tableOmopSnapshot) export(tablePopulationCharacteristics) diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index 8a692bee..00000000 --- a/README.Rmd +++ /dev/null @@ -1,111 +0,0 @@ ---- -output: github_document ---- - - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) -``` - -# OmopSketch OmopSketch website - - -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![R-CMD-check](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OHDSI/OmopSketch/actions/workflows/R-CMD-check.yaml) -[![CRAN status](https://www.r-pkg.org/badges/version/OmopSketch)](https://CRAN.R-project.org/package=OmopSketch) -[![Codecov test coverage](https://codecov.io/gh/OHDSI/OmopSketch/branch/main/graph/badge.svg)](https://app.codecov.io/gh/OHDSI/OmopSketch?branch=main) - - -### WARNING: this package is under-development and has only been tested using mock data - -The goal of OmopSketch is to characterise and visualise an OMOP CDM instance to asses if it meets the necessary criteria to answer a specific clinical question and conduct a certain study. - -## Installation - -You can install the development version of OmopSketch from [GitHub](https://github.com/) with: - -``` r -# install.packages("remotes") -remotes::install_github("OHDSI/OmopSketch") -``` - -## Example - -Let's start by creating a cdm object using the Eunomia mock dataset: - -```{r, message=TRUE, warning=FALSE} -library(duckdb) -library(CDMConnector) -library(dplyr, warn.conflicts = FALSE) -library(OmopSketch) -con <- dbConnect(duckdb(), eunomia_dir()) -cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main") -cdm -``` -### Snapshot -We first create a snapshot of our database. This will allow us to track when the analysis has been conducted and capture details about the CDM version or the data release. -```{r} -summariseOmopSnapshot(cdm) |> - tableOmopSnapshot(type = "flextable") -``` - - -### Characterise the clinical tables -Once we have collected the snapshot information, we can start characteristing the clinical tables of the CDM. By using `summariseClinicalRecords()` and `tableClinicalRecords()`, we can easily visualise the main characteristics of specific clinical tables. - -```{r} -summariseClinicalRecords(cdm, c("condition_occurrence", "drug_exposure")) |> - tableClinicalRecords(type = "flextable") -``` - -We can also explore trends in the clinical table records over time. - -```{r} -summariseRecordCount(cdm, c("condition_occurrence", "drug_exposure")) |> - plotRecordCount(facet = "omop_table") -``` -### Characterise the observation period -After visualising the main characteristics of our clinical tables, we can explore the observation period details. OmopSketch provides several functions to have an overwied of the dataset study period. - -Using `summariseInObservation()` and `plotInObservation()`, we can gather information on the number of records per year. - -```{r} -summariseInObservation(cdm$observation_period, output = "records") |> - plotInObservation() -``` -You can also visualise and explore the characteristics of the observation period per each individual in the database using `summariseObservationPeriod()`. -```{r} -summariseObservationPeriod(cdm$observation_period) |> - tableObservationPeriod(type = "flextable") -``` - -Or if visualisation is prefered, you can easily build a histogram to explore how many participants have more than one observation period. -```{r} -summariseObservationPeriod(cdm$observation_period) |> - plotObservationPeriod() -``` - -### Characterise the concepts -OmopSketch also provides functions to explore some of (or all) the concepts in the dataset. -```{r} -acetaminophen <- c(1125315, 1127433, 1127078) - -summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen)) |> - filter(estimate_name == "record_count") |> - plotConceptCounts() -``` - -### Characterise the population -Finally, OmopSketch can also help us to characterise the population at the start and end of the observation period. -```{r} -summarisePopulationCharacteristics(cdm) |> - tablePopulationCharacteristics(type = "flextable") -``` -As seen, OmopSketch offers multiple functionalities to provide a general overview of a database. Additionally, it includes more tools and arguments that allow for deeper exploration, helping to assess the database's suitability for specific research studies. For further information, please refer to the vignettes. - diff --git a/cran-comments.md b/cran-comments.md deleted file mode 100644 index 12324b7b..00000000 --- a/cran-comments.md +++ /dev/null @@ -1,8 +0,0 @@ -## R CMD check results - -0 errors | 0 warnings | 1 note - -* This is a new release. -* We do not cite any reference. -* OMOP is the name of the common data model that we use and it is described in -the description diff --git a/man/OmopSketch-package.Rd b/man/OmopSketch-package.Rd index 8d1c0d01..0bc315f4 100644 --- a/man/OmopSketch-package.Rd +++ b/man/OmopSketch-package.Rd @@ -25,7 +25,6 @@ Authors: \itemize{ \item Kim Lopez-Guell \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) \item Elin Rowlands \email{elin.rowlands@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0005-5166-0417}{ORCID}) - \item Cecilia Campanile \email{cecilia.campanile@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0007-6629-4661}{ORCID}) \item Edward Burn \email{edward.burn@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-9286-1128}{ORCID}) \item Martí Català \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) } From 94274891525a2780c0301f885b357aca37ec608f Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 13 Nov 2024 10:31:46 +0000 Subject: [PATCH 5/8] Update _pkgdown.yml --- _pkgdown.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 6f67a4b6..ff4fccf4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -30,8 +30,8 @@ reference: - contents: - summariseConceptCounts - plotConceptCounts - - summariseAllConceptCounts - - tableAllConceptCounts + - summarisePopulationCharacteristics + - tablePopulationCharacteristics - subtitle: Mock Database desc: Create a mock database to test the OmopSketch package - contents: From 492772a0c7049fe9bef4ed7c5af11431e9ed2a4d Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 13 Nov 2024 10:35:20 +0000 Subject: [PATCH 6/8] v012 cran --- R/summariseAllConceptCounts.R | 187 ---------------------------------- R/tableAllConceptCounts.R | 45 -------- 2 files changed, 232 deletions(-) delete mode 100644 R/summariseAllConceptCounts.R delete mode 100644 R/tableAllConceptCounts.R diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R deleted file mode 100644 index e7df70e9..00000000 --- a/R/summariseAllConceptCounts.R +++ /dev/null @@ -1,187 +0,0 @@ - -my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){ - - strata <- as.character() - - if(!is.null(ageGroup)){ - strata <- append(strata, "age_group") - } - - if(sex){ - strata <- append(strata, "sex") - } - if(year){ - strata <- append(strata, "year") - } - return(strata) -} - - -checkFeasibility <- function(omopTable, tableName, conceptId){ - - if (omopgenerics::isTableEmpty(omopTable)){ - cli::cli_warn(paste0(tableName, " omop table is empty.")) - return(NULL) - } - - if (is.na(conceptId)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - - y <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) - - if (omopgenerics::isTableEmpty(y)){ - cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts.")) - return(NULL) - } - return(TRUE) -} - -#' Summarise concept use in patient-level data -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param countBy Either "record" for record-level counts or "person" for -#' person-level counts -#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export -summariseAllConceptCounts <- function(cdm, - omopTableName, - countBy = "record", - year = FALSE, - sex = FALSE, - ageGroup = NULL){ - - omopgenerics::validateCdmArgument(cdm) - checkCountBy(countBy) - omopgenerics::assertLogical(year, length = 1) - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup) - - stratification <- omopgenerics::combineStrata(strata) - - result_tables <- purrr::map(omopTableName, function(table){ - - - - - omopTable <- cdm[[table]] |> - dplyr::ungroup() - - - conceptId <- standardConcept(omopgenerics::tableName(omopTable)) - - if (is.null(checkFeasibility(omopTable, table, conceptId))){ - return(NULL) - } - - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - - x <- omopTable |> - dplyr::filter(!is.na(.data[[conceptId]])) |> - dplyr::left_join( - cdm$concept |> dplyr::select("concept_id", "concept_name"), - by = stats::setNames("concept_id", conceptId)) |> - PatientProfiles::addDemographicsQuery(age = FALSE, - ageGroup = ageGroup, - sex = sex, - indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - level <- c(conceptId, "concept_name") - - groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x))) - - result <- list() - if ("record" %in% countBy){ - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - - - grouped_results <- purrr::map(groupings, \(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop") - - }) - - result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate("estimate_name" = "record_count") - result<-dplyr::bind_rows(result,result_record) - } - - if ("person" %in% countBy){ - - grouped_results <- purrr::map(groupings, \(g) { - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|> - dplyr::collect() - }) - - result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |> - dplyr::mutate("estimate_name" = "person_count") - result<-dplyr::bind_rows(result,result_person) - } - result<- result |> - dplyr::mutate("omop_table" = table, - "variable_level" = as.character(.data[[conceptId]])) |> - - dplyr::select(-dplyr::all_of(conceptId)) - return(result) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - sr <-purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm) - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer" - ) |> - dplyr::rename("variable_name" = "concept_name") - # |> - # dplyr::select(!c()) - - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_all_concept_counts" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - return(sr) - -} - diff --git a/R/tableAllConceptCounts.R b/R/tableAllConceptCounts.R deleted file mode 100644 index 57e7e1af..00000000 --- a/R/tableAllConceptCounts.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Create a visual table from a summariseAllConceptCounts() result. -#' @param result A summarised_result object. -#' @param type Type of formatting output table, either "gt" or "flextable". -#' @return A gt or flextable object with the summarised data. -#' @export -#' -#' -tableAllConceptCounts <- function(result, - type = "gt") { - # initial checks - omopgenerics::validateResultArgument(result) - omopgenerics::assertChoice(type, choicesTables()) - - # subset to result_type of interest - result <- result |> - visOmopResults::filterSettings( - .data$result_type == "summarise_all_concept_counts") - - # check if it is empty - if (nrow(result) == 0) { - warnEmpty("summarise_all_concept_counts") - return(emptyTable(type)) - } - - estimate_names <- result |> - dplyr::distinct(.data$estimate_name) |> - dplyr::pull() - estimateName <- c() - if ("record_count" %in% estimate_names) { - estimateName <- c(estimateName, "N records" = "") - } - if ("person_count" %in% estimate_names) { - estimateName <- c(estimateName, "N persons" = "") - } - - result |> - formatColumn(c("variable_name", "variable_level")) |> - visOmopResults::visOmopTable( - type = type, - estimateName = estimateName, - header = c("cdm_name"), - rename = c("Database name" = "cdm_name"), - groupColumn = c("omop_table", visOmopResults::strataColumns(result)) - ) -} From 0e1357ce599b29b2e8045feb5799a7ee98cee215 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 13 Nov 2024 10:46:59 +0000 Subject: [PATCH 7/8] Updtae --- NAMESPACE | 2 ++ man/plotConceptCounts.Rd | 4 ++-- man/plotInObservation.Rd | 4 ++-- man/plotObservationPeriod.Rd | 4 ++-- man/plotRecordCount.Rd | 4 ++-- man/summariseAllConceptCounts.Rd | 38 ------------------------------- man/summariseMissingData.Rd | 2 +- man/summariseObservationPeriod.Rd | 4 ++-- man/tableAllConceptCounts.Rd | 19 ---------------- 9 files changed, 13 insertions(+), 68 deletions(-) delete mode 100644 man/summariseAllConceptCounts.Rd delete mode 100644 man/tableAllConceptCounts.Rd diff --git a/NAMESPACE b/NAMESPACE index 072eb118..52da4218 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,12 +12,14 @@ export(settings) export(summariseClinicalRecords) export(summariseConceptCounts) export(summariseInObservation) +export(summariseMissingData) export(summariseObservationPeriod) export(summariseOmopSnapshot) export(summarisePopulationCharacteristics) export(summariseRecordCount) export(suppress) export(tableClinicalRecords) +export(tableMissingData) export(tableObservationPeriod) export(tableOmopSnapshot) export(tablePopulationCharacteristics) diff --git a/man/plotConceptCounts.Rd b/man/plotConceptCounts.Rd index dc5387f8..aabae5d1 100644 --- a/man/plotConceptCounts.Rd +++ b/man/plotConceptCounts.Rd @@ -10,10 +10,10 @@ plotConceptCounts(result, facet = NULL, colour = NULL) \item{result}{A summarised_result object (output of summariseConceptCounts).} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot2 object showing the concept counts. diff --git a/man/plotInObservation.Rd b/man/plotInObservation.Rd index a16a84c6..c5a6941b 100644 --- a/man/plotInObservation.Rd +++ b/man/plotInObservation.Rd @@ -10,10 +10,10 @@ plotInObservation(result, facet = NULL, colour = NULL) \item{result}{A summarised_result object (output of summariseInObservation).} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot showing the table counts diff --git a/man/plotObservationPeriod.Rd b/man/plotObservationPeriod.Rd index 543f468a..86a701a4 100644 --- a/man/plotObservationPeriod.Rd +++ b/man/plotObservationPeriod.Rd @@ -22,10 +22,10 @@ plotObservationPeriod( "densityplot".} \item{facet}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot2 object. diff --git a/man/plotRecordCount.Rd b/man/plotRecordCount.Rd index b539d1cb..b80afbda 100644 --- a/man/plotRecordCount.Rd +++ b/man/plotRecordCount.Rd @@ -10,10 +10,10 @@ plotRecordCount(result, facet = NULL, colour = NULL) \item{result}{Output from summariseRecordCount().} \item{facet}{Columns to face by. Formula format can be provided. See possible -columns to face by with: \code{visOmopResults::tidyColumns()}.} +columns to face by with: `visOmopResults::tidyColumns()`.} \item{colour}{Columns to colour by. See possible columns to colour by with: -\code{visOmopResults::tidyColumns()}.} +`visOmopResults::tidyColumns()`.} } \value{ A ggplot showing the table counts diff --git a/man/summariseAllConceptCounts.Rd b/man/summariseAllConceptCounts.Rd deleted file mode 100644 index 6437a86f..00000000 --- a/man/summariseAllConceptCounts.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summariseAllConceptCounts.R -\name{summariseAllConceptCounts} -\alias{summariseAllConceptCounts} -\title{Summarise concept use in patient-level data} -\usage{ -summariseAllConceptCounts( - cdm, - omopTableName, - countBy = "record", - year = FALSE, - sex = FALSE, - ageGroup = NULL -) -} -\arguments{ -\item{cdm}{A cdm object} - -\item{omopTableName}{A character vector of the names of the tables to -summarise in the cdm object.} - -\item{countBy}{Either "record" for record-level counts or "person" for -person-level counts} - -\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} - -\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} - -\item{ageGroup}{A list of ageGroup vectors of length two. Code use will be -thus summarised by age groups.} -} -\value{ -A summarised_result object with results overall and, if specified, by -strata. -} -\description{ -Summarise concept use in patient-level data -} diff --git a/man/summariseMissingData.Rd b/man/summariseMissingData.Rd index b5aeb9a1..05ebff53 100644 --- a/man/summariseMissingData.Rd +++ b/man/summariseMissingData.Rd @@ -20,7 +20,7 @@ summariseMissingData( summarise in the cdm object.} \item{col}{A character vector of column names to check for missing values. -If \code{NULL}, all columns in the specified tables are checked. Default is \code{NULL}.} +If `NULL`, all columns in the specified tables are checked. Default is `NULL`.} \item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} diff --git a/man/summariseObservationPeriod.Rd b/man/summariseObservationPeriod.Rd index 14ad95c3..26763dc4 100644 --- a/man/summariseObservationPeriod.Rd +++ b/man/summariseObservationPeriod.Rd @@ -17,8 +17,8 @@ summariseObservationPeriod( \item{observationPeriod}{observation_period omop table.} \item{estimates}{Estimates to summarise the variables of interest ( -\verb{records per person}, \verb{duration in days} and -\verb{days to next observation period}).} +`records per person`, `duration in days` and +`days to next observation period`).} \item{ageGroup}{A list of age groups to stratify results by.} diff --git a/man/tableAllConceptCounts.Rd b/man/tableAllConceptCounts.Rd deleted file mode 100644 index a3ce8187..00000000 --- a/man/tableAllConceptCounts.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tableAllConceptCounts.R -\name{tableAllConceptCounts} -\alias{tableAllConceptCounts} -\title{Create a visual table from a summariseAllConceptCounts() result.} -\usage{ -tableAllConceptCounts(result, type = "gt") -} -\arguments{ -\item{result}{A summarised_result object.} - -\item{type}{Type of formatting output table, either "gt" or "flextable".} -} -\value{ -A gt or flextable object with the summarised data. -} -\description{ -Create a visual table from a summariseAllConceptCounts() result. -} From 46bc63a204e09374c21788a3d684cc416124cb46 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Wed, 13 Nov 2024 11:02:29 +0000 Subject: [PATCH 8/8] Update --- NAMESPACE | 2 - R/summariseMissingData.R | 156 --------------------- R/tableMissingData.R | 36 ----- _pkgdown.yml | 2 - man/summariseMissingData.Rd | 38 ----- man/tableMissingData.Rd | 19 --- tests/testthat/test-summariseMissingData.R | 54 ------- 7 files changed, 307 deletions(-) delete mode 100644 R/summariseMissingData.R delete mode 100644 R/tableMissingData.R delete mode 100644 man/summariseMissingData.Rd delete mode 100644 man/tableMissingData.Rd delete mode 100644 tests/testthat/test-summariseMissingData.R diff --git a/NAMESPACE b/NAMESPACE index 52da4218..072eb118 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,14 +12,12 @@ export(settings) export(summariseClinicalRecords) export(summariseConceptCounts) export(summariseInObservation) -export(summariseMissingData) export(summariseObservationPeriod) export(summariseOmopSnapshot) export(summarisePopulationCharacteristics) export(summariseRecordCount) export(suppress) export(tableClinicalRecords) -export(tableMissingData) export(tableObservationPeriod) export(tableOmopSnapshot) export(tablePopulationCharacteristics) diff --git a/R/summariseMissingData.R b/R/summariseMissingData.R deleted file mode 100644 index 6b9dc0dd..00000000 --- a/R/summariseMissingData.R +++ /dev/null @@ -1,156 +0,0 @@ -#' Summarise missing data in omop tables -#' -#' @param cdm A cdm object -#' @param omopTableName A character vector of the names of the tables to -#' summarise in the cdm object. -#' @param col A character vector of column names to check for missing values. -#' If `NULL`, all columns in the specified tables are checked. Default is `NULL`. -#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. -#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. -#' @param ageGroup A list of ageGroup vectors of length two. Code use will be -#' thus summarised by age groups. -#' @return A summarised_result object with results overall and, if specified, by -#' strata. -#' @export -summariseMissingData <- function(cdm, - omopTableName, - col = NULL, - sex = FALSE, - year = FALSE, - ageGroup = NULL){ - - - omopgenerics::validateCdmArgument(cdm) - - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE) - - - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - strata <- my_getStrataList(sex = sex, ageGroup = ageGroup, year = year) - stratification <- c(list(character()),omopgenerics::combineStrata(strata)) - - result_tables <- purrr::map(omopTableName, function(table) { - - if (omopgenerics::isTableEmpty(cdm[[table]])){ - cli::cli_warn(paste0(table, " omop table is empty.")) - return(NULL) - } - - omopTable <- cdm[[table]] - col_table <- intersect(col, colnames(omopTable)) - if (is.null(col_table) | rlang::is_empty(col_table)){ - col_table<-colnames(omopTable) - } - - indexDate <- startDate(omopgenerics::tableName(omopTable)) - x <- omopTable |> PatientProfiles::addDemographicsQuery(age = FALSE, ageGroup = ageGroup, sex = sex, indexDate = indexDate) - if (year){ - x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]]))) - } - - result_columns <- purrr::map(col_table, function(c) { - - stratified_result <- x |> - dplyr::group_by(dplyr::across(dplyr::all_of(strata))) |> - dplyr::summarise( - na_count = sum(as.integer(is.na(.data[[c]])), na.rm = TRUE), - total_count = dplyr::n(), - .groups = "drop" - ) |> - dplyr::collect() - - # Group results for each level of stratification - grouped_results <- purrr::map(stratification, function(g) { - stratified_result |> - dplyr::group_by(dplyr::across(dplyr::all_of(g))) |> - dplyr::summarise( - na_count = sum(.data$na_count, na.rm = TRUE), - total_count = sum(.data$total_count, na.rm = TRUE), - colName = c, - .groups = "drop" - ) |> - dplyr::mutate(na_percentage = dplyr::if_else(.data$total_count > 0, (.data$na_count / .data$total_count) * 100, 0)) - }) - - return(purrr::reduce(grouped_results, dplyr::bind_rows)) - - }) - - res <- purrr::reduce(result_columns, dplyr::union)|> - dplyr::mutate(omop_table = table) - - warningDataRequire(cdm = cdm, res = res, table = table) - - return(res) - }) - if (rlang::is_empty(purrr::compact(result_tables))){ - return(omopgenerics::emptySummarisedResult()) - } - - - result <- purrr::compact(result_tables) |> - purrr::reduce(dplyr::union)|> - dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|> - dplyr::mutate( - na_count = as.double(.data$na_count), # Cast na_count to double - na_percentage = as.double(.data$na_percentage) - )|> - tidyr::pivot_longer( - cols = c(.data$na_count, .data$na_percentage), - names_to = "estimate_name", - values_to = "estimate_value" - ) - - - sr <- result |> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm), - ) |> - visOmopResults::uniteGroup(cols = "omop_table") |> - visOmopResults::uniteStrata(cols = strata) |> - visOmopResults::uniteAdditional() |> - dplyr::mutate( - "estimate_value" = as.character(.data$estimate_value), - "estimate_type" = "integer", - "variable_level" = NA_character_ - ) |> - dplyr::rename("variable_name" = "colName") |> - dplyr::select(!c(.data$total_count)) - - settings <- dplyr::tibble( - result_id = unique(sr$result_id), - package_name = "omopSketch", - package_version = as.character(utils::packageVersion("OmopSketch")), - result_type = "summarise_missing_data" - ) - sr <- sr |> - omopgenerics::newSummarisedResult(settings = settings) - - - return(sr) - -} - -warningDataRequire <- function(cdm, table, res){ -required_cols <- omopgenerics::omopTableFields(CDMConnector::cdmVersion(cdm))|> - dplyr::filter(.data$cdm_table_name==table)|> - dplyr::filter(.data$is_required==TRUE)|> - dplyr::pull(.data$cdm_field_name) -warning_columns <- res |> - dplyr::filter(.data$colName %in% required_cols)|> - dplyr::filter(.data$na_count>0)|> - dplyr::distinct(.data$colName)|> - dplyr::pull() - -if (length(warning_columns) > 0) { - cli::cli_warn(c( - "These columns contain missing values, which are not permitted:", - "{.val {warning_columns}}" - )) -} -} - - diff --git a/R/tableMissingData.R b/R/tableMissingData.R deleted file mode 100644 index 60dfd276..00000000 --- a/R/tableMissingData.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Create a visual table from a summariseMissingData() result. -#' @param result A summarised_result object. -#' @param type Type of formatting output table, either "gt" or "flextable". -#' @return A gt or flextable object with the summarised data. -#' @export -#' -#' -tableMissingData <- function(result, - type = "gt") { - # initial checks - omopgenerics::validateResultArgument(result) - omopgenerics::assertChoice(type, choicesTables()) - - # subset to result_type of interest - result <- result |> - visOmopResults::filterSettings( - .data$result_type == "summarise_missing_data") - - # check if it is empty - if (nrow(result) == 0) { - warnEmpty("summarise_missing_data") - return(emptyTable(type)) - } - - result |> - formatColumn(c("variable_name", "variable_level")) |> - visOmopResults::visOmopTable( - type = type, - estimateName = c( - "N (%)" = " (%)", - "N" = ""), - header = c("cdm_name"), - rename = c("Database name" = "cdm_name"), - groupColumn = c("omop_table", visOmopResults::strataColumns(result)) - ) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index ff4fccf4..44574c60 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,8 +15,6 @@ reference: - tableClinicalRecords - summariseRecordCount - plotRecordCount - - summariseMissingData - - tableMissingData - subtitle: Observation Periods desc: Summarise and plot the observation period table in the OMOP Common Data Model - contents: diff --git a/man/summariseMissingData.Rd b/man/summariseMissingData.Rd deleted file mode 100644 index 05ebff53..00000000 --- a/man/summariseMissingData.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summariseMissingData.R -\name{summariseMissingData} -\alias{summariseMissingData} -\title{Summarise missing data in omop tables} -\usage{ -summariseMissingData( - cdm, - omopTableName, - col = NULL, - sex = FALSE, - year = FALSE, - ageGroup = NULL -) -} -\arguments{ -\item{cdm}{A cdm object} - -\item{omopTableName}{A character vector of the names of the tables to -summarise in the cdm object.} - -\item{col}{A character vector of column names to check for missing values. -If `NULL`, all columns in the specified tables are checked. Default is `NULL`.} - -\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} - -\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} - -\item{ageGroup}{A list of ageGroup vectors of length two. Code use will be -thus summarised by age groups.} -} -\value{ -A summarised_result object with results overall and, if specified, by -strata. -} -\description{ -Summarise missing data in omop tables -} diff --git a/man/tableMissingData.Rd b/man/tableMissingData.Rd deleted file mode 100644 index 59ac5077..00000000 --- a/man/tableMissingData.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tableMissingData.R -\name{tableMissingData} -\alias{tableMissingData} -\title{Create a visual table from a summariseMissingData() result.} -\usage{ -tableMissingData(result, type = "gt") -} -\arguments{ -\item{result}{A summarised_result object.} - -\item{type}{Type of formatting output table, either "gt" or "flextable".} -} -\value{ -A gt or flextable object with the summarised data. -} -\description{ -Create a visual table from a summariseMissingData() result. -} diff --git a/tests/testthat/test-summariseMissingData.R b/tests/testthat/test-summariseMissingData.R deleted file mode 100644 index da24dbd9..00000000 --- a/tests/testthat/test-summariseMissingData.R +++ /dev/null @@ -1,54 +0,0 @@ -test_that("summariseMissingData() works", { - skip_on_cran() - # Load mock database ---- - cdm <- cdmEunomia() - - # Check all tables work ---- - expect_true(inherits(summariseMissingData(cdm, "drug_exposure"),"summarised_result")) - expect_no_error(y<-summariseMissingData(cdm, "observation_period")) - expect_no_error(x<-summariseMissingData(cdm, "visit_occurrence")) - expect_no_error(summariseMissingData(cdm, "condition_occurrence")) - expect_no_error(summariseMissingData(cdm, "drug_exposure")) - - expect_no_error(summariseMissingData(cdm, "procedure_occurrence", year = TRUE)) - expect_warning(summariseMissingData(cdm, "device_exposure")) - expect_no_error(z<-summariseMissingData(cdm, "measurement")) - expect_no_error(s<-summariseMissingData(cdm, "observation")) - - expect_warning(summariseMissingData(cdm, "death")) - - - expect_no_error(all <- summariseMissingData(cdm, c("observation_period", "visit_occurrence", "measurement"))) - expect_equal(all, dplyr::bind_rows(y, x, z)) - expect_equal(summariseMissingData(cdm, "observation"), summariseMissingData(cdm, "observation", col = colnames(cdm[['observation']]))) - x<-summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date") - - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = "procedure_date"), dplyr::bind_rows(x,s)) - y<-summariseMissingData(cdm, "observation",col = "observation_date") - expect_equal(summariseMissingData(cdm, c("procedure_occurrence","observation" ), col = c("procedure_date", "observation_date")), dplyr::bind_rows(x,y)) - - # Check inputs ---- - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id")|> - dplyr::select(estimate_value)|> - dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> - dplyr::summarise(sum = sum(estimate_value)) |> - dplyr::pull() == 0) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", sex = TRUE, ageGroup = list(c(0,50), c(51,Inf)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==9) - - expect_true(summariseMissingData(cdm, "procedure_occurrence", col="person_id", ageGroup = list(c(0,50)))|> - dplyr::distinct(.data$strata_level)|> - dplyr::tally()|> - dplyr::pull()==3) - - cdm$procedure_occurrence <- cdm$procedure_occurrence |> - dplyr::mutate(procedure_concept_id = NA_integer_) |> - dplyr::compute(name = "procedure_occurrence", temporary = FALSE) - - expect_warning(summariseMissingData(cdm, "procedure_occurrence", col="procedure_concept_id", ageGroup = list(c(0,50)))) - - PatientProfiles::mockDisconnect(cdm = cdm) -})