From 41a68860b77386c4672d485de372719e45f46593 Mon Sep 17 00:00:00 2001 From: cecicampanile Date: Tue, 17 Dec 2024 12:31:28 +0000 Subject: [PATCH 1/3] argument sample added test is missing --- R/summariseAllConceptCounts.R | 3 ++- R/summariseClinicalRecords.R | 5 ++++- R/summariseConceptSetCounts.R | 42 ++++++++++++++++++++++------------- R/summariseRecordCount.R | 13 ++++++++--- R/utilities.R | 13 +++++++++++ 5 files changed, 55 insertions(+), 21 deletions(-) diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R index 36d0e12..e08570f 100644 --- a/R/summariseAllConceptCounts.R +++ b/R/summariseAllConceptCounts.R @@ -63,6 +63,7 @@ summariseAllConceptCounts <- function(cdm, year = FALSE, sex = FALSE, ageGroup = NULL, + sample = 1000000, dateRange = NULL){ omopgenerics::validateCdmArgument(cdm) @@ -93,7 +94,7 @@ summariseAllConceptCounts <- function(cdm, } omopTable <- restrictStudyPeriod(omopTable, dateRange) - + omopTable <- sampleOmopTable(omopTable, sample) indexDate <- startDate(omopgenerics::tableName(omopTable)) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index a9ee5e6..dc529e9 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -55,6 +55,7 @@ summariseClinicalRecords <- function(cdm, typeConcept = TRUE, sex = FALSE, ageGroup = NULL, + sample = 1000000, dateRange = NULL) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) @@ -93,6 +94,7 @@ summariseClinicalRecords <- function(cdm, typeConcept = typeConcept, sex = sex, ageGroup = ageGroup, + sample = sample, dateRange = dateRange ) }) |> @@ -112,6 +114,7 @@ summariseClinicalRecord <- function(omopTableName, typeConcept, sex, ageGroup, + sample, dateRange, call = parent.frame(3)) { @@ -124,8 +127,8 @@ summariseClinicalRecord <- function(omopTableName, omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() - omopTable <- restrictStudyPeriod(omopTable, dateRange) + omopTable <- sampleOmopTable(omopTable, sample) if(omopgenerics::isTableEmpty(omopTable)) { return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_clinical_records", study_period = dateRange))) } diff --git a/R/summariseConceptSetCounts.R b/R/summariseConceptSetCounts.R index 71e0967..75123c3 100644 --- a/R/summariseConceptSetCounts.R +++ b/R/summariseConceptSetCounts.R @@ -37,6 +37,7 @@ summariseConceptSetCounts <- function(cdm, interval = "overall", sex = FALSE, ageGroup = NULL, + sample = 1000000, dateRange = NULL){ omopgenerics::validateCdmArgument(cdm) @@ -74,6 +75,7 @@ summariseConceptSetCounts <- function(cdm, unitInterval = unitInterval, sex = sex, ageGroup = ageGroup, + sample = sample, dateRange = dateRange) Sys.sleep(i/length(conceptSet)) cli::cli_progress_update() @@ -107,6 +109,7 @@ getCodeUse <- function(x, unitInterval, sex, ageGroup, + sample, dateRange, call = parent.frame()){ @@ -150,7 +153,9 @@ getCodeUse <- function(x, records <- getRelevantRecords(cdm = cdm, tableCodelist = tableCodelist, intermediateTable = intermediateTable, - tablePrefix = tablePrefix) + tablePrefix = tablePrefix, + sample = sample, dateRange = dateRange) + if(is.null(records)){ cc <- dplyr::tibble() cli::cli_inform(c( @@ -159,17 +164,16 @@ getCodeUse <- function(x, return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange))) } - if (!is.null(dateRange)) - { - records <- records |> - dplyr::filter( - as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2] - ) - if (is.null(warningEmptyStudyPeriod(records))){ - return(tibble::tibble()) - } - - } + # if (!is.null(dateRange)) + # { + # records <- records |> + # dplyr::filter( + # as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2] + # ) + # if (is.null(warningEmptyStudyPeriod(records))){ + # return(tibble::tibble()) + # } + # } records <- addStrataToOmopTable(records, "date", ageGroup, sex) strata <- getStrataList(sex, ageGroup) @@ -238,7 +242,8 @@ getCodeUse <- function(x, getRelevantRecords <- function(cdm, tableCodelist, intermediateTable, - tablePrefix){ + tablePrefix, + sample, dateRange){ codes <- cdm[[tableCodelist]] |> dplyr::collect() @@ -248,9 +253,11 @@ getRelevantRecords <- function(cdm, dateName <- purrr::discard(unique(codes$start_date), is.na) if(length(tableName)>0){ - codeRecords <- cdm[[tableName[[1]]]] + codeRecords <- cdm[[tableName[[1]]]]|> + restrictStudyPeriod(dateRange)|> + sampleOmopTable(sample) - if(is.null(codeRecords)){return(NULL)} + if(is.null(codeRecords) || omopgenerics::isTableEmpty(codeRecords)){return(NULL)} tableCodes <- paste0(tablePrefix, "table_codes") @@ -290,7 +297,10 @@ getRelevantRecords <- function(cdm, # get for any additional domains and union if(length(tableName) > 1) { for(i in 1:(length(tableName)-1)) { - workingRecords <- cdm[[tableName[[i+1]]]] + workingRecords <- cdm[[tableName[[i+1]]]] |> + restrictStudyPeriod(dateRange)|> + sampleOmopTable(sample) + if(is.null(workingRecords) || omopgenerics::isTableEmpty(workingRecords)){return(NULL)} workingRecords <- workingRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index d929b28..4daec64 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -35,7 +35,9 @@ summariseRecordCount <- function(cdm, interval = "overall", ageGroup = NULL, sex = FALSE, - dateRange = NULL) { + sample = 1000000, + dateRange = NULL + ) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) @@ -48,6 +50,7 @@ summariseRecordCount <- function(cdm, omopgenerics::assertLogical(sex, length = 1) dateRange <- validateStudyPeriod(cdm, dateRange) + result <- purrr::map(omopTableName, function(x) { omopgenerics::assertClass(cdm[[x]], "omop_table", call = parent.frame()) @@ -66,7 +69,9 @@ summariseRecordCount <- function(cdm, original_interval, ageGroup = ageGroup, sex = sex, - dateRange = dateRange) + sample = sample, + dateRange = dateRange + ) } ) |> dplyr::bind_rows() @@ -76,10 +81,12 @@ summariseRecordCount <- function(cdm, #' @noRd summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval, - original_interval, ageGroup, sex, dateRange) { + original_interval, ageGroup, sex, sample, dateRange) { prefix <- omopgenerics::tmpPrefix() omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() + omopTable <- restrictStudyPeriod(omopTable, dateRange) + omopTable <- sampleOmopTable(omopTable, sample) # Create initial variables ---- diff --git a/R/utilities.R b/R/utilities.R index a147af5..53aaa9e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -83,4 +83,17 @@ createSettings <- function(result_type, result_id = 1L, package_name = "OmopSket return(settings) } +sampleOmopTable <- function(omopTable, sample){ + sampling <- !is.null(sample) & !is.infinite(sample) + + if (sampling & omopTable |> dplyr::tally() |> dplyr::pull() <= sample) { + sampling <- FALSE + } + + if (sampling){ + omopTable <- omopTable |> + dplyr::slice_sample(n = sample) + } + return(omopTable) +} From 6948bca0d3a5fb760e2b4796229e3014c9995791 Mon Sep 17 00:00:00 2001 From: cecicampanile Date: Tue, 17 Dec 2024 13:24:57 +0000 Subject: [PATCH 2/3] test --- R/summariseAllConceptCounts.R | 2 ++ R/summariseClinicalRecords.R | 2 ++ R/summariseConceptSetCounts.R | 2 ++ R/summariseRecordCount.R | 2 ++ man/summariseAllConceptCounts.Rd | 4 ++++ man/summariseClinicalRecords.Rd | 4 ++++ man/summariseConceptSetCounts.Rd | 4 ++++ man/summariseRecordCount.Rd | 4 ++++ tests/testthat/test-summariseAllConceptCounts.R | 14 ++++++++++++++ tests/testthat/test-summariseClinicalRecords.R | 15 +++++++++++++++ tests/testthat/test-summariseConceptSetCounts.R | 16 ++++++++++++++++ tests/testthat/test-summariseRecordCount.R | 14 ++++++++++++++ 12 files changed, 83 insertions(+) diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R index e08570f..0f410b7 100644 --- a/R/summariseAllConceptCounts.R +++ b/R/summariseAllConceptCounts.R @@ -52,6 +52,8 @@ checkFeasibility <- function(omopTable, tableName, conceptId){ #' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. #' @param ageGroup A list of ageGroup vectors of length two. Code use will be #' thus summarised by age groups. +#' @param sample An integer to sample the tables to only that number of records. +#' If NULL no sample is done. #' @param dateRange A list containing the minimum and the maximum dates #' defining the time range within which the analysis is performed. #' @return A summarised_result object with results overall and, if specified, by diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index dc529e9..b3d8911 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -22,6 +22,8 @@ #' @param ageGroup A list of age groups to stratify results by. #' @param sex Boolean variable. Whether to stratify by sex (TRUE) or not #' (FALSE). +#' @param sample An integer to sample the tables to only that number of records. +#' If NULL no sample is done. #' @param dateRange A list containing the minimum and the maximum dates #' defining the time range within which the analysis is performed. #' @return A summarised_result object. diff --git a/R/summariseConceptSetCounts.R b/R/summariseConceptSetCounts.R index 75123c3..8ee9f4a 100644 --- a/R/summariseConceptSetCounts.R +++ b/R/summariseConceptSetCounts.R @@ -10,6 +10,8 @@ #' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. #' @param ageGroup A list of ageGroup vectors of length two. Code use will be #' thus summarised by age groups. +#' @param sample An integer to sample the tables in the cdm object to only that number of records. +#' If NULL no sample is done. #' @param dateRange A list containing the minimum and the maximum dates #' defining the time range within which the analysis is performed. #' @return A summarised_result object with results overall and, if specified, by diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index 4daec64..0c435b3 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -9,6 +9,8 @@ #' @param sex Whether to stratify by sex (TRUE) or not (FALSE). #' @param dateRange A list containing the minimum and the maximum dates #' defining the time range within which the analysis is performed. +#' @param sample An integer to sample the tables to only that number of records. +#' If NULL no sample is done. #' @return A summarised_result object. #' @export #' @examples diff --git a/man/summariseAllConceptCounts.Rd b/man/summariseAllConceptCounts.Rd index 8a3d680..4f04ba7 100644 --- a/man/summariseAllConceptCounts.Rd +++ b/man/summariseAllConceptCounts.Rd @@ -11,6 +11,7 @@ summariseAllConceptCounts( year = FALSE, sex = FALSE, ageGroup = NULL, + sample = 1e+06, dateRange = NULL ) } @@ -30,6 +31,9 @@ person-level counts} \item{ageGroup}{A list of ageGroup vectors of length two. Code use will be thus summarised by age groups.} +\item{sample}{An integer to sample the tables to only that number of records. +If NULL no sample is done.} + \item{dateRange}{A list containing the minimum and the maximum dates defining the time range within which the analysis is performed.} } diff --git a/man/summariseClinicalRecords.Rd b/man/summariseClinicalRecords.Rd index 5f649ff..7b7dbd2 100644 --- a/man/summariseClinicalRecords.Rd +++ b/man/summariseClinicalRecords.Rd @@ -18,6 +18,7 @@ summariseClinicalRecords( typeConcept = TRUE, sex = FALSE, ageGroup = NULL, + sample = 1e+06, dateRange = NULL ) } @@ -50,6 +51,9 @@ field information.} \item{ageGroup}{A list of age groups to stratify results by.} +\item{sample}{An integer to sample the tables to only that number of records. +If NULL no sample is done.} + \item{dateRange}{A list containing the minimum and the maximum dates defining the time range within which the analysis is performed.} } diff --git a/man/summariseConceptSetCounts.Rd b/man/summariseConceptSetCounts.Rd index b75e4a2..97faa23 100644 --- a/man/summariseConceptSetCounts.Rd +++ b/man/summariseConceptSetCounts.Rd @@ -12,6 +12,7 @@ summariseConceptSetCounts( interval = "overall", sex = FALSE, ageGroup = NULL, + sample = 1e+06, dateRange = NULL ) } @@ -32,6 +33,9 @@ person-level counts} \item{ageGroup}{A list of ageGroup vectors of length two. Code use will be thus summarised by age groups.} +\item{sample}{An integer to sample the tables in the cdm object to only that number of records. +If NULL no sample is done.} + \item{dateRange}{A list containing the minimum and the maximum dates defining the time range within which the analysis is performed.} } diff --git a/man/summariseRecordCount.Rd b/man/summariseRecordCount.Rd index 0520cf7..20af48b 100644 --- a/man/summariseRecordCount.Rd +++ b/man/summariseRecordCount.Rd @@ -11,6 +11,7 @@ summariseRecordCount( interval = "overall", ageGroup = NULL, sex = FALSE, + sample = 1e+06, dateRange = NULL ) } @@ -25,6 +26,9 @@ summariseRecordCount( \item{sex}{Whether to stratify by sex (TRUE) or not (FALSE).} +\item{sample}{An integer to sample the tables to only that number of records. +If NULL no sample is done.} + \item{dateRange}{A list containing the minimum and the maximum dates defining the time range within which the analysis is performed.} } diff --git a/tests/testthat/test-summariseAllConceptCounts.R b/tests/testthat/test-summariseAllConceptCounts.R index ceee030..5e859b9 100644 --- a/tests/testthat/test-summariseAllConceptCounts.R +++ b/tests/testthat/test-summariseAllConceptCounts.R @@ -76,6 +76,20 @@ test_that("dateRange argument works", { expect_equal(colnames(settings(y)), colnames(settings(x))) PatientProfiles::mockDisconnect(cdm = cdm) }) +test_that("sample argument works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_no_error(x<-summariseAllConceptCounts(cdm,"drug_exposure", sample = 50)) + expect_no_error(y<-summariseAllConceptCounts(cdm,"drug_exposure")) + n <- cdm$drug_exposure |> + dplyr::tally()|> + dplyr::pull(n) + expect_no_error(z<-summariseAllConceptCounts(cdm,"drug_exposure",sample = n)) + expect_equal(y,z) + PatientProfiles::mockDisconnect(cdm = cdm) +}) test_that("tableAllConceptCounts() works", { skip_on_cran() diff --git a/tests/testthat/test-summariseClinicalRecords.R b/tests/testthat/test-summariseClinicalRecords.R index a9349b8..e980671 100644 --- a/tests/testthat/test-summariseClinicalRecords.R +++ b/tests/testthat/test-summariseClinicalRecords.R @@ -247,6 +247,21 @@ test_that("dateRange argument works", { }) +test_that("sample argument works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_no_error(x<-summariseClinicalRecords(cdm,"drug_exposure", sample = 50)) + expect_no_error(y<-summariseClinicalRecords(cdm,"drug_exposure")) + n <- cdm$drug_exposure |> + dplyr::tally()|> + dplyr::pull(n) + expect_no_error(z<-summariseClinicalRecords(cdm,"drug_exposure",sample = n)) + expect_equal(y,z) + PatientProfiles::mockDisconnect(cdm = cdm) +}) + test_that("tableClinicalRecords() works", { skip_on_cran() # Load mock database ---- diff --git a/tests/testthat/test-summariseConceptSetCounts.R b/tests/testthat/test-summariseConceptSetCounts.R index 1425cdc..f27ef85 100644 --- a/tests/testthat/test-summariseConceptSetCounts.R +++ b/tests/testthat/test-summariseConceptSetCounts.R @@ -558,3 +558,19 @@ test_that("dateRange argument works", { expect_equal(colnames(settings(z)), colnames(settings(x))) PatientProfiles::mockDisconnect(cdm = cdm) }) + + +test_that("sample argument works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_no_error(x<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)), sample = 50)) + expect_no_error(y<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)))) + n <- cdm$drug_exposure |> + dplyr::tally()|> + dplyr::pull(n) + expect_no_error(z<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)),sample = n)) + expect_equal(y,z) + PatientProfiles::mockDisconnect(cdm = cdm) +}) diff --git a/tests/testthat/test-summariseRecordCount.R b/tests/testthat/test-summariseRecordCount.R index fbbee3d..925960c 100644 --- a/tests/testthat/test-summariseRecordCount.R +++ b/tests/testthat/test-summariseRecordCount.R @@ -305,3 +305,17 @@ test_that("dateRnge argument works", { }) +test_that("sample argument works", { + skip_on_cran() + # Load mock database ---- + cdm <- cdmEunomia() + + expect_no_error(x<-summariseRecordCount(cdm,"drug_exposure", sample = 50)) + expect_no_error(y<-summariseRecordCount(cdm,"drug_exposure")) + n <- cdm$drug_exposure |> + dplyr::tally()|> + dplyr::pull(n) + expect_no_error(z<-summariseRecordCount(cdm,"drug_exposure",sample = n)) + expect_equal(y,z) + PatientProfiles::mockDisconnect(cdm = cdm) +}) From 8c6b448820fb9b997c21f7a46280177928885438 Mon Sep 17 00:00:00 2001 From: cecicampanile Date: Tue, 17 Dec 2024 14:33:03 +0000 Subject: [PATCH 3/3] accounts for empty table after looking for a concept in a table --- R/summariseConceptSetCounts.R | 2 +- tests/testthat/test-summariseConceptSetCounts.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/summariseConceptSetCounts.R b/R/summariseConceptSetCounts.R index 8ee9f4a..1eaedd7 100644 --- a/R/summariseConceptSetCounts.R +++ b/R/summariseConceptSetCounts.R @@ -158,7 +158,7 @@ getCodeUse <- function(x, tablePrefix = tablePrefix, sample = sample, dateRange = dateRange) - if(is.null(records)){ + if(is.null(records) || omopgenerics::isTableEmpty(records)){ cc <- dplyr::tibble() cli::cli_inform(c( "i" = "No records found in the cdm for the concepts provided." diff --git a/tests/testthat/test-summariseConceptSetCounts.R b/tests/testthat/test-summariseConceptSetCounts.R index f27ef85..9c66daf 100644 --- a/tests/testthat/test-summariseConceptSetCounts.R +++ b/tests/testthat/test-summariseConceptSetCounts.R @@ -565,12 +565,12 @@ test_that("sample argument works", { # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(x<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)), sample = 50)) - expect_no_error(y<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)))) + expect_no_error(d<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)), sample = 50)) + expect_no_error(y<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)))) n <- cdm$drug_exposure |> dplyr::tally()|> dplyr::pull(n) - expect_no_error(z<-summariseConceptSetCounts(cdm,conceptSet = list("x" = c(40213260)),sample = n)) + expect_no_error(z<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)),sample = n)) expect_equal(y,z) PatientProfiles::mockDisconnect(cdm = cdm) })