Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sampling option #276

Merged
merged 4 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion R/summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ checkFeasibility <- function(omopTable, tableName, conceptId){
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object with results overall and, if specified, by
Expand All @@ -63,6 +65,7 @@ summariseAllConceptCounts <- function(cdm,
year = FALSE,
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL){

omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -93,7 +96,7 @@ summariseAllConceptCounts <- function(cdm,
}

omopTable <- restrictStudyPeriod(omopTable, dateRange)

omopTable <- sampleOmopTable(omopTable, sample)

indexDate <- startDate(omopgenerics::tableName(omopTable))

Expand Down
7 changes: 6 additions & 1 deletion R/summariseClinicalRecords.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#' @param ageGroup A list of age groups to stratify results by.
#' @param sex Boolean variable. Whether to stratify by sex (TRUE) or not
#' (FALSE).
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object.
Expand Down Expand Up @@ -55,6 +57,7 @@ summariseClinicalRecords <- function(cdm,
typeConcept = TRUE,
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL) {
# Initial checks ----
omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -93,6 +96,7 @@ summariseClinicalRecords <- function(cdm,
typeConcept = typeConcept,
sex = sex,
ageGroup = ageGroup,
sample = sample,
dateRange = dateRange
)
}) |>
Expand All @@ -112,6 +116,7 @@ summariseClinicalRecord <- function(omopTableName,
typeConcept,
sex,
ageGroup,
sample,
dateRange,
call = parent.frame(3)) {

Expand All @@ -124,8 +129,8 @@ summariseClinicalRecord <- function(omopTableName,

omopTable <- cdm[[omopTableName]] |>
dplyr::ungroup()

omopTable <- restrictStudyPeriod(omopTable, dateRange)
omopTable <- sampleOmopTable(omopTable, sample)
if(omopgenerics::isTableEmpty(omopTable)) {
return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_clinical_records", study_period = dateRange)))
}
Expand Down
46 changes: 29 additions & 17 deletions R/summariseConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @param sample An integer to sample the tables in the cdm object to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object with results overall and, if specified, by
Expand Down Expand Up @@ -37,6 +39,7 @@
interval = "overall",
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL){

omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -74,6 +77,7 @@
unitInterval = unitInterval,
sex = sex,
ageGroup = ageGroup,
sample = sample,
dateRange = dateRange)
Sys.sleep(i/length(conceptSet))
cli::cli_progress_update()
Expand Down Expand Up @@ -107,6 +111,7 @@
unitInterval,
sex,
ageGroup,
sample,
dateRange,
call = parent.frame()){

Expand Down Expand Up @@ -150,26 +155,27 @@
records <- getRelevantRecords(cdm = cdm,
tableCodelist = tableCodelist,
intermediateTable = intermediateTable,
tablePrefix = tablePrefix)
if(is.null(records)){
tablePrefix = tablePrefix,
sample = sample, dateRange = dateRange)

if(is.null(records) || omopgenerics::isTableEmpty(records)){
cc <- dplyr::tibble()
cli::cli_inform(c(
"i" = "No records found in the cdm for the concepts provided."
))
return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange)))
}

if (!is.null(dateRange))
{
records <- records |>
dplyr::filter(
as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2]
)
if (is.null(warningEmptyStudyPeriod(records))){
return(tibble::tibble())
}

}
# if (!is.null(dateRange))
# {
# records <- records |>
# dplyr::filter(
# as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2]
# )
# if (is.null(warningEmptyStudyPeriod(records))){
# return(tibble::tibble())
# }
# }
records <- addStrataToOmopTable(records, "date", ageGroup, sex)
strata <- getStrataList(sex, ageGroup)

Expand Down Expand Up @@ -238,7 +244,8 @@
getRelevantRecords <- function(cdm,
tableCodelist,
intermediateTable,
tablePrefix){
tablePrefix,
sample, dateRange){

codes <- cdm[[tableCodelist]] |> dplyr::collect()

Expand All @@ -248,9 +255,11 @@
dateName <- purrr::discard(unique(codes$start_date), is.na)

if(length(tableName)>0){
codeRecords <- cdm[[tableName[[1]]]]
codeRecords <- cdm[[tableName[[1]]]]|>
restrictStudyPeriod(dateRange)|>
sampleOmopTable(sample)

if(is.null(codeRecords)){return(NULL)}
if(is.null(codeRecords) || omopgenerics::isTableEmpty(codeRecords)){return(NULL)}

tableCodes <- paste0(tablePrefix, "table_codes")

Expand Down Expand Up @@ -290,7 +299,10 @@
# get for any additional domains and union
if(length(tableName) > 1) {
for(i in 1:(length(tableName)-1)) {
workingRecords <- cdm[[tableName[[i+1]]]]
workingRecords <- cdm[[tableName[[i+1]]]] |>
restrictStudyPeriod(dateRange)|>
sampleOmopTable(sample)
if(is.null(workingRecords) || omopgenerics::isTableEmpty(workingRecords)){return(NULL)}

Check warning on line 305 in R/summariseConceptSetCounts.R

View check run for this annotation

Codecov / codecov/patch

R/summariseConceptSetCounts.R#L305

Added line #L305 was not covered by tests

workingRecords <- workingRecords %>%
dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>%
Expand Down
15 changes: 12 additions & 3 deletions R/summariseRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#' @param sex Whether to stratify by sex (TRUE) or not (FALSE).
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @return A summarised_result object.
#' @export
#' @examples
Expand All @@ -35,7 +37,9 @@ summariseRecordCount <- function(cdm,
interval = "overall",
ageGroup = NULL,
sex = FALSE,
dateRange = NULL) {
sample = 1000000,
dateRange = NULL
) {

# Initial checks ----
omopgenerics::validateCdmArgument(cdm)
Expand All @@ -48,6 +52,7 @@ summariseRecordCount <- function(cdm,
omopgenerics::assertLogical(sex, length = 1)
dateRange <- validateStudyPeriod(cdm, dateRange)


result <- purrr::map(omopTableName,
function(x) {
omopgenerics::assertClass(cdm[[x]], "omop_table", call = parent.frame())
Expand All @@ -66,7 +71,9 @@ summariseRecordCount <- function(cdm,
original_interval,
ageGroup = ageGroup,
sex = sex,
dateRange = dateRange)
sample = sample,
dateRange = dateRange
)
}
) |>
dplyr::bind_rows()
Expand All @@ -76,10 +83,12 @@ summariseRecordCount <- function(cdm,

#' @noRd
summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval,
original_interval, ageGroup, sex, dateRange) {
original_interval, ageGroup, sex, sample, dateRange) {

prefix <- omopgenerics::tmpPrefix()
omopTable <- cdm[[omopTableName]] |> dplyr::ungroup()
omopTable <- restrictStudyPeriod(omopTable, dateRange)
omopTable <- sampleOmopTable(omopTable, sample)

# Create initial variables ----

Expand Down
13 changes: 13 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,17 @@ createSettings <- function(result_type, result_id = 1L, package_name = "OmopSket
return(settings)
}

sampleOmopTable <- function(omopTable, sample){
sampling <- !is.null(sample) & !is.infinite(sample)

if (sampling & omopTable |> dplyr::tally() |> dplyr::pull() <= sample) {
sampling <- FALSE
}

if (sampling){
omopTable <- omopTable |>
dplyr::slice_sample(n = sample)
}
return(omopTable)
}

4 changes: 4 additions & 0 deletions man/summariseAllConceptCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseClinicalRecords.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseConceptSetCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseRecordCount.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ test_that("dateRange argument works", {
expect_equal(colnames(settings(y)), colnames(settings(x)))
PatientProfiles::mockDisconnect(cdm = cdm)
})
test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(x<-summariseAllConceptCounts(cdm,"drug_exposure", sample = 50))
expect_no_error(y<-summariseAllConceptCounts(cdm,"drug_exposure"))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseAllConceptCounts(cdm,"drug_exposure",sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableAllConceptCounts() works", {
skip_on_cran()
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test-summariseClinicalRecords.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,21 @@ test_that("dateRange argument works", {

})

test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(x<-summariseClinicalRecords(cdm,"drug_exposure", sample = 50))
expect_no_error(y<-summariseClinicalRecords(cdm,"drug_exposure"))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseClinicalRecords(cdm,"drug_exposure",sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableClinicalRecords() works", {
skip_on_cran()
# Load mock database ----
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-summariseConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,22 @@ test_that("dateRange argument works", {
expect_equal(colnames(settings(z)), colnames(settings(x)))
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(d<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)), sample = 50))
expect_no_error(y<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260))))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)),sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("interval argument works", {
skip_on_cran()
# Load mock database ----
Expand Down
Loading
Loading