From ae7aeef853657ea701e877d6ade3fb438c8ad87d Mon Sep 17 00:00:00 2001 From: cecicampanile Date: Fri, 20 Dec 2024 15:46:07 +0000 Subject: [PATCH] name changed to summariseConceptIdCounts --- NAMESPACE | 1 + R/summariseAllConceptCounts.R | 104 ++------------ R/summariseConceptIdCounts.R | 133 ++++++++++++++++++ man/summariseConceptIdCounts.Rd | 60 ++++++++ .../testthat/test-summariseAllConceptCounts.R | 62 ++++---- 5 files changed, 240 insertions(+), 120 deletions(-) create mode 100644 R/summariseConceptIdCounts.R create mode 100644 man/summariseConceptIdCounts.Rd diff --git a/NAMESPACE b/NAMESPACE index a3fa787..15fa8e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(settings) export(summariseAllConceptCounts) export(summariseClinicalRecords) export(summariseConceptCounts) +export(summariseConceptIdCounts) export(summariseConceptSetCounts) export(summariseInObservation) export(summariseMissingData) diff --git a/R/summariseAllConceptCounts.R b/R/summariseAllConceptCounts.R index bd3befc..3f79dd1 100644 --- a/R/summariseAllConceptCounts.R +++ b/R/summariseAllConceptCounts.R @@ -41,93 +41,19 @@ summariseAllConceptCounts <- function(cdm, ageGroup = NULL, sample = NULL, dateRange = NULL) { - # initial checks - cdm <- omopgenerics::validateCdmArgument(cdm) - checkCountBy(countBy) - omopgenerics::assertLogical(year, length = 1) - omopgenerics::assertLogical(sex, length = 1) - omopgenerics::assertChoice(omopTableName, choices = omopgenerics::omopTables(), unique = TRUE) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup) - dateRange <- validateStudyPeriod(cdm, dateRange) - omopgenerics::assertNumeric(sample, integerish = TRUE, min = 1, null = TRUE, length = 1) - - # settings for the created results - set <- createSettings(result_type = "summarise_all_concept_counts", study_period = dateRange) - - # get strata - strata <- omopgenerics::combineStrata(c(strataCols(sex = sex, ageGroup = ageGroup), "year"[year])) - concepts <- c("concept_id", "concept_name") - stratax <- c(list(concepts), purrr::map(strata, \(x) c(concepts, x))) - - # how to count - counts <- c("records", "person_id")[c("record", "person") %in% countBy] - - # summarise counts - resultTables <- purrr::map(omopTableName, \(table) { - # initial table - omopTable <- dplyr::ungroup(cdm[[table]]) - conceptId <- omopgenerics::omopColumns(table = table, field = "standard_concept") - if (is.na(conceptId)) { - cli::cli_warn(c("!" = "No standard concept identified for {table}.")) - return(NULL) - } - - prefix <- omopgenerics::tmpPrefix() - - # restrict study period - omopTable <- restrictStudyPeriod(omopTable, dateRange) - if (is.null(omopTable)) return(NULL) - - # sample table - omopTable <- omopTable |> - sampleOmopTable(sample = sample, name = omopgenerics::uniqueTableName(prefix)) - - result <- omopTable |> - # add concept names - dplyr::rename(concept_id = dplyr::all_of(conceptId)) |> - dplyr::left_join( - cdm$concept |> - dplyr::select("concept_id", "concept_name"), - by = "concept_id" - ) |> - # add demographics and year - addStratifications( - indexDate = omopgenerics::omopColumns(table = table, field = "start_date"), - sex = sex, - ageGroup = ageGroup, - interval = dplyr::if_else(year, "years", "overall"), - intervalName = "year", - name = omopgenerics::uniqueTableName(prefix) - ) |> - # summarise results - summariseCountsInternal(stratax, counts) |> - dplyr::mutate(omop_table = .env$table) - - omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(prefix)) - - return(result) - }) |> - purrr::compact() - - if (length(resultTables) == 0) { - return(omopgenerics::emptySummarisedResult(settings = set)) - } - - resultTables |> - dplyr::bind_rows() |> - dplyr::mutate( - result_id = 1L, - cdm_name = omopgenerics::cdmName(cdm) - ) |> - omopgenerics::uniteGroup(cols = "omop_table") |> - omopgenerics::uniteStrata(cols = unique(unlist(strata)) %||% character()) |> - omopgenerics::uniteAdditional() |> - dplyr::mutate( - estimate_value = as.character(.data$estimate_value), - estimate_type = "integer", - variable_level = as.character(.data$concept_id) - ) |> - dplyr::rename("variable_name" = "concept_name") |> - dplyr::select(!"concept_id") |> - omopgenerics::newSummarisedResult(settings = set) + lifecycle::deprecate_warn( + when = "0.2.0", + what = "summariseAllConceptCounts()", + with = "summariseConceptIdCounts()" + ) + summariseConceptIdCounts( + cdm = cdm, + omopTableName = omopTableName, + countBy = countBy, + year = year, + sex = sex, + ageGroup = ageGroup, + sample = sample, + dateRange = dateRange + ) } diff --git a/R/summariseConceptIdCounts.R b/R/summariseConceptIdCounts.R new file mode 100644 index 0000000..20af304 --- /dev/null +++ b/R/summariseConceptIdCounts.R @@ -0,0 +1,133 @@ + +#' Summarise concept use in patient-level data +#' +#' @param cdm A cdm object +#' @param omopTableName A character vector of the names of the tables to +#' summarise in the cdm object. +#' @param countBy Either "record" for record-level counts or "person" for +#' person-level counts +#' @param year TRUE or FALSE. If TRUE code use will be summarised by year. +#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. +#' @param ageGroup A list of ageGroup vectors of length two. Code use will be +#' thus summarised by age groups. +#' @param sample An integer to sample the tables to only that number of records. +#' If NULL no sample is done. +#' @param dateRange A list containing the minimum and the maximum dates +#' defining the time range within which the analysis is performed. +#' +#' @return A summarised_result object with results overall and, if specified, by +#' strata. +#' +#' @export +#' +#' @examples +#' \donttest{ +#' library(OmopSketch) +#' library(CDMConnector) +#' library(duckdb) +#' +#' requireEunomia() +#' con <- dbConnect(duckdb(), eunomiaDir()) +#' cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main") +#' +#' summariseConceptIdCounts(cdm, "condition_occurrence") +#' } +#' +summariseConceptIdCounts <- function(cdm, + omopTableName, + countBy = "record", + year = FALSE, + sex = FALSE, + ageGroup = NULL, + sample = NULL, + dateRange = NULL) { + # initial checks + cdm <- omopgenerics::validateCdmArgument(cdm) + checkCountBy(countBy) + omopgenerics::assertLogical(year, length = 1) + omopgenerics::assertLogical(sex, length = 1) + omopgenerics::assertChoice(omopTableName, choices = omopgenerics::omopTables(), unique = TRUE) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup) + dateRange <- validateStudyPeriod(cdm, dateRange) + omopgenerics::assertNumeric(sample, integerish = TRUE, min = 1, null = TRUE, length = 1) + + # settings for the created results + set <- createSettings(result_type = "summarise_all_concept_counts", study_period = dateRange) + + # get strata + strata <- omopgenerics::combineStrata(c(strataCols(sex = sex, ageGroup = ageGroup), "year"[year])) + concepts <- c("concept_id", "concept_name") + stratax <- c(list(concepts), purrr::map(strata, \(x) c(concepts, x))) + + # how to count + counts <- c("records", "person_id")[c("record", "person") %in% countBy] + + # summarise counts + resultTables <- purrr::map(omopTableName, \(table) { + # initial table + omopTable <- dplyr::ungroup(cdm[[table]]) + conceptId <- omopgenerics::omopColumns(table = table, field = "standard_concept") + if (is.na(conceptId)) { + cli::cli_warn(c("!" = "No standard concept identified for {table}.")) + return(NULL) + } + + prefix <- omopgenerics::tmpPrefix() + + # restrict study period + omopTable <- restrictStudyPeriod(omopTable, dateRange) + if (is.null(omopTable)) return(NULL) + + # sample table + omopTable <- omopTable |> + sampleOmopTable(sample = sample, name = omopgenerics::uniqueTableName(prefix)) + + result <- omopTable |> + # add concept names + dplyr::rename(concept_id = dplyr::all_of(conceptId)) |> + dplyr::left_join( + cdm$concept |> + dplyr::select("concept_id", "concept_name"), + by = "concept_id" + ) |> + # add demographics and year + addStratifications( + indexDate = omopgenerics::omopColumns(table = table, field = "start_date"), + sex = sex, + ageGroup = ageGroup, + interval = dplyr::if_else(year, "years", "overall"), + intervalName = "year", + name = omopgenerics::uniqueTableName(prefix) + ) |> + # summarise results + summariseCountsInternal(stratax, counts) |> + dplyr::mutate(omop_table = .env$table) + + omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(prefix)) + + return(result) + }) |> + purrr::compact() + + if (length(resultTables) == 0) { + return(omopgenerics::emptySummarisedResult(settings = set)) + } + + resultTables |> + dplyr::bind_rows() |> + dplyr::mutate( + result_id = 1L, + cdm_name = omopgenerics::cdmName(cdm) + ) |> + omopgenerics::uniteGroup(cols = "omop_table") |> + omopgenerics::uniteStrata(cols = unique(unlist(strata)) %||% character()) |> + omopgenerics::uniteAdditional() |> + dplyr::mutate( + estimate_value = as.character(.data$estimate_value), + estimate_type = "integer", + variable_level = as.character(.data$concept_id) + ) |> + dplyr::rename("variable_name" = "concept_name") |> + dplyr::select(!"concept_id") |> + omopgenerics::newSummarisedResult(settings = set) +} diff --git a/man/summariseConceptIdCounts.Rd b/man/summariseConceptIdCounts.Rd new file mode 100644 index 0000000..7ba5006 --- /dev/null +++ b/man/summariseConceptIdCounts.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summariseConceptIdCounts.R +\name{summariseConceptIdCounts} +\alias{summariseConceptIdCounts} +\title{Summarise concept use in patient-level data} +\usage{ +summariseConceptIdCounts( + cdm, + omopTableName, + countBy = "record", + year = FALSE, + sex = FALSE, + ageGroup = NULL, + sample = NULL, + dateRange = NULL +) +} +\arguments{ +\item{cdm}{A cdm object} + +\item{omopTableName}{A character vector of the names of the tables to +summarise in the cdm object.} + +\item{countBy}{Either "record" for record-level counts or "person" for +person-level counts} + +\item{year}{TRUE or FALSE. If TRUE code use will be summarised by year.} + +\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} + +\item{ageGroup}{A list of ageGroup vectors of length two. Code use will be +thus summarised by age groups.} + +\item{sample}{An integer to sample the tables to only that number of records. +If NULL no sample is done.} + +\item{dateRange}{A list containing the minimum and the maximum dates +defining the time range within which the analysis is performed.} +} +\value{ +A summarised_result object with results overall and, if specified, by +strata. +} +\description{ +Summarise concept use in patient-level data +} +\examples{ +\donttest{ +library(OmopSketch) +library(CDMConnector) +library(duckdb) + +requireEunomia() +con <- dbConnect(duckdb(), eunomiaDir()) +cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main") + +summariseAllConceptCounts(cdm, "condition_occurrence") +} + +} diff --git a/tests/testthat/test-summariseAllConceptCounts.R b/tests/testthat/test-summariseAllConceptCounts.R index 2c40730..d52f14d 100644 --- a/tests/testthat/test-summariseAllConceptCounts.R +++ b/tests/testthat/test-summariseAllConceptCounts.R @@ -3,42 +3,42 @@ test_that("summariseAllConceptCount works", { cdm <- cdmEunomia() - expect_true(inherits(summariseAllConceptCounts(cdm, "drug_exposure"), "summarised_result")) - expect_warning(summariseAllConceptCounts(cdm, "observation_period")) - expect_no_error(x <- summariseAllConceptCounts(cdm, "visit_occurrence")) - expect_no_error(summariseAllConceptCounts(cdm, "condition_occurrence", countBy = c("record", "person"))) - expect_no_error(summariseAllConceptCounts(cdm, "drug_exposure")) - expect_no_error(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "person")) - expect_warning(summariseAllConceptCounts(cdm, "device_exposure")) - expect_no_error(y <- summariseAllConceptCounts(cdm, "measurement")) - expect_no_error(summariseAllConceptCounts(cdm, "observation", year = TRUE)) - expect_warning(p<-summariseAllConceptCounts(cdm, "death")) + expect_true(inherits(summariseConceptIdCounts(cdm, "drug_exposure"), "summarised_result")) + expect_warning(summariseConceptIdCounts(cdm, "observation_period")) + expect_no_error(x <- summariseConceptIdCounts(cdm, "visit_occurrence")) + expect_no_error(summariseConceptIdCounts(cdm, "condition_occurrence", countBy = c("record", "person"))) + expect_no_error(summariseConceptIdCounts(cdm, "drug_exposure")) + expect_no_error(summariseConceptIdCounts(cdm, "procedure_occurrence", countBy = "person")) + expect_warning(summariseConceptIdCounts(cdm, "device_exposure")) + expect_no_error(y <- summariseConceptIdCounts(cdm, "measurement")) + expect_no_error(summariseConceptIdCounts(cdm, "observation", year = TRUE)) + expect_warning(p<-summariseConceptIdCounts(cdm, "death")) - expect_no_error(all <- summariseAllConceptCounts(cdm, c("visit_occurrence", "measurement"))) + expect_no_error(all <- summariseConceptIdCounts(cdm, c("visit_occurrence", "measurement"))) expect_equal(all |> sortTibble(), x |> dplyr::bind_rows(y) |> sortTibble()) expect_equal( - summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "record") |> + summariseConceptIdCounts(cdm, "procedure_occurrence", countBy = "record") |> sortTibble(), - summariseAllConceptCounts(cdm, "procedure_occurrence") |> + summariseConceptIdCounts(cdm, "procedure_occurrence") |> sortTibble() ) - expect_warning(summariseAllConceptCounts(cdm, "observation_period")) - expect_error(summariseAllConceptCounts(cdm, omopTableName = "")) - expect_error(summariseAllConceptCounts(cdm, omopTableName = "visit_occurrence", countBy = "dd")) + expect_warning(summariseConceptIdCounts(cdm, "observation_period")) + expect_error(summariseConceptIdCounts(cdm, omopTableName = "")) + expect_error(summariseConceptIdCounts(cdm, omopTableName = "visit_occurrence", countBy = "dd")) expect_equal(settings(y)$result_type, settings(p)$result_type) - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> + expect_true(summariseConceptIdCounts(cdm, "procedure_occurrence", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> dplyr::distinct(.data$strata_level) |> dplyr::tally() |> dplyr::pull() == 9) - expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", ageGroup = list(c(0, 50))) |> + expect_true(summariseConceptIdCounts(cdm, "procedure_occurrence", ageGroup = list(c(0, 50))) |> dplyr::distinct(.data$strata_level) |> dplyr::tally() |> dplyr::pull() == 3) - s <- summariseAllConceptCounts(cdm, "procedure_occurrence") |> + s <- summariseConceptIdCounts(cdm, "procedure_occurrence") |> sortTibble() - z <- summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, year = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> + z <- summariseConceptIdCounts(cdm, "procedure_occurrence", sex = TRUE, year = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |> sortTibble() x <- z |> @@ -69,16 +69,16 @@ test_that("dateRange argument works", { skip_on_cran() # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(summariseAllConceptCounts(cdm, "condition_occurrence", dateRange = as.Date(c("2012-01-01", "2018-01-01")))) - expect_message(x<-summariseAllConceptCounts(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", "2025-01-01")))) + expect_no_error(summariseConceptIdCounts(cdm, "condition_occurrence", dateRange = as.Date(c("2012-01-01", "2018-01-01")))) + expect_message(x<-summariseConceptIdCounts(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", "2025-01-01")))) observationRange <- cdm$observation_period |> dplyr::summarise(minobs = min(.data$observation_period_start_date, na.rm = TRUE), maxobs = max(.data$observation_period_end_date, na.rm = TRUE)) - expect_no_error(y<- summariseAllConceptCounts(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", observationRange |>dplyr::pull("maxobs"))))) + expect_no_error(y<- summariseConceptIdCounts(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", observationRange |>dplyr::pull("maxobs"))))) expect_equal(x |> sortTibble(), y |> sortTibble(), ignore_attr = TRUE) expect_false(settings(x)$study_period_end==settings(y)$study_period_end) - expect_error(summariseAllConceptCounts(cdm, "drug_exposure", dateRange = as.Date(c("2015-01-01", "2014-01-01")))) - expect_warning(y<-summariseAllConceptCounts(cdm, "drug_exposure", dateRange = as.Date(c("2020-01-01", "2021-01-01")))) + expect_error(summariseConceptIdCounts(cdm, "drug_exposure", dateRange = as.Date(c("2015-01-01", "2014-01-01")))) + expect_warning(y<-summariseConceptIdCounts(cdm, "drug_exposure", dateRange = as.Date(c("2020-01-01", "2021-01-01")))) expect_equal(y, omopgenerics::emptySummarisedResult(), ignore_attr = TRUE) expect_equal(settings(y)$result_type, settings(x)$result_type) expect_equal(colnames(settings(y)), colnames(settings(x))) @@ -89,12 +89,12 @@ test_that("sample argument works", { # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(x<-summariseAllConceptCounts(cdm,"drug_exposure", sample = 50)) - expect_no_error(y<-summariseAllConceptCounts(cdm,"drug_exposure")) + expect_no_error(x<-summariseConceptIdCounts(cdm,"drug_exposure", sample = 50)) + expect_no_error(y<-summariseConceptIdCounts(cdm,"drug_exposure")) n <- cdm$drug_exposure |> dplyr::tally()|> dplyr::pull(n) - expect_no_error(z<-summariseAllConceptCounts(cdm,"drug_exposure",sample = n)) + expect_no_error(z<-summariseConceptIdCounts(cdm,"drug_exposure",sample = n)) expect_equal(y |> sortTibble(), z |> sortTibble()) PatientProfiles::mockDisconnect(cdm = cdm) }) @@ -105,12 +105,12 @@ test_that("tableAllConceptCounts() works", { cdm <- cdmEunomia() # Check that works ---- - expect_no_error(x <- tableAllConceptCounts(summariseAllConceptCounts(cdm, "condition_occurrence"))) + expect_no_error(x <- tableAllConceptCounts(summariseConceptIdCounts(cdm, "condition_occurrence"))) expect_true(inherits(x,"gt_tbl")) - expect_no_error(y <- tableAllConceptCounts(summariseAllConceptCounts(cdm, c("drug_exposure", + expect_no_error(y <- tableAllConceptCounts(summariseConceptIdCounts(cdm, c("drug_exposure", "measurement")))) expect_true(inherits(y,"gt_tbl")) - expect_warning(t <- summariseAllConceptCounts(cdm, "death")) + expect_warning(t <- summariseConceptIdCounts(cdm, "death")) expect_warning(inherits(tableAllConceptCounts(t),"gt_tbl")) PatientProfiles::mockDisconnect(cdm = cdm)