diff --git a/DESCRIPTION b/DESCRIPTION index 2b18de4..2c79450 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Characterization Type: Package Title: Characterizations of Cohorts -Version: 0.1.2 +Version: 0.1.3 Date: 2023-09-03 Authors@R: c( person("Jenna", "Reps", , "reps@ohdsi.org", role = c("aut", "cre")), @@ -16,7 +16,7 @@ Depends: R (>= 4.0.0) Imports: Andromeda, - DatabaseConnector (>= 5.0.4), + DatabaseConnector (>= 6.3.1), FeatureExtraction (>= 3.0.0), SqlRender (>= 1.9.0), ParallelLogger (>= 3.0.0), @@ -39,7 +39,8 @@ Remotes: ohdsi/FeatureExtraction, ohdsi/Eunomia, ohdsi/ResultModelManager, - ohdsi/ShinyAppBuilder + ohdsi/ShinyAppBuilder, + ohdsi/DatabaseConnector NeedsCompilation: no RoxygenNote: 7.2.3 Encoding: UTF-8 diff --git a/R/Database.R b/R/Database.R index 69a137a..611c332 100644 --- a/R/Database.R +++ b/R/Database.R @@ -86,7 +86,7 @@ insertAndromedaToDatabase <- function( data = data, minCellCount = minCellCount, minCellCountColumns = minCellCountColumns - ) + ) DatabaseConnector::insertTable( connection = connection, @@ -255,11 +255,14 @@ createCharacterizationTables <- function( #' function \code{connect} in the #' \code{DatabaseConnector} package. #' @param resultSchema The name of the database schema that the result tables will be created. -#' @param targetDialect The database management system being used +#' @param targetDialect DEPRECATED: derived from \code{connectionDetails}. #' @param tablePrefix The table prefix to apply to the characterization result tables #' @param filePrefix The prefix to apply to the files #' @param tempEmulationSchema The temp schema used when the database management system is oracle #' @param saveDirectory The directory to save the csv results +#' @param minMeanCovariateValue The minimum mean covariate value (i.e. the minimum proportion for +#' binary covariates) for a covariate to be included in covariate table. +#' Other covariates are removed to save space. #' #' @return #' csv file per table into the saveDirectory @@ -268,11 +271,12 @@ createCharacterizationTables <- function( exportDatabaseToCsv <- function( connectionDetails, resultSchema, - targetDialect, + targetDialect = NULL, tablePrefix = "c_", filePrefix = NULL, tempEmulationSchema = NULL, - saveDirectory + saveDirectory, + minMeanCovariateValue = 0.001 ){ errorMessages <- checkmate::makeAssertCollection() @@ -282,6 +286,9 @@ exportDatabaseToCsv <- function( errorMessages = errorMessages ) checkmate::reportAssertions(errorMessages) + if (!is.null(targetDialect)) { + warning("The targetDialect argument is deprecated") + } if (is.null(filePrefix)) { filePrefix = '' @@ -303,37 +310,44 @@ exportDatabaseToCsv <- function( ) } + # max number of rows extracted at a time + maxRowCount <- 1e6 + # get the table names using the function in uploadToDatabase.R tables <- getResultTables() # extract result per table for(table in tables){ - sql <- "select * from @resultSchema.@appendtotable@tablename" + sql <- "select * from @resultSchema.@appendtotable@tablename;" sql <- SqlRender::render( sql = sql, resultSchema = resultSchema, appendtotable = tablePrefix, tablename = table ) - sql <- SqlRender::translate( - sql = sql, - targetDialect = targetDialect, - tempEmulationSchema = tempEmulationSchema - ) - result <- DatabaseConnector::querySql( - connection = connection, - sql = sql, - snakeCaseToCamelCase = F - ) - result <- formatDouble(result) - - # save the results as a csv - readr::write_csv( - x = result, - file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv')) - ) + resultSet <- DatabaseConnector::dbSendQuery(connection, sql) + tryCatch({ + first <- TRUE + while (first || !DatabaseConnector::dbHasCompleted(resultSet)) { + result <- DatabaseConnector::dbFetch(resultSet, n = maxRowCount) + if (table == "covariates" && minMeanCovariateValue > 0) { + result <- result %>% + dplyr::filter(.data$average_value >= minMeanCovariateValue) + } + result <- formatDouble(result) + # save the results as a csv + readr::write_csv( + x = result, + file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv')), + append = !first + ) + first <- FALSE + } + }, + finally = { + DatabaseConnector::dbClearResult(resultSet) + }) } - invisible(saveDirectory) } diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R index 3f66f83..c32f42c 100644 --- a/R/RunCharacterization.R +++ b/R/RunCharacterization.R @@ -273,7 +273,7 @@ runCharacterizationAnalyses <- function( tablePrefix = tablePrefix, minCellCount = minCellCount, minCellCountColumns = list( - c('numEvents'), + c('numCases'), c('dechallengeAttempt'), c('dechallengeFail', 'dechallengeSuccess'), c('rechallengeAttempt'), diff --git a/inst/sql/sql_server/DropAggregateCovariate.sql b/inst/sql/sql_server/DropAggregateCovariate.sql index b319cff..3e603cb 100644 --- a/inst/sql/sql_server/DropAggregateCovariate.sql +++ b/inst/sql/sql_server/DropAggregateCovariate.sql @@ -21,12 +21,6 @@ DROP TABLE #target_with_outcome; TRUNCATE TABLE #target_outcome_f; DROP TABLE #target_outcome_f; -TRUNCATE TABLE #target_nooutcome; -DROP TABLE #target_nooutcome; - -TRUNCATE TABLE #target_noout_f; -DROP TABLE #target_noout_f; - TRUNCATE TABLE #agg_cohorts; DROP TABLE #agg_cohorts; diff --git a/inst/sql/sql_server/createTargetOutcomeCombinations.sql b/inst/sql/sql_server/createTargetOutcomeCombinations.sql index 5b56ad4..8329fd4 100644 --- a/inst/sql/sql_server/createTargetOutcomeCombinations.sql +++ b/inst/sql/sql_server/createTargetOutcomeCombinations.sql @@ -10,7 +10,7 @@ drop table if exists #targets_agg; select * into #targets_agg from (select *, -row_number() over(partition by subject_id, cohort_definition_id, cohort_start_date order by cohort_start_date asc) as rn +row_number() over(partition by subject_id, cohort_definition_id order by cohort_start_date asc) as rn from @target_database_schema.@target_table where cohort_definition_id in (@target_ids) @@ -69,16 +69,16 @@ CROSS JOIN union -select distinct -t.cohort_definition_id as target_cohort_id, -o.cohort_definition_id as outcome_cohort_id, -'TnOc' as cohort_type -from -(select distinct cohort_definition_id from #targets_agg) as t -CROSS JOIN -(select distinct cohort_definition_id from #outcomes_agg) as o +--select distinct +--t.cohort_definition_id as target_cohort_id, +--o.cohort_definition_id as outcome_cohort_id, +--'TnOc' as cohort_type +--from +--(select distinct cohort_definition_id from #targets_agg) as t +--CROSS JOIN +--(select distinct cohort_definition_id from #outcomes_agg) as o -union +--union select distinct t.cohort_definition_id as target_cohort_id, @@ -102,16 +102,16 @@ CROSS JOIN union -select distinct -t.cohort_definition_id as target_cohort_id, -o.cohort_definition_id as outcome_cohort_id, -'TnfirstOc' as cohort_type -from -(select distinct cohort_definition_id from #targets_agg) as t -CROSS JOIN -(select distinct cohort_definition_id from #outcomes_agg) as o +--select distinct +--t.cohort_definition_id as target_cohort_id, +--o.cohort_definition_id as outcome_cohort_id, +--'TnfirstOc' as cohort_type +--from +--(select distinct cohort_definition_id from #targets_agg) as t +--CROSS JOIN +--(select distinct cohort_definition_id from #outcomes_agg) as o -union +--union select distinct t.cohort_definition_id as target_cohort_id, @@ -191,39 +191,39 @@ o.cohort_start_date >= dateadd(day, @tar_start, t.@tar_start_anchor); -- 2) get all the people without the outcome in TAR -drop table if exists #target_nooutcome; -select -t.subject_id, -t.cohort_start_date, -t.cohort_end_date, -t.cohort_definition_id as target_cohort_id, -o.cohort_definition_id as outcome_cohort_id -into #target_nooutcome -from #targets_agg t -CROSS JOIN -( select distinct cohort_definition_id from #outcomes_agg) o -left outer join #target_with_outcome two -on t.cohort_definition_id = two.target_cohort_id -and t.subject_id = two.subject_id -and o.cohort_definition_id = two.outcome_cohort_id -where two.subject_id IS NULL; - -drop table if exists #target_noout_f; -select -t.subject_id, -t.cohort_start_date, -t.cohort_end_date, -t.cohort_definition_id as target_cohort_id, -o.cohort_definition_id as outcome_cohort_id -into #target_noout_f -from #targets_agg t -CROSS JOIN -( select distinct cohort_definition_id from #outcomes_agg) o -left outer join #target_outcome_f two -on t.cohort_definition_id = two.target_cohort_id -and t.subject_id = two.subject_id -and o.cohort_definition_id = two.outcome_cohort_id -where two.subject_id IS NULL; +--drop table if exists #target_nooutcome; +--select +--t.subject_id, +--t.cohort_start_date, +--t.cohort_end_date, +--t.cohort_definition_id as target_cohort_id, +--o.cohort_definition_id as outcome_cohort_id +--into #target_nooutcome +--from #targets_agg t +--CROSS JOIN +--( select distinct cohort_definition_id from #outcomes_agg) o +--left outer join #target_with_outcome two +--on t.cohort_definition_id = two.target_cohort_id +--and t.subject_id = two.subject_id +--and o.cohort_definition_id = two.outcome_cohort_id +--where two.subject_id IS NULL; + +--drop table if exists #target_noout_f; +--select +--t.subject_id, +--t.cohort_start_date, +--t.cohort_end_date, +--t.cohort_definition_id as target_cohort_id, +--o.cohort_definition_id as outcome_cohort_id +--into #target_noout_f +--from #targets_agg t +--CROSS JOIN +--( select distinct cohort_definition_id from #outcomes_agg) o +--left outer join #target_outcome_f two +--on t.cohort_definition_id = two.target_cohort_id +--and t.subject_id = two.subject_id +--and o.cohort_definition_id = two.outcome_cohort_id +--where two.subject_id IS NULL; -- Final: select into #agg_cohorts @@ -294,33 +294,33 @@ union -- T without O -select -tnoc.subject_id, -tnoc.cohort_start_date, -tnoc.cohort_end_date, -cd.cohort_definition_id -from #target_nooutcome tnoc -INNER JOIN #cohort_details cd -on cd.target_cohort_id = tnoc.target_cohort_id -and cd.outcome_cohort_id = tnoc.outcome_cohort_id -and cd.cohort_type = 'TnOc' +--select +--tnoc.subject_id, +--tnoc.cohort_start_date, +--tnoc.cohort_end_date, +--cd.cohort_definition_id +--from #target_nooutcome tnoc +--INNER JOIN #cohort_details cd +--on cd.target_cohort_id = tnoc.target_cohort_id +--and cd.outcome_cohort_id = tnoc.outcome_cohort_id +--and cd.cohort_type = 'TnOc' -union +--union -- T without first O -select -tnoc.subject_id, -tnoc.cohort_start_date, -tnoc.cohort_end_date, -cd.cohort_definition_id -from #target_noout_f tnoc -INNER JOIN #cohort_details cd -on cd.target_cohort_id = tnoc.target_cohort_id -and cd.outcome_cohort_id = tnoc.outcome_cohort_id -and cd.cohort_type = 'TnfirstOc' - -union +--select +--tnoc.subject_id, +--tnoc.cohort_start_date, +--tnoc.cohort_end_date, +--cd.cohort_definition_id +--from #target_noout_f tnoc +--INNER JOIN #cohort_details cd +--on cd.target_cohort_id = tnoc.target_cohort_id +--and cd.outcome_cohort_id = tnoc.outcome_cohort_id +--and cd.cohort_type = 'TnfirstOc' + +--union -- Ts and Os diff --git a/man/exportDatabaseToCsv.Rd b/man/exportDatabaseToCsv.Rd index 08e5fdc..e39f860 100644 --- a/man/exportDatabaseToCsv.Rd +++ b/man/exportDatabaseToCsv.Rd @@ -7,11 +7,12 @@ exportDatabaseToCsv( connectionDetails, resultSchema, - targetDialect, + targetDialect = NULL, tablePrefix = "c_", filePrefix = NULL, tempEmulationSchema = NULL, - saveDirectory + saveDirectory, + minMeanCovariateValue = 0.001 ) } \arguments{ @@ -21,7 +22,7 @@ function \code{connect} in the \item{resultSchema}{The name of the database schema that the result tables will be created.} -\item{targetDialect}{The database management system being used} +\item{targetDialect}{DEPRECATED: derived from \code{connectionDetails}.} \item{tablePrefix}{The table prefix to apply to the characterization result tables} @@ -30,6 +31,10 @@ function \code{connect} in the \item{tempEmulationSchema}{The temp schema used when the database management system is oracle} \item{saveDirectory}{The directory to save the csv results} + +\item{minMeanCovariateValue}{The minimum mean covariate value (i.e. the minimum proportion for +binary covariates) for a covariate to be included in covariate table. +Other covariates are removed to save space.} } \value{ csv file per table into the saveDirectory diff --git a/tests/testthat/test-aggregateCovariate.R b/tests/testthat/test-aggregateCovariate.R index af70431..9f1e9d0 100644 --- a/tests/testthat/test-aggregateCovariate.R +++ b/tests/testthat/test-aggregateCovariate.R @@ -97,7 +97,7 @@ test_that("computeAggregateCovariateAnalyses", { testthat::expect_true(inherits(agc, "CovariateData")) testthat::expect_true(length(unique(as.data.frame(agc$covariates)$cohortDefinitionId)) - <= length(res$targetIds) * length(res$outcomeIds) * 6 + length(res$targetIds)*2 + length(res$outcomeIds)*2) + <= length(res$targetIds) * length(res$outcomeIds) * 4 + length(res$targetIds)*2 + length(res$outcomeIds)*2) testthat::expect_true( sum(names(agc) %in% c( "analysisRef", @@ -121,7 +121,7 @@ test_that("computeAggregateCovariateAnalyses", { ) testthat::expect_true( - nrow(as.data.frame(agc$cohortDetails)) == 26 # 8 T/Os, 6 TnO, 6 TnOc, 6 OnT + nrow(as.data.frame(agc$cohortDetails)) == 20 # 8 T/Os, 6 TnO, 0 TnOc, 6 OnT ) # test saving/loading