Merge pull request #36 from OHDSI/develop

Develop
OHDSI · Nov 28, 2023 · bbadcb4 · bbadcb4
2 parents 01d29b1 + ea3c228
commit bbadcb4
Show file tree

Hide file tree

Showing 7 changed files with 127 additions and 113 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: Characterization
 Type: Package
 Title: Characterizations of Cohorts
-Version: 0.1.2
+Version: 0.1.3
 Date: 2023-09-03
 Authors@R: c(
 	person("Jenna", "Reps", , "[email protected]", role = c("aut", "cre")),
@@ -16,7 +16,7 @@ Depends:
 	R (>= 4.0.0)
 Imports:
   Andromeda,
-	DatabaseConnector (>= 5.0.4),
+	DatabaseConnector (>= 6.3.1),
 	FeatureExtraction  (>= 3.0.0),
 	SqlRender (>= 1.9.0),
 	ParallelLogger (>= 3.0.0),
@@ -39,7 +39,8 @@ Remotes:
 	ohdsi/FeatureExtraction,
 	ohdsi/Eunomia,
 	ohdsi/ResultModelManager,
-	ohdsi/ShinyAppBuilder
+	ohdsi/ShinyAppBuilder,
+	ohdsi/DatabaseConnector
 NeedsCompilation: no
 RoxygenNote: 7.2.3
 Encoding: UTF-8

diff --git a/R/Database.R b/R/Database.R
@@ -86,7 +86,7 @@ insertAndromedaToDatabase <- function(
         data = data,
         minCellCount = minCellCount,
         minCellCountColumns = minCellCountColumns
-        )
+      )
 
       DatabaseConnector::insertTable(
         connection = connection,
@@ -255,11 +255,14 @@ createCharacterizationTables <- function(
 #'                                     function \code{connect} in the
 #'                                     \code{DatabaseConnector} package.
 #' @param resultSchema                 The name of the database schema in which the result tables will be created.
-#' @param targetDialect                The database management system being used
+#' @param targetDialect                DEPRECATED: derived from \code{connectionDetails}.
 #' @param tablePrefix                  The table prefix to apply to the characterization result tables
 #' @param filePrefix                   The prefix to apply to the files
 #' @param tempEmulationSchema          The temp schema used when the database management system is oracle
 #' @param saveDirectory                The directory to save the csv results
+#' @param minMeanCovariateValue        The minimum mean covariate value (i.e. the minimum proportion for
+#'                                     binary covariates) for a covariate to be included in the exported covariates table.
+#'                                     Covariates below this value are left out of the csv to save space.
 #'
 #' @return
 #' One csv file per result table, written into the saveDirectory
@@ -268,11 +271,12 @@ createCharacterizationTables <- function(
 exportDatabaseToCsv <- function(
     connectionDetails,
     resultSchema,
-    targetDialect,
+    targetDialect = NULL,
     tablePrefix = "c_",
     filePrefix = NULL,
     tempEmulationSchema = NULL,
-    saveDirectory
+    saveDirectory,
+    minMeanCovariateValue = 0.001
 ){
 
   errorMessages <- checkmate::makeAssertCollection()
@@ -282,6 +286,9 @@ exportDatabaseToCsv <- function(
     errorMessages = errorMessages
   )
   checkmate::reportAssertions(errorMessages)
+  if (!is.null(targetDialect)) {
+    warning("The targetDialect argument is deprecated")
+  }
 
   if (is.null(filePrefix)) {
     filePrefix = ''
@@ -303,37 +310,44 @@ exportDatabaseToCsv <- function(
     )
   }
 
+  # max number of rows extracted at a time
+  maxRowCount <- 1e6
+
   # get the table names using the function in uploadToDatabase.R
   tables <- getResultTables()
 
   # extract result per table
   for(table in tables){
-    sql <- "select * from @resultSchema.@appendtotable@tablename"
+    sql <- "select * from @resultSchema.@appendtotable@tablename;"
     sql <- SqlRender::render(
       sql = sql,
       resultSchema = resultSchema,
       appendtotable = tablePrefix,
       tablename = table
     )
-    sql <- SqlRender::translate(
-      sql = sql,
-      targetDialect = targetDialect,
-      tempEmulationSchema = tempEmulationSchema
-    )
-    result <- DatabaseConnector::querySql(
-      connection = connection,
-      sql = sql,
-      snakeCaseToCamelCase = F
-    )
-    result <- formatDouble(result)
-
-    # save the results as a csv
-    readr::write_csv(
-      x = result,
-      file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv'))
-    )
+    resultSet <- DatabaseConnector::dbSendQuery(connection, sql)
+    tryCatch({
+      first <- TRUE
+      while (first || !DatabaseConnector::dbHasCompleted(resultSet)) {
+        result <- DatabaseConnector::dbFetch(resultSet, n = maxRowCount)
+        if (table == "covariates" && minMeanCovariateValue > 0) {
+          result <- result %>%
+            dplyr::filter(.data$average_value >= minMeanCovariateValue)
+        }
+        result <- formatDouble(result)
+        # save the results as a csv
+        readr::write_csv(
+          x = result,
+          file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv')),
+          append = !first
+        )
+        first <- FALSE
+      }
+    },
+    finally = {
+      DatabaseConnector::dbClearResult(resultSet)
+    })
   }
-
   invisible(saveDirectory)
 }
 

diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R
@@ -273,7 +273,7 @@ runCharacterizationAnalyses <- function(
           tablePrefix = tablePrefix,
           minCellCount = minCellCount,
           minCellCountColumns = list(
-              c('numEvents'),
+              c('numCases'),
               c('dechallengeAttempt'),
               c('dechallengeFail', 'dechallengeSuccess'),
               c('rechallengeAttempt'),

diff --git a/inst/sql/sql_server/DropAggregateCovariate.sql b/inst/sql/sql_server/DropAggregateCovariate.sql
@@ -21,12 +21,6 @@ DROP TABLE #target_with_outcome;
 TRUNCATE TABLE #target_outcome_f;
 DROP TABLE #target_outcome_f;
 
-TRUNCATE TABLE #target_nooutcome;
-DROP TABLE #target_nooutcome;
-
-TRUNCATE TABLE #target_noout_f;
-DROP TABLE #target_noout_f;
-
 TRUNCATE TABLE #agg_cohorts;
 DROP TABLE #agg_cohorts;
 

diff --git a/inst/sql/sql_server/createTargetOutcomeCombinations.sql b/inst/sql/sql_server/createTargetOutcomeCombinations.sql
@@ -10,7 +10,7 @@ drop table if exists #targets_agg;
 select * into #targets_agg
 from
 (select *,
-row_number() over(partition by subject_id, cohort_definition_id, cohort_start_date order by cohort_start_date asc) as rn
+row_number() over(partition by subject_id, cohort_definition_id order by cohort_start_date asc) as rn
 from @target_database_schema.@target_table
 where cohort_definition_id in
 (@target_ids)
@@ -69,16 +69,16 @@ CROSS JOIN
 
 union
 
-select distinct
-t.cohort_definition_id as target_cohort_id,
-o.cohort_definition_id as outcome_cohort_id,
-'TnOc' as cohort_type
-from
-(select distinct cohort_definition_id from #targets_agg) as t
-CROSS JOIN
-(select distinct cohort_definition_id from #outcomes_agg) as o
+--select distinct
+--t.cohort_definition_id as target_cohort_id,
+--o.cohort_definition_id as outcome_cohort_id,
+--'TnOc' as cohort_type
+--from
+--(select distinct cohort_definition_id from #targets_agg) as t
+--CROSS JOIN
+--(select distinct cohort_definition_id from #outcomes_agg) as o
 
-union
+--union
 
 select distinct
 t.cohort_definition_id as target_cohort_id,
@@ -102,16 +102,16 @@ CROSS JOIN
 
 union
 
-select distinct
-t.cohort_definition_id as target_cohort_id,
-o.cohort_definition_id as outcome_cohort_id,
-'TnfirstOc' as cohort_type
-from
-(select distinct cohort_definition_id from #targets_agg) as t
-CROSS JOIN
-(select distinct cohort_definition_id from #outcomes_agg) as o
+--select distinct
+--t.cohort_definition_id as target_cohort_id,
+--o.cohort_definition_id as outcome_cohort_id,
+--'TnfirstOc' as cohort_type
+--from
+--(select distinct cohort_definition_id from #targets_agg) as t
+--CROSS JOIN
+--(select distinct cohort_definition_id from #outcomes_agg) as o
 
-union
+--union
 
 select distinct
 t.cohort_definition_id as target_cohort_id,
@@ -191,39 +191,39 @@ o.cohort_start_date >= dateadd(day, @tar_start, t.@tar_start_anchor);
 
 
 -- 2) get all the people without the outcome in TAR
-drop table if exists #target_nooutcome;
-select
-t.subject_id,
-t.cohort_start_date,
-t.cohort_end_date,
-t.cohort_definition_id as target_cohort_id,
-o.cohort_definition_id as outcome_cohort_id
-into #target_nooutcome
-from #targets_agg t
-CROSS JOIN
-( select distinct cohort_definition_id from #outcomes_agg) o
-left outer join #target_with_outcome two
-on t.cohort_definition_id = two.target_cohort_id
-and t.subject_id = two.subject_id
-and o.cohort_definition_id = two.outcome_cohort_id
-where two.subject_id IS NULL;
-
-drop table if exists #target_noout_f;
-select
-t.subject_id,
-t.cohort_start_date,
-t.cohort_end_date,
-t.cohort_definition_id as target_cohort_id,
-o.cohort_definition_id as outcome_cohort_id
-into #target_noout_f
-from #targets_agg t
-CROSS JOIN
-( select distinct cohort_definition_id from #outcomes_agg) o
-left outer join #target_outcome_f two
-on t.cohort_definition_id = two.target_cohort_id
-and t.subject_id = two.subject_id
-and o.cohort_definition_id = two.outcome_cohort_id
-where two.subject_id IS NULL;
+--drop table if exists #target_nooutcome;
+--select
+--t.subject_id,
+--t.cohort_start_date,
+--t.cohort_end_date,
+--t.cohort_definition_id as target_cohort_id,
+--o.cohort_definition_id as outcome_cohort_id
+--into #target_nooutcome
+--from #targets_agg t
+--CROSS JOIN
+--( select distinct cohort_definition_id from #outcomes_agg) o
+--left outer join #target_with_outcome two
+--on t.cohort_definition_id = two.target_cohort_id
+--and t.subject_id = two.subject_id
+--and o.cohort_definition_id = two.outcome_cohort_id
+--where two.subject_id IS NULL;
+
+--drop table if exists #target_noout_f;
+--select
+--t.subject_id,
+--t.cohort_start_date,
+--t.cohort_end_date,
+--t.cohort_definition_id as target_cohort_id,
+--o.cohort_definition_id as outcome_cohort_id
+--into #target_noout_f
+--from #targets_agg t
+--CROSS JOIN
+--( select distinct cohort_definition_id from #outcomes_agg) o
+--left outer join #target_outcome_f two
+--on t.cohort_definition_id = two.target_cohort_id
+--and t.subject_id = two.subject_id
+--and o.cohort_definition_id = two.outcome_cohort_id
+--where two.subject_id IS NULL;
 
 -- Final: select into #agg_cohorts
 
@@ -294,33 +294,33 @@ union
 
 -- T without O
 
-select
-tnoc.subject_id,
-tnoc.cohort_start_date,
-tnoc.cohort_end_date,
-cd.cohort_definition_id
-from #target_nooutcome tnoc
-INNER JOIN #cohort_details cd
-on cd.target_cohort_id = tnoc.target_cohort_id
-and cd.outcome_cohort_id = tnoc.outcome_cohort_id
-and cd.cohort_type = 'TnOc'
+--select
+--tnoc.subject_id,
+--tnoc.cohort_start_date,
+--tnoc.cohort_end_date,
+--cd.cohort_definition_id
+--from #target_nooutcome tnoc
+--INNER JOIN #cohort_details cd
+--on cd.target_cohort_id = tnoc.target_cohort_id
+--and cd.outcome_cohort_id = tnoc.outcome_cohort_id
+--and cd.cohort_type = 'TnOc'
 
-union
+--union
 
 -- T without first O
 
-select
-tnoc.subject_id,
-tnoc.cohort_start_date,
-tnoc.cohort_end_date,
-cd.cohort_definition_id
-from #target_noout_f tnoc
-INNER JOIN #cohort_details cd
-on cd.target_cohort_id = tnoc.target_cohort_id
-and cd.outcome_cohort_id = tnoc.outcome_cohort_id
-and cd.cohort_type = 'TnfirstOc'
-
-union
+--select
+--tnoc.subject_id,
+--tnoc.cohort_start_date,
+--tnoc.cohort_end_date,
+--cd.cohort_definition_id
+--from #target_noout_f tnoc
+--INNER JOIN #cohort_details cd
+--on cd.target_cohort_id = tnoc.target_cohort_id
+--and cd.outcome_cohort_id = tnoc.outcome_cohort_id
+--and cd.cohort_type = 'TnfirstOc'
+
+--union
 
 -- Ts and Os
 

diff --git a/man/exportDatabaseToCsv.Rd b/man/exportDatabaseToCsv.Rd