diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R index 0be09dd..02e17ac 100644 --- a/R/AggregateCovariates.R +++ b/R/AggregateCovariates.R @@ -18,6 +18,7 @@ #' #' @param targetIds A list of cohortIds for the target cohorts #' @param outcomeIds A list of cohortIds for the outcome cohorts +#' @param minPriorObservation The minimum time in the database a patient in the target cohorts must be observed prior to index #' @template timeAtRisk #' @param covariateSettings An object created using \code{FeatureExtraction::createCovariateSettings} #' @@ -28,6 +29,7 @@ createAggregateCovariateSettings <- function( targetIds, outcomeIds, + minPriorObservation = 0, riskWindowStart = 1, startAnchor = 'cohort start', riskWindowEnd = 365, @@ -57,19 +59,26 @@ createAggregateCovariateSettings <- function( endAnchor = endAnchor, errorMessages = errorMessages ) - + # check covariateSettings .checkCovariateSettings( covariateSettings = covariateSettings, errorMessages = errorMessages ) + # check minPriorObservation + .checkMinPriorObservation( + minPriorObservation = minPriorObservation, + errorMessages = errorMessages + ) + checkmate::reportAssertions(errorMessages) # create list result <- list( targetIds = targetIds, outcomeIds = outcomeIds, + minPriorObservation = minPriorObservation, riskWindowStart = riskWindowStart, startAnchor = startAnchor, riskWindowEnd = riskWindowEnd , @@ -126,6 +135,7 @@ computeAggregateCovariateAnalyses <- function( createCohortsOfInterest( connection = connection, dbms = connectionDetails$dbms, + cdmDatabaseSchema = cdmDatabaseSchema, aggregateCovariateSettings, targetDatabaseSchema, targetTable, @@ -135,16 +145,16 @@ computeAggregateCovariateAnalyses <- function( ) ## get counts - sql <- 'select cohort_definition_id, count(*) N from #agg_cohorts group by cohort_definition_id;' + sql <- 'select cohort_definition_id, count(*) row_count, count(distinct subject_id) person_count from #agg_cohorts group by cohort_definition_id;' sql <- 
SqlRender::translate( sql = sql, targetDialect = connectionDetails$dbms ) counts <- DatabaseConnector::querySql( connection = connection, - sql = sql + sql = sql, + snakeCaseToCamelCase = TRUE ) - #print(counts) # testing message("Computing aggregate covariate results") @@ -159,6 +169,8 @@ computeAggregateCovariateAnalyses <- function( cdmVersion = cdmVersion, aggregated = T ) + # adding counts as a new table + result$cohortCounts <- counts # add databaseId and runId to each table in results # could add settings table with this and just have setting id @@ -234,6 +246,7 @@ computeAggregateCovariateAnalyses <- function( createCohortsOfInterest <- function( connection, + cdmDatabaseSchema, dbms, aggregateCovariateSettings, targetDatabaseSchema, @@ -247,6 +260,7 @@ createCohortsOfInterest <- function( sqlFilename = "createTargetOutcomeCombinations.sql", packageName = "Characterization", dbms = dbms, + cdm_database_schema = cdmDatabaseSchema, tempEmulationSchema = tempEmulationSchema, target_database_schema = targetDatabaseSchema, target_table = targetTable, @@ -254,6 +268,7 @@ createCohortsOfInterest <- function( outcome_table = outcomeTable, target_ids = paste(aggregateCovariateSettings$targetIds, collapse = ',', sep = ','), outcome_ids = paste(aggregateCovariateSettings$outcomeIds, collapse = ',', sep = ','), + min_prior_observation = aggregateCovariateSettings$minPriorObservation, tar_start = aggregateCovariateSettings$riskWindowStart, tar_start_anchor = ifelse( aggregateCovariateSettings$startAnchor == 'cohort start', diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 4da3ead..26eb408 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -246,3 +246,16 @@ } } + +# Asserts minPriorObservation is a single non-negative whole number; failures are added to errorMessages +.checkMinPriorObservation <- function( + minPriorObservation, + errorMessages +) { + checkmate::assertCount( + x = minPriorObservation, + null.ok = FALSE, + .var.name = 'minPriorObservation', + add = errorMessages + ) +} diff --git a/R/SaveLoad.R b/R/SaveLoad.R index 73b5ca6..1720eee 100644 --- 
a/R/SaveLoad.R +++ b/R/SaveLoad.R @@ -404,6 +404,38 @@ exportAggregateCovariateToCsv <- function( } ) + # cohort counts (appended to cohort_counts.csv when the file already exists) + Andromeda::batchApply( + tbl = result$cohortCounts, + fun = function(x) { + + append <- file.exists( + file.path( + saveDirectory, + "cohort_counts.csv" + ) + ) + + dat <- as.data.frame( + x %>% + dplyr::collect() + ) + + colnames(dat) <- SqlRender::camelCaseToSnakeCase( + string = colnames(dat) + ) + + readr::write_csv( + x = dat, + file = file.path( + saveDirectory, + "cohort_counts.csv" + ), + append = append + ) + + } + ) # cohort details Andromeda::batchApply( @@ -577,7 +609,8 @@ exportAggregateCovariateToCsv <- function( "analysis_ref.csv", "covariate_ref.csv", "covariates.csv", - "covariates_continuous.csv" + "covariates_continuous.csv", + "cohort_counts.csv" ) ) ) diff --git a/inst/settings/resultsDataModelSpecification.csv b/inst/settings/resultsDataModelSpecification.csv index f960897..29beb0a 100644 --- a/inst/settings/resultsDataModelSpecification.csv +++ b/inst/settings/resultsDataModelSpecification.csv @@ -1,94 +1,102 @@ -table_name,column_name,data_type,is_required,primary_key,empty_is_na,min_cell_count,description -time_to_event,database_id,varchar(100),Y,Y,N,N,The database identifier -time_to_event,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort -time_to_event,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort -time_to_event,outcome_type,varchar(100),Y,Y,N,N,Is the outvome a first occurrence or repeat -time_to_event,target_outcome_type,varchar(40),Y,Y,N,N,When does the outcome occur relative to target -time_to_event,time_to_event,int,Y,N,N,N,The time (in days) from target index to outcome start -time_to_event,num_events,int,Y,N,N,N,Number of events that occur during the specified time to event -time_to_event,time_scale,varchar(20),Y,N,N,N,time scale for the number of events -rechallenge_fail_case_series,database_id,varchar(100),Y,Y,N,N,The 
database identifier -rechallenge_fail_case_series,dechallenge_stop_interval,int,Y,N,N,N,The time period that É -rechallenge_fail_case_series,dechallenge_evaluation_window,int,Y,N,N,N,The time period that É -rechallenge_fail_case_series,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort -rechallenge_fail_case_series,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort -rechallenge_fail_case_series,person_key,int,Y,Y,N,N,The dense rank for the patient (an identifier that is not the same as the database) -rechallenge_fail_case_series,subject_id,bigint,Y,N,N,N,The person identifier for the failed case series (optional) -rechallenge_fail_case_series,dechallenge_exposure_number,int,Y,N,N,N,The number of times a dechallenge has occurred -rechallenge_fail_case_series,dechallenge_exposure_start_date_offset,int,Y,N,N,N,The offset for the dechallenge start (number of days after index) -rechallenge_fail_case_series,dechallenge_exposure_end_date_offset,int,Y,N,N,N,The offset for the dechallenge end (number of days after index) -rechallenge_fail_case_series,dechallenge_outcome_number,int,Y,N,N,N,The number of times an outcome has occurred during the dechallenge -rechallenge_fail_case_series,dechallenge_outcome_start_date_offset,int,Y,N,N,N,The offset for the outcome start (number of days after index) -rechallenge_fail_case_series,rechallenge_exposure_number,int,Y,N,N,N,The number of times a rechallenge exposure has occurred -rechallenge_fail_case_series,rechallenge_exposure_start_date_offset,int,Y,N,N,N,The offset for the rechallenge start (number of days after index) -rechallenge_fail_case_series,rechallenge_exposure_end_date_offset,int,Y,N,N,N,The offset for the rechallenge end (number of days after index) -rechallenge_fail_case_series,rechallenge_outcome_number,int,Y,N,N,N,The number of times the outcome has occurred during the rechallenge 
-rechallenge_fail_case_series,rechallenge_outcome_start_date_offset,int,Y,N,N,N,The offset for the outcome start (number of days after index) -dechallenge_rechallenge,database_id,varchar(100),Y,Y,N,N,The database identifier -dechallenge_rechallenge,dechallenge_stop_interval,int,Y,Y,N,N,The dechallenge stop interval -dechallenge_rechallenge,dechallenge_evaluation_window,int,Y,Y,N,N,The dechallenge evaluation window -dechallenge_rechallenge,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort -dechallenge_rechallenge,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort -dechallenge_rechallenge,num_exposure_eras,int,Y,N,N,N,The number of exposure eras -dechallenge_rechallenge,num_persons_exposed,int,Y,N,N,N,The number of persons exposed -dechallenge_rechallenge,num_cases,int,Y,N,N,N,The number of cases -dechallenge_rechallenge,dechallenge_attempt,int,Y,N,N,N,The number of dechallenge attempts -dechallenge_rechallenge,dechallenge_fail,int,Y,N,N,N,The dechallenge fail count -dechallenge_rechallenge,dechallenge_success,int,Y,N,N,N,The dechallenge success count -dechallenge_rechallenge,rechallenge_attempt,int,Y,N,N,N,The rechallenge attempt count -dechallenge_rechallenge,rechallenge_fail,int,Y,N,N,N,The rechallenge fail count -dechallenge_rechallenge,rechallenge_success,int,Y,N,N,N,The rechallenge success count -dechallenge_rechallenge,pct_dechallenge_attempt,float,Y,N,N,N,The percentage of dechallenge attempts -dechallenge_rechallenge,pct_dechallenge_success,float,Y,N,N,N,The percentage of dechallenge success -dechallenge_rechallenge,pct_dechallenge_fail,float,Y,N,N,N,The percentage of dechallenge fails -dechallenge_rechallenge,pct_rechallenge_attempt,float,Y,N,N,N,The percentage of rechallenge attempts -dechallenge_rechallenge,pct_rechallenge_success,float,Y,N,N,N,The percentage of rechallenge success -dechallenge_rechallenge,pct_rechallenge_fail,float,Y,N,N,N,The percentage of rechallenge fails 
-analysis_ref,database_id,varchar(100),Y,Y,N,N,The database identifier -analysis_ref,run_id,int,Y,Y,N,N,The run identifier -analysis_ref,analysis_id,int,Y,Y,N,N,The analysis identifier -analysis_ref,analysis_name,varchar,Y,N,N,N,The analysis name -analysis_ref,domain_id,varchar,Y,N,N,N,The domain id -analysis_ref,start_day,int,Y,N,N,N,The start day -analysis_ref,end_day,int,Y,N,N,N,The end day -analysis_ref,is_binary,varchar(1),Y,N,N,N,Is this a binary analysis -analysis_ref,missing_means_zero,varchar(1),Y,N,N,N,Missing means zero -covariate_ref,database_id,varchar(100),Y,Y,N,N,The database identifier -covariate_ref,run_id,int,Y,Y,N,N,The run identifier -covariate_ref,covariate_id,bigint,Y,Y,N,N,The covariate identifier -covariate_ref,covariate_name,varchar,Y,N,N,N,The covariate name -covariate_ref,analysis_id,int,Y,N,N,N,The analysis identifier -covariate_ref,concept_id,bigint,Y,N,N,N,The concept identifier -covariates,database_id,varchar(100),Y,Y,N,N,The database identifier -covariates,run_id,int,Y,Y,N,N,The run identifier -covariates,cohort_definition_id,int,Y,Y,N,N,The cohort definition id -covariates,covariate_id,bigint,Y,Y,N,N,The covaraite id -covariates,sum_value,int,Y,N,N,N,The sum value -covariates,average_value,float,Y,N,N,N,The average value -covariates_continuous,database_id,varchar(100),Y,Y,N,N,The database identifier -covariates_continuous,run_id,int,Y,Y,N,N,The run identifier -covariates_continuous,cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id -covariates_continuous,covariate_id,int,Y,Y,N,N,The covariate identifier -covariates_continuous,count_value,int,Y,N,N,N,The count value -covariates_continuous,min_value,float,Y,N,N,N,The min value -covariates_continuous,max_value,float,Y,N,N,N,The max value -covariates_continuous,average_value,float,Y,N,N,N,The average value -covariates_continuous,standard_deviation,float,Y,N,N,N,The standard devidation -covariates_continuous,median_value,float,Y,N,N,N,The median value 
-covariates_continuous,p_10_value,float,Y,N,N,N,The 10th percentile -covariates_continuous,p_25_value,float,Y,N,N,N,The 25th percentile -covariates_continuous,p_75_value,float,Y,N,N,N,The 75th percentile -covariates_continuous,p_90_value,float,Y,N,N,N,The 90th percentile -settings,run_id,int,Y,Y,N,N,The run identifier -settings,database_id,varchar(100),Y,Y,N,N,The database identifier -settings,covariate_setting_json,varchar,Y,N,N,N,The covariate settings JSON -settings,risk_window_start,int,Y,N,N,N,The risk window start -settings,risk_window_end,int,Y,N,N,N,The risk window end -settings,start_anchor,varchar(15),Y,N,N,N,The start anchor -settings,end_anchor,varchar(15),Y,N,N,N,The end anchor -cohort_details,run_id,int,Y,Y,N,N,The run identifier -cohort_details,database_id,varchar(100),Y,Y,N,N,The database identifier -cohort_details,cohort_definition_id,int,Y,N,N,N,The study cohort id -cohort_details,cohort_type,varchar(10),Y,N,N,N,The cohort type -cohort_details,target_cohort_id,int,Y,N,N,N,The target cohort id -cohort_details,outcome_cohort_id,int,Y,N,N,N,The outcome cohort id +table_name,column_name,data_type,is_required,primary_key,empty_is_na,min_cell_count,description +time_to_event,database_id,varchar(100),Y,Y,N,N,The database identifier +time_to_event,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort +time_to_event,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort +time_to_event,outcome_type,varchar(100),Y,Y,N,N,Is the outcome a first occurrence or repeat +time_to_event,target_outcome_type,varchar(40),Y,Y,N,N,When does the outcome occur relative to target +time_to_event,time_to_event,int,Y,N,N,N,The time (in days) from target index to outcome start +time_to_event,num_events,int,Y,N,N,N,Number of events that occur during the specified time to event +time_to_event,time_scale,varchar(20),Y,N,N,N,time scale for the number of events 
+rechallenge_fail_case_series,database_id,varchar(100),Y,Y,N,N,The database identifier +rechallenge_fail_case_series,dechallenge_stop_interval,int,Y,N,N,N,The dechallenge stop interval +rechallenge_fail_case_series,dechallenge_evaluation_window,int,Y,N,N,N,The dechallenge evaluation window +rechallenge_fail_case_series,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort +rechallenge_fail_case_series,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort +rechallenge_fail_case_series,person_key,int,Y,Y,N,N,The dense rank for the patient (an identifier that is not the same as the database) +rechallenge_fail_case_series,subject_id,bigint,Y,N,N,N,The person identifier for the failed case series (optional) +rechallenge_fail_case_series,dechallenge_exposure_number,int,Y,N,N,N,The number of times a dechallenge has occurred +rechallenge_fail_case_series,dechallenge_exposure_start_date_offset,int,Y,N,N,N,The offset for the dechallenge start (number of days after index) +rechallenge_fail_case_series,dechallenge_exposure_end_date_offset,int,Y,N,N,N,The offset for the dechallenge end (number of days after index) +rechallenge_fail_case_series,dechallenge_outcome_number,int,Y,N,N,N,The number of times an outcome has occurred during the dechallenge +rechallenge_fail_case_series,dechallenge_outcome_start_date_offset,int,Y,N,N,N,The offset for the outcome start (number of days after index) +rechallenge_fail_case_series,rechallenge_exposure_number,int,Y,N,N,N,The number of times a rechallenge exposure has occurred +rechallenge_fail_case_series,rechallenge_exposure_start_date_offset,int,Y,N,N,N,The offset for the rechallenge start (number of days after index) +rechallenge_fail_case_series,rechallenge_exposure_end_date_offset,int,Y,N,N,N,The offset for the rechallenge end (number of days after index) +rechallenge_fail_case_series,rechallenge_outcome_number,int,Y,N,N,N,The number of times the outcome has occurred during the 
rechallenge +rechallenge_fail_case_series,rechallenge_outcome_start_date_offset,int,Y,N,N,N,The offset for the outcome start (number of days after index) +dechallenge_rechallenge,database_id,varchar(100),Y,Y,N,N,The database identifier +dechallenge_rechallenge,dechallenge_stop_interval,int,Y,Y,N,N,The dechallenge stop interval +dechallenge_rechallenge,dechallenge_evaluation_window,int,Y,Y,N,N,The dechallenge evaluation window +dechallenge_rechallenge,target_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the target cohort +dechallenge_rechallenge,outcome_cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id for the outcome cohort +dechallenge_rechallenge,num_exposure_eras,int,Y,N,N,N,The number of exposure eras +dechallenge_rechallenge,num_persons_exposed,int,Y,N,N,N,The number of persons exposed +dechallenge_rechallenge,num_cases,int,Y,N,N,N,The number of cases +dechallenge_rechallenge,dechallenge_attempt,int,Y,N,N,N,The number of dechallenge attempts +dechallenge_rechallenge,dechallenge_fail,int,Y,N,N,N,The dechallenge fail count +dechallenge_rechallenge,dechallenge_success,int,Y,N,N,N,The dechallenge success count +dechallenge_rechallenge,rechallenge_attempt,int,Y,N,N,N,The rechallenge attempt count +dechallenge_rechallenge,rechallenge_fail,int,Y,N,N,N,The rechallenge fail count +dechallenge_rechallenge,rechallenge_success,int,Y,N,N,N,The rechallenge success count +dechallenge_rechallenge,pct_dechallenge_attempt,float,Y,N,N,N,The percentage of dechallenge attempts +dechallenge_rechallenge,pct_dechallenge_success,float,Y,N,N,N,The percentage of dechallenge success +dechallenge_rechallenge,pct_dechallenge_fail,float,Y,N,N,N,The percentage of dechallenge fails +dechallenge_rechallenge,pct_rechallenge_attempt,float,Y,N,N,N,The percentage of rechallenge attempts +dechallenge_rechallenge,pct_rechallenge_success,float,Y,N,N,N,The percentage of rechallenge success +dechallenge_rechallenge,pct_rechallenge_fail,float,Y,N,N,N,The percentage of 
rechallenge fails +analysis_ref,database_id,varchar(100),Y,Y,N,N,The database identifier +analysis_ref,run_id,int,Y,Y,N,N,The run identifier +analysis_ref,analysis_id,int,Y,Y,N,N,The analysis identifier +analysis_ref,analysis_name,varchar,Y,N,N,N,The analysis name +analysis_ref,domain_id,varchar,Y,N,N,N,The domain id +analysis_ref,start_day,int,Y,N,N,N,The start day +analysis_ref,end_day,int,Y,N,N,N,The end day +analysis_ref,is_binary,varchar(1),Y,N,N,N,Is this a binary analysis +analysis_ref,missing_means_zero,varchar(1),Y,N,N,N,Missing means zero +covariate_ref,database_id,varchar(100),Y,Y,N,N,The database identifier +covariate_ref,run_id,int,Y,Y,N,N,The run identifier +covariate_ref,covariate_id,bigint,Y,Y,N,N,The covariate identifier +covariate_ref,covariate_name,varchar,Y,N,N,N,The covariate name +covariate_ref,analysis_id,int,Y,N,N,N,The analysis identifier +covariate_ref,concept_id,bigint,Y,N,N,N,The concept identifier +covariates,database_id,varchar(100),Y,Y,N,N,The database identifier +covariates,run_id,int,Y,Y,N,N,The run identifier +covariates,cohort_definition_id,int,Y,Y,N,N,The cohort definition id +covariates,covariate_id,bigint,Y,Y,N,N,The covariate id +covariates,sum_value,int,Y,N,N,N,The sum value +covariates,average_value,float,Y,N,N,N,The average value +covariates_continuous,database_id,varchar(100),Y,Y,N,N,The database identifier +covariates_continuous,run_id,int,Y,Y,N,N,The run identifier +covariates_continuous,cohort_definition_id,bigint,Y,Y,N,N,The cohort definition id +covariates_continuous,covariate_id,int,Y,Y,N,N,The covariate identifier +covariates_continuous,count_value,int,Y,N,N,N,The count value +covariates_continuous,min_value,float,Y,N,N,N,The min value +covariates_continuous,max_value,float,Y,N,N,N,The max value +covariates_continuous,average_value,float,Y,N,N,N,The average value +covariates_continuous,standard_deviation,float,Y,N,N,N,The standard deviation +covariates_continuous,median_value,float,Y,N,N,N,The median value 
+covariates_continuous,p_10_value,float,Y,N,N,N,The 10th percentile +covariates_continuous,p_25_value,float,Y,N,N,N,The 25th percentile +covariates_continuous,p_75_value,float,Y,N,N,N,The 75th percentile +covariates_continuous,p_90_value,float,Y,N,N,N,The 90th percentile +settings,run_id,int,Y,Y,N,N,The run identifier +settings,database_id,varchar(100),Y,Y,N,N,The database identifier +settings,covariate_setting_json,varchar,Y,N,N,N,The covariate settings JSON +settings,risk_window_start,int,Y,N,N,N,The risk window start +settings,risk_window_end,int,Y,N,N,N,The risk window end +settings,start_anchor,varchar(15),Y,N,N,N,The start anchor +settings,end_anchor,varchar(15),Y,N,N,N,The end anchor +cohort_details,run_id,int,Y,Y,N,N,The run identifier +cohort_details,database_id,varchar(100),Y,Y,N,N,The database identifier +cohort_details,cohort_definition_id,int,Y,N,N,N,The study cohort id +cohort_details,cohort_type,varchar(10),Y,N,N,N,The cohort type +cohort_details,target_cohort_id,int,Y,N,N,N,The target cohort id +cohort_details,outcome_cohort_id,int,Y,N,N,N,The outcome cohort id +cohort_counts,run_id,int,Y,Y,N,N,The run identifier +cohort_counts,database_id,varchar(100),Y,Y,N,N,The database identifier +cohort_counts,cohort_definition_id,int,Y,N,N,N,The study cohort id +cohort_counts,row_count,int,Y,N,N,N,The number of rows in each cohort +cohort_counts,person_count,int,Y,N,N,N,The number of distinct people in each cohort + + + diff --git a/inst/sql/sql_server/DropAggregateCovariate.sql b/inst/sql/sql_server/DropAggregateCovariate.sql index fb46e83..b319cff 100644 --- a/inst/sql/sql_server/DropAggregateCovariate.sql +++ b/inst/sql/sql_server/DropAggregateCovariate.sql @@ -1,20 +1,32 @@ -- clean up by removing the temp tables +TRUNCATE TABLE #targets_agg_all; +DROP TABLE #targets_agg_all; + TRUNCATE TABLE #targets_agg; DROP TABLE #targets_agg; TRUNCATE TABLE #outcomes_agg; DROP TABLE #outcomes_agg; +TRUNCATE TABLE #outcomes_agg_first; +DROP TABLE #outcomes_agg_first; + 
TRUNCATE TABLE #cohort_details; DROP TABLE #cohort_details; TRUNCATE TABLE #target_with_outcome; DROP TABLE #target_with_outcome; +TRUNCATE TABLE #target_outcome_f; +DROP TABLE #target_outcome_f; + TRUNCATE TABLE #target_nooutcome; DROP TABLE #target_nooutcome; +TRUNCATE TABLE #target_noout_f; +DROP TABLE #target_noout_f; + TRUNCATE TABLE #agg_cohorts; DROP TABLE #agg_cohorts; diff --git a/inst/sql/sql_server/ResultTables.sql b/inst/sql/sql_server/ResultTables.sql index 95f47ae..663249e 100644 --- a/inst/sql/sql_server/ResultTables.sql +++ b/inst/sql/sql_server/ResultTables.sql @@ -121,3 +121,11 @@ CREATE TABLE @my_schema.@table_prefixcovariates_continuous ( p_75_value float, p_90_value float ); + +CREATE TABLE @my_schema.@table_prefixcohort_counts( + run_id int NOT NULL, + database_id varchar(100) NOT NULL, + cohort_definition_id int NOT NULL, + row_count int NOT NULL, + person_count int NOT NULL +); diff --git a/inst/sql/sql_server/createTargetOutcomeCombinations.sql b/inst/sql/sql_server/createTargetOutcomeCombinations.sql index c354232..0ba0f12 100644 --- a/inst/sql/sql_server/createTargetOutcomeCombinations.sql +++ b/inst/sql/sql_server/createTargetOutcomeCombinations.sql @@ -1,9 +1,26 @@ --need to know indication/target/outcome tuples +drop table if exists #targets_agg_all; +select * into #targets_agg_all +from @target_database_schema.@target_table +where cohort_definition_id in +(@target_ids); + +-- first T entry per subject/cohort, kept only when it has >= @min_prior_observation days of prior observation drop table if exists #targets_agg; select * into #targets_agg +from +(select *, +row_number() over(partition by subject_id, cohort_definition_id order by cohort_start_date asc) as rn from @target_database_schema.@target_table where cohort_definition_id in -(@target_ids); +(@target_ids) +) temp_t +inner join @cdm_database_schema.observation_period op +on op.person_id = temp_t.subject_id +and temp_t.cohort_start_date >= op.observation_period_start_date +and temp_t.cohort_start_date <= op.observation_period_end_date 
+where temp_t.rn = 1 +and datediff(day, op.observation_period_start_date, temp_t.cohort_start_date) >= @min_prior_observation; drop table if exists #outcomes_agg; select * into #outcomes_agg @@ -11,6 +28,16 @@ from @outcome_database_schema.@outcome_table where cohort_definition_id in (@outcome_ids); +-- first outcome entry per subject/cohort +drop table if exists #outcomes_agg_first; +select * into #outcomes_agg_first +from (select *, +row_number() over(partition by subject_id, cohort_definition_id order by cohort_start_date asc) as rn +from #outcomes_agg +) as o +where o.rn = 1 +; + -- create all the cohort details drop table if exists #cohort_details; @@ -53,6 +80,39 @@ CROSS JOIN union +select distinct +t.cohort_definition_id as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id, +'TnfirstO' as cohort_type +from +(select distinct cohort_definition_id from #targets_agg) as t +CROSS JOIN +(select distinct cohort_definition_id from #outcomes_agg) as o + +union + +select distinct +t.cohort_definition_id as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id, +'firstOnT' as cohort_type +from +(select distinct cohort_definition_id from #targets_agg) as t +CROSS JOIN +(select distinct cohort_definition_id from #outcomes_agg) as o + +union + +select distinct +t.cohort_definition_id as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id, +'TnfirstOc' as cohort_type +from +(select distinct cohort_definition_id from #targets_agg) as t +CROSS JOIN +(select distinct cohort_definition_id from #outcomes_agg) as o + +union + select distinct t.cohort_definition_id as target_cohort_id, 0 as outcome_cohort_id, @@ -61,12 +121,29 @@ from (select distinct cohort_definition_id from #targets_agg) as t union +select distinct +t.cohort_definition_id as target_cohort_id, +0 as outcome_cohort_id, +'allT' as cohort_type +from (select distinct cohort_definition_id from #targets_agg) as t + +union + select distinct 0 as target_cohort_id, o.cohort_definition_id as 
outcome_cohort_id, 'O' as cohort_type from (select distinct cohort_definition_id from #outcomes_agg) as o +union + +select distinct +0 as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id, +'firstO' as cohort_type +from (select distinct cohort_definition_id from #outcomes_agg) as o + + ) temp; @@ -92,6 +169,26 @@ and -- outcome starts (ends?) after TAR start o.cohort_start_date >= dateadd(day, @tar_start, t.@tar_start_anchor); +-- TnfirstO +drop table if exists #target_outcome_f; +select +t.subject_id, +t.cohort_start_date, +t.cohort_end_date, +o.cohort_start_date as outcome_start_date, +o.cohort_end_date as outcome_end_date, +t.cohort_definition_id as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id +into #target_outcome_f +from #targets_agg t inner join #outcomes_agg_first o +on t.subject_id = o.subject_id +where +-- outcome starts before TAR end +o.cohort_start_date <= dateadd(day, @tar_end, t.@tar_end_anchor) +and +-- outcome starts (ends?) after TAR start +o.cohort_start_date >= dateadd(day, @tar_start, t.@tar_start_anchor); + -- 2) get all the people without the outcome in TAR drop table if exists #target_nooutcome; @@ -111,6 +208,23 @@ and t.subject_id = two.subject_id and o.cohort_definition_id = two.outcome_cohort_id where two.subject_id IS NULL; +drop table if exists #target_noout_f; +select +t.subject_id, +t.cohort_start_date, +t.cohort_end_date, +t.cohort_definition_id as target_cohort_id, +o.cohort_definition_id as outcome_cohort_id +into #target_noout_f +from #targets_agg t +CROSS JOIN +( select distinct cohort_definition_id from #outcomes_agg) o +left outer join #target_outcome_f two +on t.cohort_definition_id = two.target_cohort_id +and t.subject_id = two.subject_id +and o.cohort_definition_id = two.outcome_cohort_id +where two.subject_id IS NULL; + -- Final: select into #agg_cohorts select * into #agg_cohorts @@ -132,6 +246,21 @@ and cd.cohort_type = 'TnO' union +-- T with first O indexed at T + +select +tno.subject_id, 
+tno.cohort_start_date, +tno.cohort_end_date, +cd.cohort_definition_id +from #target_outcome_f tno +INNER JOIN #cohort_details cd +on cd.target_cohort_id = tno.target_cohort_id +and cd.outcome_cohort_id = tno.outcome_cohort_id +and cd.cohort_type = 'TnfirstO' + +union + -- T with O indexed at O select @@ -143,7 +272,22 @@ from #target_with_outcome tno INNER JOIN #cohort_details cd on cd.target_cohort_id = tno.target_cohort_id and cd.outcome_cohort_id = tno.outcome_cohort_id -and cd.cohort_type = 'TnOc' +and cd.cohort_type = 'OnT' + +union + +-- T with first O indexed at O + +select +tno.subject_id, +tno.outcome_start_date as cohort_start_date, +tno.outcome_end_date as cohort_end_date, +cd.cohort_definition_id +from #target_outcome_f tno +INNER JOIN #cohort_details cd +on cd.target_cohort_id = tno.target_cohort_id +and cd.outcome_cohort_id = tno.outcome_cohort_id +and cd.cohort_type = 'firstOnT' union @@ -162,6 +306,21 @@ and cd.cohort_type = 'TnOc' union +-- T without first O + +select +tnoc.subject_id, +tnoc.cohort_start_date, +tnoc.cohort_end_date, +cd.cohort_definition_id +from #target_noout_f tnoc +INNER JOIN #cohort_details cd +on cd.target_cohort_id = tnoc.target_cohort_id +and cd.outcome_cohort_id = tnoc.outcome_cohort_id +and cd.cohort_type = 'TnfirstOc' + +union + -- Ts and Os select distinct * from ( @@ -178,6 +337,18 @@ and cd.cohort_type = 'T' union +select +t.subject_id, +t.cohort_start_date, +t.cohort_end_date, +cd.cohort_definition_id +from #targets_agg_all as t +INNER JOIN #cohort_details cd +on cd.target_cohort_id = t.cohort_definition_id +and cd.cohort_type = 'allT' + +union + select o.subject_id, o.cohort_start_date, @@ -187,6 +358,19 @@ from #outcomes_agg as o INNER JOIN #cohort_details cd on cd.outcome_cohort_id = o.cohort_definition_id and cd.cohort_type = 'O' + +union + +select +o.subject_id, +o.cohort_start_date, +o.cohort_end_date, +cd.cohort_definition_id +from #outcomes_agg_first as o +INNER JOIN #cohort_details cd +on 
cd.outcome_cohort_id = o.cohort_definition_id +and cd.cohort_type = 'firstO' + ) temp_ts ) temp_ts2; diff --git a/man/createAggregateCovariateSettings.Rd b/man/createAggregateCovariateSettings.Rd index 76f7f1c..aed6f80 100644 --- a/man/createAggregateCovariateSettings.Rd +++ b/man/createAggregateCovariateSettings.Rd @@ -7,6 +7,7 @@ createAggregateCovariateSettings( targetIds, outcomeIds, + minPriorObservation = 0, riskWindowStart = 1, startAnchor = "cohort start", riskWindowEnd = 365, @@ -19,6 +20,8 @@ createAggregateCovariateSettings( \item{outcomeIds}{A list of cohortIds for the outcome cohorts} +\item{minPriorObservation}{The minimum time in the database a patient in the target cohorts must be observed prior to index} + \item{riskWindowStart}{The start of the risk window (in days) relative to the `startAnchor`.} \item{startAnchor}{The anchor point for the start of the risk window. Can be `"cohort start"` diff --git a/tests/testthat/test-aggregateCovariate.R b/tests/testthat/test-aggregateCovariate.R index b7493b8..af70431 100644 --- a/tests/testthat/test-aggregateCovariate.R +++ b/tests/testthat/test-aggregateCovariate.R @@ -15,6 +15,7 @@ test_that("createAggregateCovariateSettings", { res <- createAggregateCovariateSettings( targetIds = targetIds, outcomeIds = outcomeIds, + minPriorObservation = 10, riskWindowStart = 1, startAnchor = "cohort start", riskWindowEnd = 365, endAnchor = "cohort start", covariateSettings = covariateSettings @@ -28,6 +29,11 @@ test_that("createAggregateCovariateSettings", { res$covariateSettings, covariateSettings ) + + testthat::expect_equal( + res$minPriorObservation, + 10 + ) }) test_that("createAggregateCovariateSettingsList", { @@ -73,6 +79,7 @@ test_that("computeAggregateCovariateAnalyses", { res <- createAggregateCovariateSettings( targetIds = targetIds, outcomeIds = outcomeIds, + minPriorObservation = 30, riskWindowStart = 1, startAnchor = "cohort start", riskWindowEnd = 5 * 365, endAnchor = "cohort start", covariateSettings = 
covariateSettings @@ -90,7 +97,7 @@ test_that("computeAggregateCovariateAnalyses", { testthat::expect_true(inherits(agc, "CovariateData")) testthat::expect_true(length(unique(as.data.frame(agc$covariates)$cohortDefinitionId)) - <= length(res$targetIds) * length(res$outcomeIds) * 3 + length(res$targetIds) + length(res$outcomeIds)) + <= length(res$targetIds) * length(res$outcomeIds) * 6 + length(res$targetIds)*2 + length(res$outcomeIds)*2) testthat::expect_true( sum(names(agc) %in% c( "analysisRef", @@ -102,6 +109,11 @@ test_that("computeAggregateCovariateAnalyses", { )) == 6 ) + testthat::expect_true( + nrow(as.data.frame(agc$cohortDetails)) == + nrow(as.data.frame(agc$cohortCounts)) + ) + # check cohortDetails testthat::expect_true( length(unique(as.data.frame(agc$cohortDetails)$cohortDefinitionId)) == @@ -109,7 +121,7 @@ test_that("computeAggregateCovariateAnalyses", { ) testthat::expect_true( - nrow(as.data.frame(agc$cohortDetails)) == 13 # 4 T/Os, 3 TnO, 3 TnOc, 3 OnT + nrow(as.data.frame(agc$cohortDetails)) == 26 # 8 T/Os, 6 TnO, 6 TnOc, 6 OnT ) # test saving/loading