diff --git a/DESCRIPTION b/DESCRIPTION index ec8a067..8f7a543 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: Characterization Type: Package Title: Characterizations of Cohorts -Version: 0.1.4 -Date: 2024-02-26 +Version: 0.1.5 +Date: 2024-04-03 Authors@R: c( person("Jenna", "Reps", , "reps@ohdsi.org", role = c("aut", "cre")), person("Patrick", "Ryan", , "ryan@ohdsi.org", role = c("aut")) @@ -42,6 +42,6 @@ Remotes: ohdsi/ShinyAppBuilder, ohdsi/DatabaseConnector NeedsCompilation: no -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Encoding: UTF-8 VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index dc879bf..acf123e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +Characterization 0.1.5 +====================== +Changed export to csv approach to use batch export from SQLite (#41) + Characterization 0.1.4 ====================== Added extra error logging @@ -21,7 +25,7 @@ Fixing bug where cohort_counts were not being saved in the database Characterization 0.1.0 ====================== -- added support to enable target cohorts with multiple cohort entries for the aggregate covariate analysis by restructing to first cohort entry and ensuring the subject has a user specified minPriorObservation days observation in the database at first entry and also perform analysis on first outcomes and any outcome that is recorded during TAR. +- added support to enable target cohorts with multiple cohort entries for the aggregate covariate analysis by restricting to first cohort entry and ensuring the subject has a user specified minPriorObservation days observation in the database at first entry and also perform analysis on first outcomes and any outcome that is recorded during TAR. 
- added shiny app diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R index 02e17ac..bad74ec 100644 --- a/R/AggregateCovariates.R +++ b/R/AggregateCovariates.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # @@ -27,27 +27,25 @@ #' #' @export createAggregateCovariateSettings <- function( - targetIds, - outcomeIds, - minPriorObservation = 0, - riskWindowStart = 1, - startAnchor = 'cohort start', - riskWindowEnd = 365, - endAnchor = 'cohort start', - covariateSettings -){ - + targetIds, + outcomeIds, + minPriorObservation = 0, + riskWindowStart = 1, + startAnchor = "cohort start", + riskWindowEnd = 365, + endAnchor = "cohort start", + covariateSettings) { errorMessages <- checkmate::makeAssertCollection() # check targetIds is a vector of int/double .checkCohortIds( cohortIds = targetIds, - type = 'target', + type = "target", errorMessages = errorMessages ) # check outcomeIds is a vector of int/double .checkCohortIds( cohortIds = outcomeIds, - type = 'outcome', + type = "outcome", errorMessages = errorMessages ) @@ -81,12 +79,12 @@ createAggregateCovariateSettings <- function( minPriorObservation = minPriorObservation, riskWindowStart = riskWindowStart, startAnchor = startAnchor, - riskWindowEnd = riskWindowEnd , + riskWindowEnd = riskWindowEnd, endAnchor = endAnchor, covariateSettings = covariateSettings ) - class(result) <- 'aggregateCovariateSettings' + class(result) <- "aggregateCovariateSettings" return(result) } @@ -106,19 +104,17 @@ createAggregateCovariateSettings <- function( #' #' @export computeAggregateCovariateAnalyses <- function( - connectionDetails = NULL, - cdmDatabaseSchema, - cdmVersion = 5, - targetDatabaseSchema, - targetTable, - outcomeDatabaseSchema = targetDatabaseSchema, # remove - outcomeTable = targetTable, # remove - tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), - 
aggregateCovariateSettings, - databaseId = 'database 1', - runId = 1 -) { - + connectionDetails = NULL, + cdmDatabaseSchema, + cdmVersion = 5, + targetDatabaseSchema, + targetTable, + outcomeDatabaseSchema = targetDatabaseSchema, # remove + outcomeTable = targetTable, # remove + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + aggregateCovariateSettings, + databaseId = "database 1", + runId = 1) { # check inputs start <- Sys.time() @@ -128,7 +124,7 @@ computeAggregateCovariateAnalyses <- function( ) on.exit( DatabaseConnector::disconnect(connection) - ) + ) # select T, O, create TnO, TnOc, Onprior T # into temp table #agg_cohorts @@ -145,11 +141,11 @@ computeAggregateCovariateAnalyses <- function( ) ## get counts - sql <- 'select cohort_definition_id, count(*) row_count, count(distinct subject_id) person_count from #agg_cohorts group by cohort_definition_id;' + sql <- "select cohort_definition_id, count(*) row_count, count(distinct subject_id) person_count from #agg_cohorts group by cohort_definition_id;" sql <- SqlRender::translate( sql = sql, targetDialect = connectionDetails$dbms - ) + ) counts <- DatabaseConnector::querySql( connection = connection, sql = sql, @@ -162,7 +158,7 @@ computeAggregateCovariateAnalyses <- function( connection = connection, oracleTempSchema = tempEmulationSchema, cdmDatabaseSchema = cdmDatabaseSchema, - cohortTable = '#agg_cohorts', + cohortTable = "#agg_cohorts", cohortTableIsTemp = T, cohortId = -1, covariateSettings = aggregateCovariateSettings$covariateSettings, @@ -176,7 +172,7 @@ computeAggregateCovariateAnalyses <- function( # could add settings table with this and just have setting id # as single extra column? 
- for(tableName in names(result)){ + for (tableName in names(result)) { result[[tableName]] <- result[[tableName]] %>% dplyr::mutate( runId = !!runId, @@ -185,16 +181,16 @@ computeAggregateCovariateAnalyses <- function( dplyr::relocate( "databaseId", "runId" - ) + ) } -# cohort details: + # cohort details: result$cohortDetails <- DatabaseConnector::querySql( connection = connection, sql = SqlRender::translate( - sql = " select * from #cohort_details;", - targetDialect = connectionDetails$dbms + sql = " select * from #cohort_details;", + targetDialect = connectionDetails$dbms ), snakeCaseToCamelCase = T ) %>% @@ -218,14 +214,14 @@ computeAggregateCovariateAnalyses <- function( ) result$settings <- data.frame( - runId = runId, - databaseId = databaseId, - covariateSettingJson = covariateSettingsJson, - riskWindowStart = aggregateCovariateSettings$riskWindowStart, - startAnchor = aggregateCovariateSettings$startAnchor, - riskWindowEnd = aggregateCovariateSettings$riskWindowEnd , - endAnchor = aggregateCovariateSettings$endAnchor - ) + runId = runId, + databaseId = databaseId, + covariateSettingJson = covariateSettingsJson, + riskWindowStart = aggregateCovariateSettings$riskWindowStart, + startAnchor = aggregateCovariateSettings$startAnchor, + riskWindowEnd = aggregateCovariateSettings$riskWindowEnd, + endAnchor = aggregateCovariateSettings$endAnchor + ) sql <- SqlRender::loadRenderTranslateSql( sqlFilename = "DropAggregateCovariate.sql", @@ -245,17 +241,15 @@ computeAggregateCovariateAnalyses <- function( createCohortsOfInterest <- function( - connection, - cdmDatabaseSchema, - dbms, - aggregateCovariateSettings, - targetDatabaseSchema, - targetTable, - outcomeDatabaseSchema, - outcomeTable, - tempEmulationSchema -){ - + connection, + cdmDatabaseSchema, + dbms, + aggregateCovariateSettings, + targetDatabaseSchema, + targetTable, + outcomeDatabaseSchema, + outcomeTable, + tempEmulationSchema) { sql <- SqlRender::loadRenderTranslateSql( sqlFilename = 
"createTargetOutcomeCombinations.sql", packageName = "Characterization", @@ -266,21 +260,21 @@ createCohortsOfInterest <- function( target_table = targetTable, outcome_database_schema = outcomeDatabaseSchema, outcome_table = outcomeTable, - target_ids = paste(aggregateCovariateSettings$targetIds, collapse = ',', sep = ','), - outcome_ids = paste(aggregateCovariateSettings$outcomeIds, collapse = ',', sep = ','), + target_ids = paste(aggregateCovariateSettings$targetIds, collapse = ",", sep = ","), + outcome_ids = paste(aggregateCovariateSettings$outcomeIds, collapse = ",", sep = ","), min_prior_observation = aggregateCovariateSettings$minPriorObservation, tar_start = aggregateCovariateSettings$riskWindowStart, tar_start_anchor = ifelse( - aggregateCovariateSettings$startAnchor == 'cohort start', - 'cohort_start_date', - 'cohort_end_date' - ), + aggregateCovariateSettings$startAnchor == "cohort start", + "cohort_start_date", + "cohort_end_date" + ), tar_end = aggregateCovariateSettings$riskWindowEnd, tar_end_anchor = ifelse( - aggregateCovariateSettings$endAnchor == 'cohort start', - 'cohort_start_date', - 'cohort_end_date' - ) + aggregateCovariateSettings$endAnchor == "cohort start", + "cohort_start_date", + "cohort_end_date" + ) ) DatabaseConnector::executeSql( @@ -289,5 +283,4 @@ createCohortsOfInterest <- function( progressBar = FALSE, reportOverallTime = FALSE ) - } diff --git a/R/Characterization.R b/R/Characterization.R index 3885f20..9c47bbb 100644 --- a/R/Characterization.R +++ b/R/Characterization.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # diff --git a/R/Database.R b/R/Database.R index 592ec33..249d761 100644 --- a/R/Database.R +++ b/R/Database.R @@ -1,6 +1,6 @@ # @file Database.R # -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences 
and Informatics # # This file is part of Characterization # @@ -30,15 +30,13 @@ #' #' @export createSqliteDatabase <- function( - sqliteLocation = tempdir() -){ - + sqliteLocation = tempdir()) { sqliteLocation <- file.path( sqliteLocation, - 'sqliteCharacterization' + "sqliteCharacterization" ) - if(!dir.exists(sqliteLocation )){ + if (!dir.exists(sqliteLocation)) { dir.create( path = sqliteLocation, recursive = T @@ -46,8 +44,8 @@ createSqliteDatabase <- function( } connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = 'sqlite', - server = file.path(sqliteLocation, 'sqlite.sqlite') + dbms = "sqlite", + server = file.path(sqliteLocation, "sqlite.sqlite") ) connection <- DatabaseConnector::connect( connectionDetails = connectionDetails @@ -64,10 +62,9 @@ insertAndromedaToDatabase <- function( andromedaObject, tempEmulationSchema, bulkLoad = T, - tablePrefix = 'c_', + tablePrefix = "c_", minCellCount = 0, - minCellCountColumns = list() -){ + minCellCountColumns = list()) { errorMessages <- checkmate::makeAssertCollection() .checkTablePrefix( tablePrefix = tablePrefix, @@ -75,12 +72,11 @@ insertAndromedaToDatabase <- function( ) checkmate::reportAssertions(errorMessages) - message('Inserting Andromeda table into database table ', tablePrefix, tableName) + message("Inserting Andromeda table into database table ", tablePrefix, tableName) Andromeda::batchApply( tbl = andromedaObject, - fun = function(x){ - + fun = function(x) { data <- as.data.frame(x %>% dplyr::collect()) # apply minCellCount data <- removeMinCell( data = data, @@ -91,7 +87,7 @@ insertAndromedaToDatabase <- function( DatabaseConnector::insertTable( connection = connection, databaseSchema = databaseSchema, - tableName = paste0(tablePrefix,tableName), + tableName = paste0(tablePrefix, tableName), data = data, dropTableIfExists = F, createTable = F, @@ -108,22 +104,21 @@ insertAndromedaToDatabase <- function( removeMinCell <- function( data, minCellCount = 0, - minCellCountColumns = 
list() -){ - for(columns in minCellCountColumns){ + minCellCountColumns = list()) { + for (columns in minCellCountColumns) { ind <- apply( - X = data[,columns, drop = FALSE], + X = data[, columns, drop = FALSE], MARGIN = 1, - FUN = function(x) sum(x < minCellCount)>0 + FUN = function(x) sum(x < minCellCount) > 0 ) - if(sum(ind) > 0 ){ + if (sum(ind) > 0) { ParallelLogger::logInfo( paste0( - 'Removing values less than ', + "Removing values less than ", minCellCount, - ' from ', - paste(columns, collapse = ' and ') + " from ", + paste(columns, collapse = " and ") ) ) data[ind, columns] <- -1 @@ -158,12 +153,11 @@ removeMinCell <- function( createCharacterizationTables <- function( conn, resultSchema, - targetDialect = 'postgresql', + targetDialect = "postgresql", deleteExistingTables = T, createTables = T, - tablePrefix = 'c_', - tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") -){ + tablePrefix = "c_", + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { errorMessages <- checkmate::makeAssertCollection() .checkTablePrefix( tablePrefix = tablePrefix, @@ -172,8 +166,8 @@ createCharacterizationTables <- function( checkmate::reportAssertions(errorMessages) - if(deleteExistingTables){ - message('Deleting existing tables') + if (deleteExistingTables) { + message("Deleting existing tables") tables <- getResultTables() tables <- paste0(tablePrefix, tables) @@ -184,9 +178,9 @@ createCharacterizationTables <- function( ) ) - for(tb in tables){ - if(tb %in% alltables){ - sql <- 'DELETE FROM @my_schema.@table' + for (tb in tables) { + if (tb %in% alltables) { + sql <- "DELETE FROM @my_schema.@table" sql <- SqlRender::render( sql = sql, my_schema = resultSchema, @@ -202,7 +196,7 @@ createCharacterizationTables <- function( sql = sql ) - sql <- 'DROP TABLE @my_schema.@table' + sql <- "DROP TABLE @my_schema.@table" sql <- SqlRender::render( sql = sql, my_schema = resultSchema, @@ -218,13 +212,11 @@ createCharacterizationTables <- function( sql = sql 
) } - } - } - if(createTables){ - ParallelLogger::logInfo('Creating characterization results tables') + if (createTables) { + ParallelLogger::logInfo("Creating characterization results tables") renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "ResultTables.sql", packageName = "Characterization", @@ -241,7 +233,6 @@ createCharacterizationTables <- function( # add database migration here in the future } - } #' Exports all tables in the result database to csv files @@ -274,11 +265,9 @@ exportDatabaseToCsv <- function( targetDialect = NULL, tablePrefix = "c_", filePrefix = NULL, - tempEmulationSchema = NULL, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), saveDirectory, - minMeanCovariateValue = 0.001 -){ - + minMeanCovariateValue = 0.001) { errorMessages <- checkmate::makeAssertCollection() .checkConnectionDetails(connectionDetails, errorMessages) .checkTablePrefix( @@ -291,7 +280,7 @@ exportDatabaseToCsv <- function( } if (is.null(filePrefix)) { - filePrefix = '' + filePrefix <- "" } # connect to result database @@ -303,7 +292,7 @@ exportDatabaseToCsv <- function( ) # create the folder to save the csv files - if(!dir.exists(saveDirectory)){ + if (!dir.exists(saveDirectory)) { dir.create( path = saveDirectory, recursive = T @@ -317,55 +306,116 @@ exportDatabaseToCsv <- function( tables <- getResultTables() # extract result per table - for(table in tables){ - message(paste0("Exporting ", table)) - sql <- "select * from @resultSchema.@appendtotable@tablename;" + for (table in tables) { + ParallelLogger::logInfo(paste0("Exporting rows from ", table, " to csv file")) + # get row count and figure out number of loops + sql <- "select count(*) as N from @resultSchema.@appendtotable@tablename;" sql <- SqlRender::render( sql = sql, resultSchema = resultSchema, appendtotable = tablePrefix, tablename = table ) - resultSet <- DatabaseConnector::dbSendQuery(connection, sql) - tryCatch({ - i <- 1 - while (i == 1 || 
!DatabaseConnector::dbHasCompleted(resultSet)) { - start <- format(x = (i-1)*maxRowCount+1, scientific = F, big.mark = ",") - end <- format(x = maxRowCount*i, scientific = F, big.mark = ",") - message(paste0(" -- Rows ", start, " to ", end)) - result <- DatabaseConnector::dbFetch(resultSet, n = maxRowCount) - if (table == "covariates" && minMeanCovariateValue > 0) { - result <- result %>% - dplyr::filter(.data$average_value >= minMeanCovariateValue) - } - result <- formatDouble(result) - # save the results as a csv - readr::write_csv( - x = result, - file = file.path(saveDirectory, paste0(tolower(filePrefix), table,'.csv')), - append = (i > 1) - ) - i <- i + 1 + sql <- SqlRender::translate( + sql = sql, + targetDialect = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema + ) + countN <- DatabaseConnector::querySql( + connection = connection, + sql = sql, + snakeCaseToCamelCase = F + )$N + + # get column names + sql <- "select * from @resultSchema.@appendtotable@tablename where 1=0;" + sql <- SqlRender::render( + sql = sql, + resultSchema = resultSchema, + appendtotable = tablePrefix, + tablename = table + ) + sql <- SqlRender::translate( + sql = sql, + targetDialect = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema + ) + cnames <- colnames(DatabaseConnector::querySql( + connection = connection, + sql = sql, + snakeCaseToCamelCase = F + )) + + inds <- floor(countN / maxRowCount) + tableAppend <- F + # NOTE: If the table has 0 rows (countN == 0), + # then setting the txtProgressBar will fail since + # min < max. So, setting max = countN+1 for this + # reason. 
+ pb <- utils::txtProgressBar(min = 0, max = countN + 1, initial = 0) + + for (i in 1:length(inds)) { + startRow <- (i - 1) * maxRowCount + 1 + endRow <- min(i * maxRowCount, countN) + + sql <- "select @cnames from + (select *, + ROW_NUMBER() OVER(ORDER BY @cnames) AS row + from @resultSchema.@appendtotable@tablename + ) temp + where + temp.row >= @start_row and + temp.row <= @end_row;" + sql <- SqlRender::render( + sql = sql, + resultSchema = resultSchema, + appendtotable = tablePrefix, + tablename = table, + cnames = paste(cnames, collapse = ","), + start_row = startRow, + end_row = endRow + ) + sql <- SqlRender::translate( + sql = sql, + targetDialect = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema + ) + result <- DatabaseConnector::querySql( + connection = connection, + sql = sql, + snakeCaseToCamelCase = F + ) + result <- formatDouble(result) + + # save the results as a csv + readr::write_csv( + x = result, + file = file.path(saveDirectory, paste0(tolower(filePrefix), table, ".csv")), + append = tableAppend + ) + tableAppend <- T + # NOTE: Handling progresss bar per note on txtProgressBar + # above. Otherwise the progress bar doesn't show that it completed. + if (endRow == countN) { + utils::setTxtProgressBar(pb, countN + 1) + } else { + utils::setTxtProgressBar(pb, endRow) } - }, - error = function(e) { - message(paste0("ERROR in export to csv: ", e$message)); - }, - finally = { - DatabaseConnector::dbClearResult(resultSet) - }) + } + close(pb) } + invisible(saveDirectory) } -getResultTables <- function(){ +getResultTables <- function() { return( unique( readr::read_csv( file = system.file( - 'settings', - 'resultsDataModelSpecification.csv', - package = 'Characterization' + "settings", + "resultsDataModelSpecification.csv", + package = "Characterization" ), show_col_types = FALSE )$table_name @@ -384,4 +434,3 @@ formatDouble <- function(x, scientific = F, ...) 
{ return(x) } - diff --git a/R/DechallengeRechallenge.R b/R/DechallengeRechallenge.R index 24bf0b7..4ad299b 100644 --- a/R/DechallengeRechallenge.R +++ b/R/DechallengeRechallenge.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # @@ -26,23 +26,21 @@ #' #' @export createDechallengeRechallengeSettings <- function( - targetIds, - outcomeIds, - dechallengeStopInterval = 30, - dechallengeEvaluationWindow = 30 -){ - + targetIds, + outcomeIds, + dechallengeStopInterval = 30, + dechallengeEvaluationWindow = 30) { errorMessages <- checkmate::makeAssertCollection() # check targetIds is a vector of int/double .checkCohortIds( cohortIds = targetIds, - type = 'target', + type = "target", errorMessages = errorMessages - ) + ) # check outcomeIds is a vector of int/double .checkCohortIds( cohortIds = outcomeIds, - type = 'outcome', + type = "outcome", errorMessages = errorMessages ) @@ -53,7 +51,7 @@ createDechallengeRechallengeSettings <- function( finite = TRUE, any.missing = FALSE, len = 1, - .var.name = 'dechallengeStopInterval', + .var.name = "dechallengeStopInterval", add = errorMessages ) @@ -64,7 +62,7 @@ createDechallengeRechallengeSettings <- function( finite = TRUE, any.missing = FALSE, len = 1, - .var.name = 'dechallengeEvaluationWindow', + .var.name = "dechallengeEvaluationWindow", add = errorMessages ) @@ -78,7 +76,7 @@ createDechallengeRechallengeSettings <- function( dechallengeEvaluationWindow = dechallengeEvaluationWindow ) - class(result) <- 'dechallengeRechallengeSettings' + class(result) <- "dechallengeRechallengeSettings" return(result) } @@ -95,45 +93,43 @@ createDechallengeRechallengeSettings <- function( #' #' @export computeDechallengeRechallengeAnalyses <- function( - connectionDetails = NULL, - targetDatabaseSchema, - targetTable, - outcomeDatabaseSchema = targetDatabaseSchema, - outcomeTable = targetTable, 
- tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), - dechallengeRechallengeSettings, - databaseId = 'database 1' -) { - + connectionDetails = NULL, + targetDatabaseSchema, + targetTable, + outcomeDatabaseSchema = targetDatabaseSchema, + outcomeTable = targetTable, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + dechallengeRechallengeSettings, + databaseId = "database 1") { # check inputs errorMessages <- checkmate::makeAssertCollection() .checkConnectionDetails(connectionDetails, errorMessages) .checkCohortDetails( cohortDatabaseSchema = targetDatabaseSchema, cohortTable = targetTable, - type = 'target', - errorMessages = errorMessages - ) + type = "target", + errorMessages = errorMessages + ) .checkCohortDetails( cohortDatabaseSchema = outcomeDatabaseSchema, cohortTable = outcomeTable, - type = 'outcome', - errorMessages = errorMessages + type = "outcome", + errorMessages = errorMessages ) .checkTempEmulationSchema( tempEmulationSchema = tempEmulationSchema, - errorMessages = errorMessages + errorMessages = errorMessages ) .checkDechallengeRechallengeSettings( settings = dechallengeRechallengeSettings, - errorMessages = errorMessages - ) + errorMessages = errorMessages + ) valid <- checkmate::reportAssertions( collection = errorMessages - ) + ) - if(valid){ + if (valid) { # inputs all pass if getting here message("Inputs checked") @@ -144,7 +140,7 @@ computeDechallengeRechallengeAnalyses <- function( ) on.exit( DatabaseConnector::disconnect(connection) - ) + ) message("Computing dechallenge rechallenge results") sql <- SqlRender::loadRenderTranslateSql( @@ -157,8 +153,8 @@ computeDechallengeRechallengeAnalyses <- function( target_table = targetTable, outcome_database_schema = outcomeDatabaseSchema, outcome_table = outcomeTable, - target_ids = paste(dechallengeRechallengeSettings$targetCohortDefinitionIds, sep='', collapse = ','), - outcome_ids = paste(dechallengeRechallengeSettings$outcomeCohortDefinitionIds, sep='', collapse = 
','), + target_ids = paste(dechallengeRechallengeSettings$targetCohortDefinitionIds, sep = "", collapse = ","), + outcome_ids = paste(dechallengeRechallengeSettings$outcomeCohortDefinitionIds, sep = "", collapse = ","), dechallenge_stop_interval = dechallengeRechallengeSettings$dechallengeStopInterval, dechallenge_evaluation_window = dechallengeRechallengeSettings$dechallengeEvaluationWindow ) @@ -167,17 +163,17 @@ computeDechallengeRechallengeAnalyses <- function( sql = sql ) - sql <- 'select * from #challenge;' + sql <- "select * from #challenge;" sql <- SqlRender::translate( sql = sql, - targetDialect = connection@dbms, + targetDialect = connection@dbms, tempEmulationSchema = tempEmulationSchema ) result <- DatabaseConnector::querySqlToAndromeda( connection = connection, andromeda = Andromeda::andromeda(), - andromedaTableName = 'dechallengeRechallenge', + andromedaTableName = "dechallengeRechallenge", sql = sql, snakeCaseToCamelCase = TRUE ) @@ -200,7 +196,7 @@ computeDechallengeRechallengeAnalyses <- function( paste0( "Computing dechallenge rechallenge for ", length(dechallengeRechallengeSettings$targetCohortDefinitionIds), " target ids and ", - length(dechallengeRechallengeSettings$outcomeCohortDefinitionIds),"outcome ids took ", + length(dechallengeRechallengeSettings$outcomeCohortDefinitionIds), "outcome ids took ", signif(delta, 3), " ", attr(delta, "units") ) @@ -225,44 +221,42 @@ computeDechallengeRechallengeAnalyses <- function( #' #' @export computeRechallengeFailCaseSeriesAnalyses <- function( - connectionDetails = NULL, - targetDatabaseSchema, - targetTable, - outcomeDatabaseSchema = targetDatabaseSchema, - outcomeTable = targetTable, - tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), - dechallengeRechallengeSettings, - databaseId = 'database 1', - showSubjectId = F -) { - + connectionDetails = NULL, + targetDatabaseSchema, + targetTable, + outcomeDatabaseSchema = targetDatabaseSchema, + outcomeTable = targetTable, + 
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + dechallengeRechallengeSettings, + databaseId = "database 1", + showSubjectId = F) { # check inputs errorMessages <- checkmate::makeAssertCollection() .checkConnectionDetails(connectionDetails, errorMessages) .checkCohortDetails( cohortDatabaseSchema = targetDatabaseSchema, cohortTable = targetTable, - type = 'target', - errorMessages = errorMessages + type = "target", + errorMessages = errorMessages ) .checkCohortDetails( cohortDatabaseSchema = outcomeDatabaseSchema, cohortTable = outcomeTable, - type = 'outcome', - errorMessages = errorMessages + type = "outcome", + errorMessages = errorMessages ) .checkTempEmulationSchema( tempEmulationSchema = tempEmulationSchema, - errorMessages = errorMessages + errorMessages = errorMessages ) .checkDechallengeRechallengeSettings( settings = dechallengeRechallengeSettings, - errorMessages = errorMessages + errorMessages = errorMessages ) valid <- checkmate::reportAssertions(errorMessages) - if(valid){ + if (valid) { # inputs all pass if getting here message("Inputs checked") @@ -273,7 +267,7 @@ computeRechallengeFailCaseSeriesAnalyses <- function( ) on.exit( DatabaseConnector::disconnect(connection) - ) + ) message("Computing dechallenge rechallenge results") sql <- SqlRender::loadRenderTranslateSql( @@ -286,8 +280,8 @@ computeRechallengeFailCaseSeriesAnalyses <- function( target_table = targetTable, outcome_database_schema = outcomeDatabaseSchema, outcome_table = outcomeTable, - target_ids = paste(dechallengeRechallengeSettings$targetCohortDefinitionIds, sep='', collapse = ','), - outcome_ids = paste(dechallengeRechallengeSettings$outcomeCohortDefinitionIds, sep='', collapse = ','), + target_ids = paste(dechallengeRechallengeSettings$targetCohortDefinitionIds, sep = "", collapse = ","), + outcome_ids = paste(dechallengeRechallengeSettings$outcomeCohortDefinitionIds, sep = "", collapse = ","), dechallenge_stop_interval = 
dechallengeRechallengeSettings$dechallengeStopInterval, dechallenge_evaluation_window = dechallengeRechallengeSettings$dechallengeEvaluationWindow, show_subject_id = showSubjectId @@ -297,17 +291,17 @@ computeRechallengeFailCaseSeriesAnalyses <- function( sql = sql ) - sql <- 'select * from #fail_case_series;' + sql <- "select * from #fail_case_series;" sql <- SqlRender::translate( sql = sql, - targetDialect = connection@dbms, + targetDialect = connection@dbms, tempEmulationSchema = tempEmulationSchema ) result <- DatabaseConnector::querySqlToAndromeda( connection = connection, andromeda = Andromeda::andromeda(), - andromedaTableName = 'rechallengeFailCaseSeries', + andromedaTableName = "rechallengeFailCaseSeries", sql = sql, snakeCaseToCamelCase = TRUE ) @@ -330,7 +324,7 @@ computeRechallengeFailCaseSeriesAnalyses <- function( paste0( "Computing dechallenge failed case series for ", length(dechallengeRechallengeSettings$targetCohortDefinitionIds), " target IDs and ", - length(dechallengeRechallengeSettings$outcomeCohortDefinitionIds)," outcome IDs took ", + length(dechallengeRechallengeSettings$outcomeCohortDefinitionIds), " outcome IDs took ", signif(delta, 3), " ", attr(delta, "units") ) diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 26eb408..973ec25 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # @@ -15,25 +15,23 @@ # limitations under the License. 
.checkConnection <- function( - connection, - errorMessages - ) { + connection, + errorMessages) { checkmate::assertClass( x = connection, classes = "DatabaseConnectorConnection", add = errorMessages - ) + ) } .checkConnectionDetails <- function( - connectionDetails, - errorMessages - ) { + connectionDetails, + errorMessages) { if (inherits(connectionDetails, "connectionDetails")) { - checkmate::assertClass( - x = connectionDetails, - classes = "connectionDetails", - add = errorMessages + checkmate::assertClass( + x = connectionDetails, + classes = "connectionDetails", + add = errorMessages ) } else { checkmate::assertClass( @@ -41,48 +39,42 @@ classes = "ConnectionDetails", add = errorMessages ) - } } .checkDechallengeRechallengeSettings <- function( - settings, - errorMessages - ) { + settings, + errorMessages) { checkmate::assertClass( x = settings, classes = "dechallengeRechallengeSettings", add = errorMessages - ) + ) } .checkDechallengeRechallengeSettingsList <- function( - settings, - errorMessages -) { - - if(is.null(settings)){ + settings, + errorMessages) { + if (is.null(settings)) { return() } - if(inherits(settings, 'dechallengeRechallengeSettings')){ + if (inherits(settings, "dechallengeRechallengeSettings")) { settings <- list(settings) } - lapply(settings, function(x){ + lapply(settings, function(x) { checkmate::assertClass( x = x, classes = "dechallengeRechallengeSettings", add = errorMessages ) - } - ) + }) } .checkTimeToEventSettings <- function( - settings, - errorMessages -) { + settings, + errorMessages) { checkmate::assertClass( x = settings, classes = "timeToEventSettings", @@ -91,32 +83,28 @@ } .checkTimeToEventSettingsList <- function( - settings, - errorMessages -) { - - if(is.null(settings)){ + settings, + errorMessages) { + if (is.null(settings)) { return() } - if(inherits(settings,'timeToEventSettings')){ + if (inherits(settings, "timeToEventSettings")) { settings <- list(settings) } - lapply(settings, function(x){ + lapply(settings, 
function(x) { checkmate::assertClass( x = x, classes = "timeToEventSettings", add = errorMessages ) - } - ) + }) } .checkAggregateCovariateSettings <- function( - settings, - errorMessages -) { + settings, + errorMessages) { checkmate::assertClass( x = settings, classes = "aggregateCovariateSettings", @@ -125,32 +113,28 @@ } .checkAggregateCovariateSettingsList <- function( - settings, - errorMessages -) { - - if(is.null(settings)){ + settings, + errorMessages) { + if (is.null(settings)) { return() } - if(inherits(settings,'aggregateCovariateSettings')){ + if (inherits(settings, "aggregateCovariateSettings")) { settings <- list(settings) } - lapply(settings, function(x){ + lapply(settings, function(x) { checkmate::assertClass( x = x, classes = "aggregateCovariateSettings", add = errorMessages ) - } - ) + }) } .checkCharacterizationSettings <- function( - settings, - errorMessages -) { + settings, + errorMessages) { checkmate::assertClass( x = settings, classes = "characterizationSettings", @@ -158,45 +142,42 @@ ) } -.checkCohortDetails<- function( - cohortDatabaseSchema, - cohortTable, - type = 'cohort', - errorMessages - ) { +.checkCohortDetails <- function( + cohortDatabaseSchema, + cohortTable, + type = "cohort", + errorMessages) { checkmate::assertCharacter( x = cohortDatabaseSchema, len = 1, add = errorMessages, - .var.name = paste0(type, 'DatabaseSchema') - ) + .var.name = paste0(type, "DatabaseSchema") + ) checkmate::assertCharacter( x = cohortTable, len = 1, add = errorMessages, - .var.name = paste0(type, 'Table') - ) + .var.name = paste0(type, "Table") + ) } .checkCohortIds <- function( - cohortIds, - type = 'cohort', - errorMessages -) { + cohortIds, + type = "cohort", + errorMessages) { checkmate::assertNumeric( x = cohortIds, add = errorMessages, - .var.name = paste0(type, 'Id') + .var.name = paste0(type, "Id") ) } .checkTimeAtRisk <- function( - riskWindowStart, - startAnchor, - riskWindowEnd, - endAnchor, - errorMessages -) { + riskWindowStart, + 
startAnchor, + riskWindowEnd, + endAnchor, + errorMessages) { checkmate::assertInt(riskWindowStart, add = errorMessages) checkmate::assertChoice(startAnchor, c("cohort start", "cohort end"), add = errorMessages) checkmate::assertInt(riskWindowEnd, add = errorMessages) @@ -204,22 +185,20 @@ } .checkTempEmulationSchema <- function( - tempEmulationSchema, - errorMessages - ) { + tempEmulationSchema, + errorMessages) { checkmate::assertCharacter( x = tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages - ) + ) } .checkTablePrefix <- function( tablePrefix, - errorMessages -) { + errorMessages) { checkmate::assertCharacter( pattern = "[a-zA-Z]_$", x = tablePrefix, @@ -234,27 +213,29 @@ .checkCovariateSettings <- function(covariateSettings, errorMessages) { if (class(covariateSettings) == "covariateSettings") { - checkmate::assertClass(x = covariateSettings, - classes = "covariateSettings", - add = errorMessages) + checkmate::assertClass( + x = covariateSettings, + classes = "covariateSettings", + add = errorMessages + ) } else { for (j in (1:length(covariateSettings))) { - checkmate::assertClass(x = covariateSettings[[j]], - classes = "covariateSettings", - add = errorMessages) + checkmate::assertClass( + x = covariateSettings[[j]], + classes = "covariateSettings", + add = errorMessages + ) } } - } .checkMinPriorObservation <- function( minPriorObservation, - errorMessages -) { + errorMessages) { checkmate::assertCount( x = minPriorObservation, null.ok = F, - .var.name = 'minPriorObservation', + .var.name = "minPriorObservation", add = errorMessages ) } diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R index 531631c..94bc488 100644 --- a/R/RunCharacterization.R +++ b/R/RunCharacterization.R @@ -16,9 +16,7 @@ createCharacterizationSettings <- function( timeToEventSettings = NULL, dechallengeRechallengeSettings = NULL, - aggregateCovariateSettings = NULL -) -{ + aggregateCovariateSettings = NULL) { errorMessages <- 
checkmate::makeAssertCollection() .checkTimeToEventSettingsList( settings = timeToEventSettings, @@ -67,13 +65,12 @@ createCharacterizationSettings <- function( #' @param fileName The location to save the json settings #' #' @return -#' Returns the location of the drectory containing the json settings +#' Returns the location of the directory containing the json settings #' #' @export saveCharacterizationSettings <- function( settings, - fileName -) { + fileName) { ParallelLogger::saveSettingsToJson( object = settings, fileName = fileName @@ -96,9 +93,7 @@ saveCharacterizationSettings <- function( #' #' @export loadCharacterizationSettings <- function( - fileName -) { - + fileName) { settings <- ParallelLogger::loadSettingsFromJson( fileName = fileName ) @@ -122,7 +117,7 @@ loadCharacterizationSettings <- function( #' @param characterizationSettings The study settings created using \code{createCharacterizationSettings} #' @param saveDirectory The location to save the results to #' @param tablePrefix A string to append the tables in the results -#' @param databaseId The unqiue identifier for the cdm database +#' @param databaseId The unique identifier for the cdm database #' @param showSubjectId Whether to include subjectId of failed rechallenge case series or hide #' @param minCellCount The minimum count value that is calculated #' @@ -144,8 +139,7 @@ runCharacterizationAnalyses <- function( tablePrefix = "c_", databaseId = "1", showSubjectId = F, - minCellCount = 0 -) { + minCellCount = 0) { # inputs checks errorMessages <- checkmate::makeAssertCollection() .checkCharacterizationSettings( @@ -155,10 +149,10 @@ runCharacterizationAnalyses <- function( .checkTablePrefix( tablePrefix = tablePrefix, errorMessages = errorMessages - ) + ) checkmate::reportAssertions( errorMessages - ) + ) # create the Database conn <- createSqliteDatabase( @@ -166,7 +160,7 @@ runCharacterizationAnalyses <- function( ) on.exit( DatabaseConnector::disconnect(conn) - ) + ) 
createCharacterizationTables( conn = conn, @@ -179,7 +173,6 @@ runCharacterizationAnalyses <- function( if (!is.null(characterizationSettings$timeToEventSettings)) { for (i in 1:length(characterizationSettings$timeToEventSettings)) { - message("Running time to event analysis ", i) result <- tryCatch( @@ -197,7 +190,7 @@ runCharacterizationAnalyses <- function( ) }, error = function(e) { - message(paste0("ERROR in time-to-event analysis: ", e$message)); + message(paste0("ERROR in time-to-event analysis: ", e$message)) return(NULL) } ) @@ -222,10 +215,9 @@ runCharacterizationAnalyses <- function( andromedaObject = result$timeToEvent, tablePrefix = tablePrefix, minCellCount = minCellCount, - minCellCountColumns = list('numEvents') + minCellCountColumns = list("numEvents") ) } - } } @@ -247,7 +239,7 @@ runCharacterizationAnalyses <- function( ) }, error = function(e) { - message(paste0("ERROR in dechallenge rechallenge analysis: ", e$message)); + message(paste0("ERROR in dechallenge rechallenge analysis: ", e$message)) return(NULL) } ) @@ -273,12 +265,12 @@ runCharacterizationAnalyses <- function( tablePrefix = tablePrefix, minCellCount = minCellCount, minCellCountColumns = list( - c('numCases'), - c('dechallengeAttempt'), - c('dechallengeFail', 'dechallengeSuccess'), - c('rechallengeAttempt'), - c('rechallengeFail', 'rechallengeSuccess') - ) + c("numCases"), + c("dechallengeAttempt"), + c("dechallengeFail", "dechallengeSuccess"), + c("rechallengeAttempt"), + c("rechallengeFail", "rechallengeSuccess") + ) ) } @@ -300,7 +292,7 @@ runCharacterizationAnalyses <- function( ) }, error = function(e) { - message(paste0("ERROR in rechallenge failed case analysis: ", e$message)); + message(paste0("ERROR in rechallenge failed case analysis: ", e$message)) return(NULL) } ) @@ -350,8 +342,8 @@ runCharacterizationAnalyses <- function( ) }, error = function(e) { - message(paste0("ERROR in aggregate covariate analyses: ", e$message)); - message(e); + message(paste0("ERROR in 
aggregate covariate analyses: ", e$message)) + message(e) return(NULL) } ) @@ -417,7 +409,7 @@ runCharacterizationAnalyses <- function( tablePrefix = tablePrefix, minCellCount = minCellCount, minCellCountColumns = list( - c('sumValue') #c('SUM_VALUE') #AVERAGE_VALUE + c("sumValue") # c('SUM_VALUE') #AVERAGE_VALUE ) ) } @@ -431,7 +423,7 @@ runCharacterizationAnalyses <- function( tablePrefix = tablePrefix, minCellCount = minCellCount, minCellCountColumns = list( - c('countValue') + c("countValue") ) ) } diff --git a/R/SaveLoad.R b/R/SaveLoad.R index f6c2e23..af7bb4e 100644 --- a/R/SaveLoad.R +++ b/R/SaveLoad.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # @@ -32,8 +32,7 @@ colnamesLower <- function(data) { #' @export saveTimeToEventAnalyses <- function( result, - fileName -) { + fileName) { Andromeda::saveAndromeda( andromeda = result, fileName = fileName, @@ -56,19 +55,17 @@ saveTimeToEventAnalyses <- function( exportTimeToEventToCsv <- function( result, saveDirectory, - minCellCount = 0 -) { + minCellCount = 0) { if (!dir.exists(saveDirectory)) { dir.create( path = saveDirectory, recursive = T - ) + ) } Andromeda::batchApply( tbl = result$timeToEvent, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ -79,14 +76,14 @@ exportTimeToEventToCsv <- function( dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) ) - if(sum(dat$NUM_EVENTS < minCellCount)>0){ - ParallelLogger::logInfo(paste0('Removing NUM_EVENTS less than ', minCellCount)) + if (sum(dat$NUM_EVENTS < minCellCount) > 0) { + ParallelLogger::logInfo(paste0("Removing NUM_EVENTS less than ", minCellCount)) dat$NUM_EVENTS[dat$NUM_EVENTS < minCellCount] <- -1 } @@ -95,10 +92,9 @@ exportTimeToEventToCsv <- function( file = file.path( saveDirectory, "time_to_event.csv" - 
), + ), append = append ) - } ) @@ -134,8 +130,7 @@ loadTimeToEventAnalyses <- function(fileName) { #' @export saveDechallengeRechallengeAnalyses <- function( result, - fileName -) { + fileName) { Andromeda::saveAndromeda( andromeda = result, fileName = fileName, @@ -157,8 +152,7 @@ saveDechallengeRechallengeAnalyses <- function( #' @export saveRechallengeFailCaseSeriesAnalyses <- function( result, - fileName -) { + fileName) { Andromeda::saveAndromeda( andromeda = result, fileName = fileName, @@ -178,8 +172,7 @@ saveRechallengeFailCaseSeriesAnalyses <- function( #' #' @export loadDechallengeRechallengeAnalyses <- function( - fileName -) { + fileName) { result <- Andromeda::loadAndromeda(fileName) return(result) } @@ -193,8 +186,7 @@ loadDechallengeRechallengeAnalyses <- function( #' #' @export loadRechallengeFailCaseSeriesAnalyses <- function( - fileName -) { + fileName) { result <- Andromeda::loadAndromeda(fileName) return(result) } @@ -212,9 +204,7 @@ loadRechallengeFailCaseSeriesAnalyses <- function( exportDechallengeRechallengeToCsv <- function( result, saveDirectory, - minCellCount = 0 -) { - + minCellCount = 0) { countN <- dplyr::pull( dplyr::count(result$dechallengeRechallenge) ) @@ -232,49 +222,49 @@ exportDechallengeRechallengeToCsv <- function( dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) ) removeInd <- dat$NUM_EVENTS < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing NUM_EVENTS counts less than ', minCellCount)) - if(sum(removeInd) > 0){ - dat$NUM_CASES[removeInd] <- -1 - } + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing NUM_EVENTS counts less than ", minCellCount)) + if (sum(removeInd) > 0) { + dat$NUM_CASES[removeInd] <- -1 + } } removeInd <- dat$DECHALLENGE_ATTEMPT < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing DECHALLENGE_ATTEMPT counts less than ', minCellCount)) - 
if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing DECHALLENGE_ATTEMPT counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$DECHALLENGE_ATTEMPT[removeInd] <- -1 } } removeInd <- dat$DECHALLENGE_FAIL < minCellCount | dat$DECHALLENGE_SUCCESS < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing DECHALLENGE FAIL or SUCCESS counts less than ', minCellCount)) - if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing DECHALLENGE FAIL or SUCCESS counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$DECHALLENGE_FAIL[removeInd] <- -1 dat$DECHALLENGE_SUCCESS[removeInd] <- -1 } } removeInd <- dat$RECHALLENGE_ATTEMPT < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing RECHALLENGE_ATTEMPT counts less than ', minCellCount)) - if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing RECHALLENGE_ATTEMPT counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$RECHALLENGE_ATTEMPT[removeInd] <- -1 } } removeInd <- dat$RECHALLENGE_FAIL < minCellCount | dat$RECHALLENGE_SUCCESS < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing RECHALLENGE FAIL or SUCCESS counts less than ', minCellCount)) - if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing RECHALLENGE FAIL or SUCCESS counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$RECHALLENGE_FAIL[removeInd] <- -1 dat$RECHALLENGE_SUCCESS[removeInd] <- -1 } @@ -288,7 +278,6 @@ exportDechallengeRechallengeToCsv <- function( ), append = append ) - } ) @@ -311,8 +300,7 @@ exportDechallengeRechallengeToCsv <- function( #' @export exportRechallengeFailCaseSeriesToCsv <- function( result, - saveDirectory -) { + saveDirectory) { if (!dir.exists(saveDirectory)) { dir.create( path = saveDirectory, @@ -322,39 +310,37 @@ 
exportRechallengeFailCaseSeriesToCsv <- function( countN <- dplyr::pull( dplyr::count(result$rechallengeFailCaseSeries) - ) + ) message("Writing ", countN, " rows to csv") Andromeda::batchApply( tbl = result$rechallengeFailCaseSeries, fun = function(x) { - append <- file.exists( file.path( saveDirectory, "rechallenge_fail_case_series.csv" - ) ) + ) dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) - ) + ) readr::write_csv( x = dat, file = file.path( saveDirectory, "rechallenge_fail_case_series.csv" - ), + ), append = append ) - } ) @@ -377,8 +363,7 @@ exportRechallengeFailCaseSeriesToCsv <- function( #' @export saveAggregateCovariateAnalyses <- function( result, - fileName -) { + fileName) { Andromeda::saveAndromeda( andromeda = result, fileName = fileName, @@ -397,8 +382,7 @@ saveAggregateCovariateAnalyses <- function( #' #' @export loadAggregateCovariateAnalyses <- function( - fileName -) { + fileName) { result <- Andromeda::loadAndromeda( fileName = fileName ) @@ -419,8 +403,7 @@ loadAggregateCovariateAnalyses <- function( exportAggregateCovariateToCsv <- function( result, saveDirectory, - minCellCount = 0 -) { + minCellCount = 0) { if (!dir.exists(saveDirectory)) { dir.create(saveDirectory, recursive = T) } @@ -429,7 +412,6 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$settings, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ -454,14 +436,12 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) # cohort details Andromeda::batchApply( tbl = result$cohortCounts, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ -486,7 +466,6 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) @@ -494,7 +473,6 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$cohortDetails, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ 
-519,7 +497,6 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) @@ -527,7 +504,6 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$analysisRef, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ -538,7 +514,7 @@ exportAggregateCovariateToCsv <- function( dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) @@ -552,7 +528,6 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) @@ -560,7 +535,6 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$covariateRef, fun = function(x) { - append <- file.exists( file.path( saveDirectory, @@ -571,7 +545,7 @@ exportAggregateCovariateToCsv <- function( dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) @@ -585,7 +559,6 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) @@ -593,27 +566,26 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$covariates, fun = function(x) { - append <- file.exists( file.path( saveDirectory, "covariates.csv" - ) ) + ) dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) ) removeInd <- dat$SUM_VALUE < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing SUM_VALUE counts less than ', minCellCount)) - if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing SUM_VALUE counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$SUM_VALUE[removeInd] <- -1 dat$AVERAGE_VALUE[removeInd] <- -1 } @@ -627,7 +599,6 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) @@ -635,7 +606,6 @@ exportAggregateCovariateToCsv <- function( Andromeda::batchApply( tbl = result$covariatesContinuous, fun = function(x) { - append <- 
file.exists( file.path( saveDirectory, @@ -646,16 +616,16 @@ exportAggregateCovariateToCsv <- function( dat <- as.data.frame( x %>% dplyr::collect() - ) + ) colnames(dat) <- SqlRender::camelCaseToSnakeCase( string = colnames(dat) ) removeInd <- dat$COUNT_VALUE < minCellCount - if(sum(removeInd) > 0){ - ParallelLogger::logInfo(paste0('Removing COUNT_VALUE counts less than ', minCellCount)) - if(sum(removeInd) > 0){ + if (sum(removeInd) > 0) { + ParallelLogger::logInfo(paste0("Removing COUNT_VALUE counts less than ", minCellCount)) + if (sum(removeInd) > 0) { dat$COUNT_VALUE[removeInd] <- -1 } } @@ -668,13 +638,13 @@ exportAggregateCovariateToCsv <- function( ), append = append ) - } ) invisible( file.path( saveDirectory, - c("cohort_details.csv", + c( + "cohort_details.csv", "settings.csv", "analysis_ref.csv", "covariate_ref.csv", diff --git a/R/TimeToEvent.R b/R/TimeToEvent.R index 3c9d7b9..5affa92 100644 --- a/R/TimeToEvent.R +++ b/R/TimeToEvent.R @@ -1,4 +1,4 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics +# Copyright 2024 Observational Health Data Sciences and Informatics # # This file is part of Characterization # @@ -24,22 +24,20 @@ #' #' @export createTimeToEventSettings <- function( - targetIds, - outcomeIds -){ - + targetIds, + outcomeIds) { # check indicationIds errorMessages <- checkmate::makeAssertCollection() # check targetIds is a vector of int/double .checkCohortIds( cohortIds = targetIds, - type = 'target', + type = "target", errorMessages = errorMessages ) # check outcomeIds is a vector of int/double .checkCohortIds( cohortIds = outcomeIds, - type = 'outcome', + type = "outcome", errorMessages = errorMessages ) checkmate::reportAssertions(errorMessages) @@ -51,7 +49,7 @@ createTimeToEventSettings <- function( outcomeIds = outcomeIds ) - class(result) <- 'timeToEventSettings' + class(result) <- "timeToEventSettings" return(result) } @@ -69,35 +67,33 @@ createTimeToEventSettings <- function( #' #' @export 
computeTimeToEventAnalyses <- function( - connectionDetails = NULL, - targetDatabaseSchema, - targetTable, - outcomeDatabaseSchema = targetDatabaseSchema, - outcomeTable = targetTable, - tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), - cdmDatabaseSchema, - timeToEventSettings, - databaseId = 'database 1' -) { - + connectionDetails = NULL, + targetDatabaseSchema, + targetTable, + outcomeDatabaseSchema = targetDatabaseSchema, + outcomeTable = targetTable, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + cdmDatabaseSchema, + timeToEventSettings, + databaseId = "database 1") { # check inputs errorMessages <- checkmate::makeAssertCollection() .checkConnectionDetails(connectionDetails, errorMessages) .checkCohortDetails( cohortDatabaseSchema = targetDatabaseSchema, cohortTable = targetTable, - type = 'target', - errorMessages = errorMessages + type = "target", + errorMessages = errorMessages ) .checkCohortDetails( cohortDatabaseSchema = outcomeDatabaseSchema, cohortTable = outcomeTable, - type = 'outcome', - errorMessages = errorMessages + type = "outcome", + errorMessages = errorMessages ) .checkTempEmulationSchema( tempEmulationSchema = tempEmulationSchema, - errorMessages = errorMessages + errorMessages = errorMessages ) .checkTimeToEventSettings( settings = timeToEventSettings, @@ -106,7 +102,7 @@ computeTimeToEventAnalyses <- function( valid <- checkmate::reportAssertions(errorMessages) - if(valid){ + if (valid) { start <- Sys.time() connection <- DatabaseConnector::connect( @@ -114,7 +110,7 @@ computeTimeToEventAnalyses <- function( ) on.exit( DatabaseConnector::disconnect(connection) - ) + ) # upload table to #cohort_settings message("Uploading #cohort_settings") @@ -155,10 +151,10 @@ computeTimeToEventAnalyses <- function( sql = sql ) - sql <- 'select * from #two_tte_summary;' + sql <- "select * from #two_tte_summary;" sql <- SqlRender::translate( sql = sql, - targetDialect = connection@dbms, + targetDialect = 
connection@dbms, tempEmulationSchema = tempEmulationSchema ) @@ -166,7 +162,7 @@ computeTimeToEventAnalyses <- function( connection = connection, sql = sql, andromeda = Andromeda::andromeda(), - andromedaTableName = 'timeToEvent', + andromedaTableName = "timeToEvent", snakeCaseToCamelCase = TRUE ) diff --git a/R/ViewShiny.R b/R/ViewShiny.R index a9c3290..3eea909 100644 --- a/R/ViewShiny.R +++ b/R/ViewShiny.R @@ -12,9 +12,7 @@ #' @export viewCharacterization <- function( resultLocation, - cohortDefinitionSet = NULL - ) { - + cohortDefinitionSet = NULL) { databaseSettings <- prepareCharacterizationShiny( resultLocation = resultLocation, cohortDefinitionSet = cohortDefinitionSet @@ -25,12 +23,11 @@ viewCharacterization <- function( prepareCharacterizationShiny <- function( resultLocation, - cohortDefinitionSet - ){ - server <- file.path(resultLocation, 'sqliteCharacterization', 'sqlite.sqlite') + cohortDefinitionSet) { + server <- file.path(resultLocation, "sqliteCharacterization", "sqlite.sqlite") connectionDetailsSettings <- list( - dbms = 'sqlite', + dbms = "sqlite", server = server ) @@ -42,24 +39,24 @@ prepareCharacterizationShiny <- function( con <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(con)) - tables <- tolower(DatabaseConnector::getTableNames(con, 'main')) + tables <- tolower(DatabaseConnector::getTableNames(con, "main")) - if(!'cg_cohort_definition' %in% tables){ + if (!"cg_cohort_definition" %in% tables) { cohortIds <- unique( c( - DatabaseConnector::querySql(con, 'select distinct TARGET_COHORT_ID from c_cohort_details where TARGET_COHORT_ID != 0;')$TARGET_COHORT_ID, - DatabaseConnector::querySql(con, 'select distinct OUTCOME_COHORT_ID from c_cohort_details where OUTCOME_COHORT_ID != 0;')$OUTCOME_COHORT_ID, - DatabaseConnector::querySql(con, 'select distinct TARGET_COHORT_DEFINITION_ID from c_time_to_event;')$TARGET_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, 'select distinct 
OUTCOME_COHORT_DEFINITION_ID from c_time_to_event;')$OUTCOME_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, 'select distinct TARGET_COHORT_DEFINITION_ID from c_rechallenge_fail_case_series;')$TARGET_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, 'select distinct OUTCOME_COHORT_DEFINITION_ID from c_rechallenge_fail_case_series;')$OUTCOME_COHORT_DEFINITION_ID + DatabaseConnector::querySql(con, "select distinct TARGET_COHORT_ID from c_cohort_details where TARGET_COHORT_ID != 0;")$TARGET_COHORT_ID, + DatabaseConnector::querySql(con, "select distinct OUTCOME_COHORT_ID from c_cohort_details where OUTCOME_COHORT_ID != 0;")$OUTCOME_COHORT_ID, + DatabaseConnector::querySql(con, "select distinct TARGET_COHORT_DEFINITION_ID from c_time_to_event;")$TARGET_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, "select distinct OUTCOME_COHORT_DEFINITION_ID from c_time_to_event;")$OUTCOME_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, "select distinct TARGET_COHORT_DEFINITION_ID from c_rechallenge_fail_case_series;")$TARGET_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, "select distinct OUTCOME_COHORT_DEFINITION_ID from c_rechallenge_fail_case_series;")$OUTCOME_COHORT_DEFINITION_ID ) ) DatabaseConnector::insertTable( connection = con, - databaseSchema = 'main', - tableName = 'cg_COHORT_DEFINITION', + databaseSchema = "main", + tableName = "cg_COHORT_DEFINITION", data = data.frame( cohortDefinitionId = cohortIds, cohortName = getCohortNames(cohortIds, cohortDefinitionSet) @@ -68,51 +65,51 @@ prepareCharacterizationShiny <- function( ) } - if(!'database_meta_data' %in% tables){ + if (!"database_meta_data" %in% tables) { dbIds <- unique( c( - DatabaseConnector::querySql(con, 'select distinct DATABASE_ID from c_analysis_ref;')$DATABASE_ID, - DatabaseConnector::querySql(con, 'select distinct DATABASE_ID from c_dechallenge_rechallenge;')$DATABASE_ID, - DatabaseConnector::querySql(con, 'select distinct DATABASE_ID from 
c_time_to_event;')$DATABASE_ID + DatabaseConnector::querySql(con, "select distinct DATABASE_ID from c_analysis_ref;")$DATABASE_ID, + DatabaseConnector::querySql(con, "select distinct DATABASE_ID from c_dechallenge_rechallenge;")$DATABASE_ID, + DatabaseConnector::querySql(con, "select distinct DATABASE_ID from c_time_to_event;")$DATABASE_ID ) ) DatabaseConnector::insertTable( connection = con, - databaseSchema = 'main', - tableName = 'DATABASE_META_DATA', + databaseSchema = "main", + tableName = "DATABASE_META_DATA", data = data.frame( databaseId = dbIds, - cdmSourceAbbreviation = paste0('database ', dbIds) + cdmSourceAbbreviation = paste0("database ", dbIds) ), camelCaseToSnakeCase = T ) } - if(!'i_incidence_summary' %in% tables){ - - x <- c("refId", "databaseId", "sourceName", - "targetCohortDefinitionId", "targetName", "tarId", - "tarStartWith", "tarStartOffset", "tarEndWith", "tarEndOffset", - "subgroupId", 'subgroupName', - 'outcomeId','outcomeCohortDefinitionId', 'outcomeName', - 'clean_window', - 'ageId', 'ageGroupName', - 'genderId', 'genderName', - 'startYear', 'personsAtRiskPe', 'personsAtRisk', - 'personDaysPe', 'personDays', - 'personOutcomesPe', 'personOutcomes', - 'outcomesPe', 'outcomes', - 'incidenceProportionP100p', - 'incidenceRateP100py' + if (!"i_incidence_summary" %in% tables) { + x <- c( + "refId", "databaseId", "sourceName", + "targetCohortDefinitionId", "targetName", "tarId", + "tarStartWith", "tarStartOffset", "tarEndWith", "tarEndOffset", + "subgroupId", "subgroupName", + "outcomeId", "outcomeCohortDefinitionId", "outcomeName", + "clean_window", + "ageId", "ageGroupName", + "genderId", "genderName", + "startYear", "personsAtRiskPe", "personsAtRisk", + "personDaysPe", "personDays", + "personOutcomesPe", "personOutcomes", + "outcomesPe", "outcomes", + "incidenceProportionP100p", + "incidenceRateP100py" ) df <- data.frame(matrix(ncol = length(x), nrow = 0)) colnames(df) <- x DatabaseConnector::insertTable( connection = con, - databaseSchema = 
'main', - tableName = 'i_incidence_summary', + databaseSchema = "main", + tableName = "i_incidence_summary", data = df, camelCaseToSnakeCase = T ) @@ -120,11 +117,11 @@ prepareCharacterizationShiny <- function( databaseSettings <- list( connectionDetailsSettings = connectionDetailsSettings, - schema = 'main', - tablePrefix = 'c_', - cohortTablePrefix = 'cg_', - incidenceTablePrefix = 'i_', - databaseTable = 'DATABASE_META_DATA' + schema = "main", + tablePrefix = "c_", + cohortTablePrefix = "cg_", + incidenceTablePrefix = "i_", + databaseTable = "DATABASE_META_DATA" ) return(databaseSettings) @@ -132,8 +129,7 @@ prepareCharacterizationShiny <- function( viewChars <- function( databaseSettings, - testApp = F - ){ + testApp = F) { ensure_installed("ShinyAppBuilder") ensure_installed("ResultModelManager") @@ -145,76 +141,70 @@ viewChars <- function( databaseSettings$connectionDetailsSettings <- NULL - if(utils::packageVersion('ShinyAppBuilder') < '1.2.0'){ - #use old method + if (utils::packageVersion("ShinyAppBuilder") < "1.2.0") { + # use old method # set database settings into system variables Sys.setenv("resultDatabaseDetails_characterization" = as.character(ParallelLogger::convertSettingsToJson(databaseSettings))) config <- ParallelLogger::loadSettingsFromJson( fileName = system.file( - 'shinyConfig.json', + "shinyConfig.json", package = "Characterization" ) ) - if(!testApp){ + if (!testApp) { ShinyAppBuilder::viewShiny( config = config, connection = connection ) - } else{ + } else { ShinyAppBuilder::createShinyApp(config = config, connection = connection) } - - } else{ + } else { # use new method config <- ParallelLogger::loadSettingsFromJson( fileName = system.file( - 'shinyConfigUpdate.json', + "shinyConfigUpdate.json", package = "Characterization" ) ) - databaseSettings$cTablePrefix = databaseSettings$tablePrefix - databaseSettings$cgTablePrefix = databaseSettings$cohortTablePrefix - databaseSettings$databaseTable = 'DATABASE_META_DATA' - 
databaseSettings$databaseTablePrefix = '' - databaseSettings$iTablePrefix = databaseSettings$incidenceTablePrefix + databaseSettings$cTablePrefix <- databaseSettings$tablePrefix + databaseSettings$cgTablePrefix <- databaseSettings$cohortTablePrefix + databaseSettings$databaseTable <- "DATABASE_META_DATA" + databaseSettings$databaseTablePrefix <- "" + databaseSettings$iTablePrefix <- databaseSettings$incidenceTablePrefix databaseSettings$cgTable <- "cohort_definition" - if(!testApp){ + if (!testApp) { ShinyAppBuilder::viewShiny( config = config, connection = connection, resultDatabaseSettings = databaseSettings ) - } else{ + } else { ShinyAppBuilder::createShinyApp( config = config, connection = connection, resultDatabaseSettings = databaseSettings - ) - + ) } - } - - } -getCohortNames <- function(cohortIds, cohortDefinitionSet){ - - if(!is.null(cohortDefinitionSet)){ - cohortNames <- sapply( - cohortIds, - function(x){ - cohortDefinitionSet$cohortName[cohortDefinitionSet$cohortId == x] - } - ) - } else{ - cohortNames <- paste0('cohort ', cohortIds) +getCohortNames <- function(cohortIds, cohortDefinitionSet) { + if (!is.null(cohortDefinitionSet)) { + cohortNames <- sapply( + cohortIds, + function(x) { + cohortDefinitionSet$cohortName[cohortDefinitionSet$cohortId == x] + } + ) + } else { + cohortNames <- paste0("cohort ", cohortIds) } return(cohortNames) @@ -222,9 +212,10 @@ getCohortNames <- function(cohortIds, cohortDefinitionSet){ # Borrowed from devtools: https://github.com/hadley/devtools/blob/ba7a5a4abd8258c52cb156e7b26bb4bf47a79f0b/R/utils.r#L44 -is_installed <- function (pkg, version = 0) { +is_installed <- function(pkg, version = 0) { installed_version <- tryCatch(utils::packageVersion(pkg), - error = function(e) NA) + error = function(e) NA + ) !is.na(installed_version) && installed_version >= version } @@ -235,17 +226,17 @@ ensure_installed <- function(pkg) { if (interactive()) { message(msg, "\nWould you like to install it?") if (utils::menu(c("Yes", 
"No")) == 1) { - if(pkg%in%c("ShinyAppBuilder", "ResultModelManager")){ - + if (pkg %in% c("ShinyAppBuilder", "ResultModelManager")) { # add code to check for devtools... - dvtCheck <- tryCatch(utils::packageVersion('devtools'), - error = function(e) NA) - if(is.na(dvtCheck)){ - utils::install.packages('devtools') + dvtCheck <- tryCatch(utils::packageVersion("devtools"), + error = function(e) NA + ) + if (is.na(dvtCheck)) { + utils::install.packages("devtools") } - devtools::install_github(paste0('OHDSI/',pkg)) - }else{ + devtools::install_github(paste0("OHDSI/", pkg)) + } else { utils::install.packages(pkg) } } else { diff --git a/docs/404.html b/docs/404.html index ac0bc8f..a8cd17d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@
diff --git a/docs/articles/InstallationGuide.html b/docs/articles/InstallationGuide.html index 1f631f2..9d0754e 100644 --- a/docs/articles/InstallationGuide.html +++ b/docs/articles/InstallationGuide.html @@ -33,7 +33,7 @@ @@ -96,7 +96,7 @@vignettes/InstallationGuide.Rmd
InstallationGuide.Rmd
This vignette describes how you need to install the Observational
-Health Data Sciencs and Informatics (OHDSI) Characterization
+Health Data Sciences and Informatics (OHDSI) Characterization
package under Windows, Mac, and Linux.
If you do not want the official release you could install the -bleading edge version of the package (latest develop branch).
+bleeding edge version of the package (latest develop branch).Note that the latest develop branch could contain bugs, please report them to us if you experience problems.
If you have an error when trying to install a package in R saying
-‘Dependancy X not available …’ then this can sometimes
+‘Dependency X not available …’ then this can sometimes
be fixed by running install.packages('X')
and then once
that completes trying to reinstall the package that had the
error.
##
## To cite package 'Characterization' in publications use:
##
-## Reps J, Ryan P (2023). _Characterization: Characterizations of
+## Reps J, Ryan P (2024). _Characterization: Characterizations of
## Cohorts_. https://ohdsi.github.io/Characterization,
## https://github.com/OHDSI/Characterization.
##
@@ -207,7 +207,7 @@ Acknowledgments## @Manual{,
## title = {Characterization: Characterizations of Cohorts},
## author = {Jenna Reps and Patrick Ryan},
-## year = {2023},
+## year = {2024},
## note = {https://ohdsi.github.io/Characterization, https://github.com/OHDSI/Characterization},
## }
vignettes/UsingCharacterizationPackage.Rmd
UsingCharacterizationPackage.Rmd
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails = connectionDetails)
## Connecting using SQLite driver
-## Creating cohort: Celecoxib
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00908 secs
-## Creating cohort: Diclofenac
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00446 secs
-## Creating cohort: GiBleed
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00896 secs
-## Creating cohort: NSAIDs
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.0542 secs
+## Creating cohort: Celecoxib
+## | | | 0% | |======================================================================| 100%
+## Executing SQL took 0.0152 secs
+## Creating cohort: Diclofenac
+## | | | 0% | |======================================================================| 100%
+## Executing SQL took 0.0169 secs
+## Creating cohort: GiBleed
+## | | | 0% | |======================================================================| 100%
+## Executing SQL took 0.0208 secs
+## Creating cohort: NSAIDs
+## | | | 0% | |======================================================================| 100%
+## Executing SQL took 0.0579 secs
## Cohorts created in table main.cohort
## cohortId name
## 1 1 Celecoxib
@@ -247,47 +231,47 @@ Aggregate Covariates
- exampleTargetIds <- c(1,2,4)
- exampleOutcomeIds <- 3
+exampleTargetIds <- c(1, 2, 4)
+exampleOutcomeIds <- 3
If we want to get information on the sex assigned at birth, age at
index and Charlson Comorbidity index we can create the settings using
FeatureExtraction::createCovariateSettings
:
- exampleCovariateSettings <- FeatureExtraction::createCovariateSettings(
- useDemographicsGender = T,
- useDemographicsAge = T,
- useCharlsonIndex = T
- )
exampleCovariateSettings <- FeatureExtraction::createCovariateSettings(
+ useDemographicsGender = T,
+ useDemographicsAge = T,
+ useCharlsonIndex = T
+)
If we want to create the aggregate features for all our target cohorts, our outcome cohort and each target cohort restricted to those with a record of the outcome 1 day after target cohort start date until 365 days after target cohort end date we can run:
- exampleAggregateCovariateSettings <- createAggregateCovariateSettings(
- targetIds = exampleTargetIds,
- outcomeIds = exampleOutcomeIds,
- riskWindowStart = 1, startAnchor = 'cohort start',
- riskWindowEnd = 365, endAnchor = 'cohort start',
- covariateSettings = exampleCovariateSettings
- )
exampleAggregateCovariateSettings <- createAggregateCovariateSettings(
+ targetIds = exampleTargetIds,
+ outcomeIds = exampleOutcomeIds,
+ riskWindowStart = 1, startAnchor = "cohort start",
+ riskWindowEnd = 365, endAnchor = "cohort start",
+ covariateSettings = exampleCovariateSettings
+)
Next we need to use the
exampleAggregateCovariateSettings
as the settings to
computeAggregateCovariateAnalyses
, we need to use the
Eunomia connectionDetails and in Eunomia the OMOP CDM data and cohort
table are in the ‘main’ schema. The cohort table name is ‘cohort’. The
-following code will apply the aggregative covariates analysis using the
+following code will apply the aggregated covariates analysis using the
previously specified settings on the simulated Eunomia data:
agc <- computeAggregateCovariateAnalyses(
- connectionDetails = connectionDetails,
- cdmDatabaseSchema = 'main',
- cdmVersion = 5,
- targetDatabaseSchema = 'main',
- targetTable = 'cohort',
- aggregateCovariateSettings = exampleAggregateCovariateSettings,
- databaseId = 'Eunomia',
- runId = 1
- )
If you would like to save the results you can use the function
saveAggregateCovariateAnalyses
and this can then be loaded
using loadAggregateCovariateAnalyses
.
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
-## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
+ kableExtra::kbl()