From 12b0f8691f1c4c952425f8dea83dc73bfb2af6e2 Mon Sep 17 00:00:00 2001 From: Mikhail-iontsev Date: Tue, 22 Oct 2024 13:17:01 +0400 Subject: [PATCH] Update docs example script to generate Ares files --- docs/docs/ares-docs.md | 78 +++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/docs/docs/ares-docs.md b/docs/docs/ares-docs.md index 0cbd483..0cc596a 100644 --- a/docs/docs/ares-docs.md +++ b/docs/docs/ares-docs.md @@ -86,37 +86,31 @@ existing OMOP CDM database and require both [Achilles](https://github.com/ohdsi/achilles) and [DataQualityDashboard](https://github.com/ohdsi/dataqualitydashboard). -Here's an example script to generate Ares files based on a sample dataset (replace connectionDetails to generate results -for your database). +Here's an example script to generate Ares files (replace `connectionDetails` with your own): ```r -# DatabaseConnector::downloadJdbcDrivers("postgresql","D:/OHDSI/Drivers") - use it to download the JDBC driver +# DatabaseConnector::downloadJdbcDrivers("postgresql","./") - use it to download the JDBC driver options(connectionObserver = NULL) -cdmDatabaseSchema = "main" #indicate the name of the cdm schema you created -resultsDatabaseSchema = "main" #indicate the name of the results schema you created -vocabDatabaseSchema = "main" #vocab should be located in the cdm schema -numThreads = 1 #i only managed to get till the end in single-threaded mode, had problems utilizing 2 or more +cdmDatabaseSchema = "cdm" +resultsDatabaseSchema = "results" # used to store Achilles output +vocabDatabaseSchema = "cdm" #usually located within the cdm schema +numThreads = 1 cdmSourceName = 'synthea' -cdmVersion = "5.4" #do not change +cdmVersion = "5.4" -aresDataDirectory = "./data" #output directory -sourceFolders = "./data" #source files folder for indexers. 
+aresDataDirectory = "./data" # output directory for the exported Ares files -connectionDetails <- Eunomia::getEunomiaConnectionDetails() #Provides a sample dataset - -sourceReleaseKey = AresIndexer::getSourceReleaseKey(connectionDetails, cdmDatabaseSchema) - - -# Example connection details -#connectionDetails <- DatabaseConnector::createConnectionDetails( -# dbms = dbms, #your database management system -# server = server, #server address -# user = user, -# password = password, -# pathToDriver = pathToDriver #location of the JDBC driver -#) +#connectionDetails <- Eunomia::getEunomiaConnectionDetails() #Provides a sample dataset +connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "postgresql", + server = "localhost/synthea", + user = "postgres", + password = "12345", + port = 5432, + pathToDriver = "./" #indicate path to your chosen dbms JDBC driver +) #Used to connect to your existing CDM # Run Achilles Achilles::achilles( @@ -125,23 +119,25 @@ Achilles::achilles( resultsDatabaseSchema= resultsDatabaseSchema, vocabDatabaseSchema = vocabDatabaseSchema, numThreads = 1, - cdmVersion = "5.4.0", + cdmVersion = cdmVersion, createIndices = F, createTable = T, smallCellCount = 0, ) -# Export statistics generated by Achilles + +# Export Achilles results to Ares-supported format Achilles::exportToAres( connectionDetails = connectionDetails, cdmDatabaseSchema = cdmDatabaseSchema, resultsDatabaseSchema = resultsDatabaseSchema, vocabDatabaseSchema = vocabDatabaseSchema, - outputPath = sourceFolders, + outputPath = aresDataDirectory, ) -datasourceReleaseOutputFolder <- file.path(aresDataDirectory, releaseKey) +sourceReleaseKey = AresIndexer::getSourceReleaseKey(connectionDetails, cdmDatabaseSchema) +datasourceReleaseOutputFolder <- file.path(aresDataDirectory, sourceReleaseKey) -# Run Data Quality Dashboard +# Run data quality checks DataQualityDashboard::executeDqChecks( connectionDetails = connectionDetails, cdmDatabaseSchema = cdmDatabaseSchema, @@ -152,15 +148,35 @@ 
DataQualityDashboard::executeDqChecks( outputFolder = datasourceReleaseOutputFolder, outputFile = "dq-result.json", verboseMode = T, - writeToTable = F + writeToTable = F, + cdmVersion = cdmVersion + +) +# Run Achilles temporal characterization +outputFile <- file.path(datasourceReleaseOutputFolder, "temporal-characterization.csv") +Achilles::performTemporalCharacterization( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + outputFile = outputFile, ) +# Get a list of sources generated by the exportToAres function +list <- list.dirs(aresDataDirectory, recursive = FALSE) -list <- list.dirs(sourceFolders, recursive = FALSE) -AresIndexer::augmentConceptFiles(sourceFolders = list) +# Augment concept files with data quality details +AresIndexer::augmentConceptFiles(releaseFolder = file.path(aresDataDirectory, cdmSourceName, sourceReleaseKey)) +# Export index of all SQL queries used in data processing AresIndexer::buildExportQueryIndex(aresDataDirectory) +# Compare data quality issues with previous source releases (used to display the issue delta) +AresIndexer::augmentDataQualityFiles(sourceFolders = list) +# Create index of quality issues across releases (used to render the data quality delta chart) +AresIndexer::buildSourceDataQualityDelta(sourceFolders = list) +# Create index of all available sources and releases AresIndexer::buildNetworkIndex(list, outputFolder = aresDataDirectory) +# Create data quality index across the network AresIndexer::buildDataQualityIndex(list, outputFolder = aresDataDirectory) +# Create index of unmapped source codes across the network AresIndexer::buildNetworkUnmappedSourceCodeIndex(list, outputFolder = aresDataDirectory) ```