diff --git a/.github/workflows/R_CMD_check_Hades.yml b/.github/workflows/R_CMD_check_Hades.yml index 04a216d4..c7a63e86 100644 --- a/.github/workflows/R_CMD_check_Hades.yml +++ b/.github/workflows/R_CMD_check_Hades.yml @@ -29,21 +29,25 @@ jobs: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }} + CDM5_ORACLE_CDM54_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }} CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }} CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }} CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }} CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }} CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }} + CDM5_POSTGRESQL_CDM54_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }} CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }} CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }} CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }} CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }} CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }} + CDM5_SQL_SERVER_CDM54_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }} CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }} CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }} + CDM5_REDSHIFT_CDM54_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }} CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} diff --git a/.github/workflows/R_CMD_check_main_weekly.yaml b/.github/workflows/R_CMD_check_main_weekly.yaml index a5624680..4337cbb4 100644 --- 
a/.github/workflows/R_CMD_check_main_weekly.yaml +++ b/.github/workflows/R_CMD_check_main_weekly.yaml @@ -21,21 +21,25 @@ jobs: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }} + CDM5_ORACLE_CDM54_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }} CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }} CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }} CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }} CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }} CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }} + CDM5_POSTGRESQL_CDM54_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }} CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }} CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }} CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }} CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }} CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }} + CDM5_SQL_SERVER_CDM54_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }} CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }} CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }} + CDM5_REDSHIFT_CDM54_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }} CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} diff --git a/DESCRIPTION b/DESCRIPTION index aee1eb0f..910a09e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: DataQualityDashboard Type: Package Title: Execute and View Data Quality Checks on OMOP CDM Database -Version: 2.4.0 -Date: 2023-07-26 +Version: 2.4.1 +Date: 2023-10-18 
Authors@R: c( person("Katy", "Sadowski", email = "sadowski@ohdsi.org", role = c("aut", "cre")), person("Clair", "Blacketer", role = c("aut")), diff --git a/NAMESPACE b/NAMESPACE index 69aad71c..690105a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ importFrom(jsonlite,fromJSON) importFrom(jsonlite,parse_json) importFrom(jsonlite,toJSON) importFrom(magrittr,"%>%") +importFrom(readr,local_edition) importFrom(readr,read_csv) importFrom(rlang,.data) importFrom(stats,na.omit) diff --git a/NEWS.md b/NEWS.md index 62533c1f..921498ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +DataQualityDashboard 2.4.1 +========================== +This release includes: + +- Minor documentation updates +- A patch for an issue in one of DQD's transitive dependencies, `vroom` +- Test suite upgrades to run remote DB tests against OMOP v5.4, and to add Redshift to remote DB tests + DataQualityDashboard 2.4.0 ========================== This release includes: diff --git a/R/convertResultsCase.R b/R/convertResultsCase.R index 0668b211..0efa1f5e 100644 --- a/R/convertResultsCase.R +++ b/R/convertResultsCase.R @@ -30,6 +30,7 @@ #' @importFrom SqlRender snakeCaseToCamelCase camelCaseToSnakeCase #' @importFrom dplyr rename_with #' @importFrom tools file_path_sans_ext +#' @importFrom readr local_edition #' #' @export @@ -47,6 +48,9 @@ convertJsonResultsFileCase <- function( stop("You must specify an output folder if writing to file.") } + # temporary patch to work around vroom 1.6.4 bug + readr::local_edition(1) + results <- jsonlite::fromJSON(jsonFilePath) if ("numViolatedRows" %in% names(results$CheckResults) && targetCase == "camel") { diff --git a/R/executeDqChecks.R b/R/executeDqChecks.R index 7f81fd5b..5e9d1070 100644 --- a/R/executeDqChecks.R +++ b/R/executeDqChecks.R @@ -16,7 +16,7 @@ #' @title Execute DQ checks #' -#' @description This function will connect to the database, generate the sql scripts, and run the data quality checks against the database. 
+#' @description This function will connect to the database, generate the sql scripts, and run the data quality checks against the database. By default, results will be written to a json file as well as a database table. #' #' @param connectionDetails A connectionDetails object for connecting to the CDM database #' @param cdmDatabaseSchema The fully qualified database name of the CDM schema @@ -54,7 +54,7 @@ #' @importFrom utils packageVersion write.table #' @importFrom rlang .data #' @importFrom tidyselect all_of -#' @importFrom readr read_csv +#' @importFrom readr read_csv local_edition #' @importFrom dplyr mutate case_when #' #' @export @@ -119,6 +119,9 @@ executeDqChecks <- function(connectionDetails, } } + # temporary patch to work around vroom 1.6.4 bug + readr::local_edition(1) + # capture metadata ----------------------------------------------------------------------- if (!sqlOnly) { connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) diff --git a/R/listChecks.R b/R/listChecks.R index ddeccbb4..6e286a29 100644 --- a/R/listChecks.R +++ b/R/listChecks.R @@ -24,10 +24,13 @@ #' @param fieldCheckThresholdLoc The location of the threshold file for evaluating the field checks. If not specified the default thresholds will be applied. #' @param conceptCheckThresholdLoc The location of the threshold file for evaluating the concept checks. If not specified the default thresholds will be applied. 
#' -#' @importFrom readr read_csv +#' @importFrom readr read_csv local_edition #' #' @export listDqChecks <- function(cdmVersion = "5.3", tableCheckThresholdLoc = "default", fieldCheckThresholdLoc = "default", conceptCheckThresholdLoc = "default") { + # temporary patch to work around vroom 1.6.4 bug + readr::local_edition(1) + dqChecks <- {} dqChecks$checkDescriptions <- read_csv(system.file( diff --git a/README.md b/README.md index dc3c21e8..9498f682 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ System Requirements =================== Requires R (version 3.2.2 or higher). Requires [DatabaseConnector](https://github.com/OHDSI/DatabaseConnector) (version 2.0.2 or higher). +A variety of database platforms are supported, as documented [here](https://ohdsi.github.io/Hades/supportedPlatforms.html). + +Note that while data quality check threshold files are provided for OMOP CDM versions 5.2, 5.3, and 5.4, the package is currently only tested against versions 5.3 and 5.4. + Installation ============= 1. See the instructions [here](https://ohdsi.github.io/Hades/rSetup.html) for configuring your R environment, including RTools and Java. diff --git a/docs/404.html b/docs/404.html index 08b81bfe..ca0796ad 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index e17e3b8a..31e872f3 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/articles/AddNewCheck.html b/docs/articles/AddNewCheck.html index 0deb430c..22f4f9a4 100644 --- a/docs/articles/AddNewCheck.html +++ b/docs/articles/AddNewCheck.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -108,7 +108,7 @@

Add a New Data Quality Check

Don Torok

-

2023-07-26

+

2023-10-18

Source: vignettes/AddNewCheck.rmd diff --git a/docs/articles/CheckStatusDefinitions.html b/docs/articles/CheckStatusDefinitions.html index 054f2894..f75e735e 100644 --- a/docs/articles/CheckStatusDefinitions.html +++ b/docs/articles/CheckStatusDefinitions.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

Check Status Descriptions

Dmitry Ilyn

-

2023-07-26

+

2023-10-18

Source: vignettes/CheckStatusDefinitions.rmd diff --git a/docs/articles/CheckTypeDescriptions.html b/docs/articles/CheckTypeDescriptions.html index b81d54e3..3e1ec5c5 100644 --- a/docs/articles/CheckTypeDescriptions.html +++ b/docs/articles/CheckTypeDescriptions.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

Data Quality Check Type Definitions

Clair Blacketer

-

2023-07-26

+

2023-10-18

Source: vignettes/CheckTypeDescriptions.rmd diff --git a/docs/articles/DataQualityDashboard.html b/docs/articles/DataQualityDashboard.html index 9fed8026..023b49f0 100644 --- a/docs/articles/DataQualityDashboard.html +++ b/docs/articles/DataQualityDashboard.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

Getting Started

Clair Blacketer

-

2023-07-26

+

2023-10-18

Source: vignettes/DataQualityDashboard.rmd @@ -218,6 +218,14 @@

Executing Data Quality Checks# which DQ checks to run? ------------------------------------ checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv +# want to EXCLUDE a pre-specified list of checks? run the following code: +# +# checksToExclude <- c() # Names of check types to exclude from your DQD run +# allChecks <- DataQualityDashboard::listDqChecks() +# checkNames <- allChecks$checkDescriptions %>% +# subset(!(checkName %in% checksToExclude)) %>% +# select(checkName) + # which CDM tables to exclude? ------------------------------------ tablesToExclude <- c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN") # list of CDM table names to skip evaluating checks against; by default DQD excludes the vocab tables @@ -226,11 +234,13 @@

Executing Data Quality Checks= cdmDatabaseSchema, resultsDatabaseSchema = resultsDatabaseSchema, cdmSourceName = cdmSourceName, + cdmVersion = cdmVersion, numThreads = numThreads, sqlOnly = sqlOnly, sqlOnlyUnionCount = sqlOnlyUnionCount, sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, outputFolder = outputFolder, + outputFile = outputFile, verboseMode = verboseMode, writeToTable = writeToTable, writeToCsv = writeToCsv, diff --git a/docs/articles/DqdForCohorts.html b/docs/articles/DqdForCohorts.html index af81a451..b0aa7263 100644 --- a/docs/articles/DqdForCohorts.html +++ b/docs/articles/DqdForCohorts.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

Running the DQD on a Cohort

Clair Blacketer

-

2023-07-26

+

2023-10-18

Source: vignettes/DqdForCohorts.rmd diff --git a/docs/articles/SqlOnly.html b/docs/articles/SqlOnly.html index 999ee7db..3ad568b6 100644 --- a/docs/articles/SqlOnly.html +++ b/docs/articles/SqlOnly.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

SqlOnly

Maxim Moinat

-

2023-07-26

+

2023-10-18

Source: vignettes/SqlOnly.rmd diff --git a/docs/articles/Thresholds.html b/docs/articles/Thresholds.html index d8327119..dc6f190c 100644 --- a/docs/articles/Thresholds.html +++ b/docs/articles/Thresholds.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -109,7 +109,7 @@

Failure Thresholds and How to Change Them

Clair Blacketer

-

2023-07-26

+

2023-10-18

Source: vignettes/Thresholds.rmd diff --git a/docs/articles/index.html b/docs/articles/index.html index 13de8fdd..78d2d687 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/authors.html b/docs/authors.html index 06995b97..22af3d2d 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/index.html b/docs/index.html index 1f06facc..9eff8be6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -224,6 +224,8 @@

TechnologySystem Requirements

Requires R (version 3.2.2 or higher). Requires DatabaseConnector (version 2.0.2 or higher).

+

A variety of database platforms are supported, as documented here.

+

Note that while data quality check threshold files are provided for OMOP CDM versions 5.2, 5.3, and 5.4, the package is currently only tested against versions 5.3 and 5.4.

Installation diff --git a/docs/news/index.html b/docs/news/index.html index 4221e1c5..1431a0ed 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1

@@ -84,6 +84,14 @@

Changelog

Source: NEWS.md +
+ +

This release includes:

+

This release includes:

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index f8935271..ec39574d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -9,5 +9,5 @@ articles: DqdForCohorts: DqdForCohorts.html SqlOnly: SqlOnly.html Thresholds: Thresholds.html -last_built: 2023-07-26T22:27Z +last_built: 2023-10-19T03:18Z diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index 3b09607c..a0d1e7e4 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1
diff --git a/docs/reference/convertJsonResultsFileCase.html b/docs/reference/convertJsonResultsFileCase.html index f500b713..9fea5133 100644 --- a/docs/reference/convertJsonResultsFileCase.html +++ b/docs/reference/convertJsonResultsFileCase.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-evaluateThresholds.html b/docs/reference/dot-evaluateThresholds.html index a2a4a2aa..fe120aa0 100644 --- a/docs/reference/dot-evaluateThresholds.html +++ b/docs/reference/dot-evaluateThresholds.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-getCheckId.html b/docs/reference/dot-getCheckId.html index 05ebe3d2..0ead8722 100644 --- a/docs/reference/dot-getCheckId.html +++ b/docs/reference/dot-getCheckId.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-processCheck.html b/docs/reference/dot-processCheck.html index 1512a950..518d5bd0 100644 --- a/docs/reference/dot-processCheck.html +++ b/docs/reference/dot-processCheck.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-recordResult.html b/docs/reference/dot-recordResult.html index 05f65915..c8970af0 100644 --- a/docs/reference/dot-recordResult.html +++ b/docs/reference/dot-recordResult.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-runCheck.html b/docs/reference/dot-runCheck.html index a0fe4284..9ad867ae 100644 --- a/docs/reference/dot-runCheck.html +++ b/docs/reference/dot-runCheck.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-summarizeResults.html b/docs/reference/dot-summarizeResults.html index c446b8a6..9d0aadf6 100644 --- a/docs/reference/dot-summarizeResults.html +++ b/docs/reference/dot-summarizeResults.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-writeResultsToCsv.html b/docs/reference/dot-writeResultsToCsv.html index 
f7eb0746..fce5b97b 100644 --- a/docs/reference/dot-writeResultsToCsv.html +++ b/docs/reference/dot-writeResultsToCsv.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-writeResultsToJson.html b/docs/reference/dot-writeResultsToJson.html index 7f72e2ce..2df4c8b7 100644 --- a/docs/reference/dot-writeResultsToJson.html +++ b/docs/reference/dot-writeResultsToJson.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/dot-writeResultsToTable.html b/docs/reference/dot-writeResultsToTable.html index e9b772dc..86861bc0 100644 --- a/docs/reference/dot-writeResultsToTable.html +++ b/docs/reference/dot-writeResultsToTable.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/executeDqChecks.html b/docs/reference/executeDqChecks.html index b0aa1a39..eb39a4ec 100644 --- a/docs/reference/executeDqChecks.html +++ b/docs/reference/executeDqChecks.html @@ -1,5 +1,5 @@ -Execute DQ checks — executeDqChecks • DataQualityDashboardExecute DQ checks — executeDqChecks • DataQualityDashboard @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 @@ -86,7 +86,7 @@

Execute DQ checks

-

This function will connect to the database, generate the sql scripts, and run the data quality checks against the database.

+

This function will connect to the database, generate the sql scripts, and run the data quality checks against the database. By default, results will be written to a json file as well as a database table.

diff --git a/docs/reference/index.html b/docs/reference/index.html index 0c6d792b..fb1fd296 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1
diff --git a/docs/reference/listDqChecks.html b/docs/reference/listDqChecks.html index fa949f69..ef17b90d 100644 --- a/docs/reference/listDqChecks.html +++ b/docs/reference/listDqChecks.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/reEvaluateThresholds.html b/docs/reference/reEvaluateThresholds.html index c23b381b..0a7df4a7 100644 --- a/docs/reference/reEvaluateThresholds.html +++ b/docs/reference/reEvaluateThresholds.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/viewDqDashboard.html b/docs/reference/viewDqDashboard.html index b8e38e32..41557122 100644 --- a/docs/reference/viewDqDashboard.html +++ b/docs/reference/viewDqDashboard.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/writeJsonResultsToCsv.html b/docs/reference/writeJsonResultsToCsv.html index 904f171f..3083ad75 100644 --- a/docs/reference/writeJsonResultsToCsv.html +++ b/docs/reference/writeJsonResultsToCsv.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/docs/reference/writeJsonResultsToTable.html b/docs/reference/writeJsonResultsToTable.html index 8f1cff77..43b428f9 100644 --- a/docs/reference/writeJsonResultsToTable.html +++ b/docs/reference/writeJsonResultsToTable.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.4.0 + 2.4.1 diff --git a/extras/DataQualityDashboard.pdf b/extras/DataQualityDashboard.pdf index 71a5842d..24a693fa 100644 Binary files a/extras/DataQualityDashboard.pdf and b/extras/DataQualityDashboard.pdf differ diff --git a/extras/DevelopersREADME.md b/extras/DevelopersREADME.md new file mode 100644 index 00000000..629d8d08 --- /dev/null +++ b/extras/DevelopersREADME.md @@ -0,0 +1,69 @@ +DQD Developers README +==================== + +Dev Setup +==================== +1. R setup: https://ohdsi.github.io/Hades/rSetup.html + +2. Local OMOP CDM setup + + If you already have a CDM available for development work/testing, you may skip this step + + a. 
Install Postgres and create a localhost server + + b. Create a new database in localhost for your test CDM, and create a schema in that database for the CDM tables + + c. Using the [CDMConnector](https://odyosg.github.io/CDMConnector/index.html) package: + + - i. Download a sample OMOP CDM into a DuckDB database, as documented [here](https://odyosg.github.io/CDMConnector/reference/eunomiaDir.html) + + - ii. Copy the CDM into your local Postgres database, as documented [here](https://odyosg.github.io/CDMConnector/reference/copy_cdm_to.html) + + +3. Fork the DataQualityDashboard repo + + 4. Clone your fork to your computer + + + + +PR Process +==================== + +Be sure you're aware of our Pull Request guidelines before diving into development work: https://github.com/OHDSI/DataQualityDashboard/blob/main/.github/pull_request_template.md +1. Sync your fork's develop branch with the upstream repo +2. Check out and pull the develop branch of your fork +3. Create a new branch named (briefly) according to the issue being fixed / feature being added + + a. If possible, limit the changes made on each branch to those needed for a single GitHub issue + + b. If an issue or new feature requires extensive changes, split your work across multiple sub-branches off of your feature branch, or across multiple feature branches +4. Make your changes + + a. If you are adding new functionality, you must add unit tests to cover the new function(s)/code + + b. If you are fixing a bug, you must add a unit test for the regression +5. Run R CMD Check and resolve all errors, warnings, and notes + + a. At the time of writing, the NOTE regarding the size of the package is expected and does not need to be resolved +6. Run `test_file(path = "tests/testthat/test-executeDqChecks.R")` and resolve all test failures + + a. This file contains tests using testthat's snapshot feature, which do not work when tests are run via R CMD Check + + b. 
See testthat docs to learn more about snapshots and how to resolve snapshot test failures: https://testthat.r-lib.org/articles/snapshotting.html +7. Build & install the package locally, then run DQD against your local Postgres database and view the results. Resolve any errors that arise +8. Commit your changes and push them to GitHub +9. Back on GitHub, open up a PR for your changes, making sure to set the target branch to the `develop` branch of the parent OHDSI/DataQualityDashboard repo +10. Wait for the automated checks to complete + + a. If they all succeed, your PR is ready for review! + + b. If any checks fail, check the logs and address errors in your code by repeating steps 4-7 above +11. Once your PR is approved by a maintainer, you may merge it into the `develop` branch + +General Guidance +==================== +HADES Developer Guidelines: https://ohdsi.github.io/Hades/developerGuidelines.html +HADES Code Style Requirements: https://ohdsi.github.io/Hades/codeStyle.html +HADES Release Process: https://ohdsi.github.io/Hades/releaseProcess.html + \ No newline at end of file diff --git a/extras/codeToRun.R b/extras/codeToRun.R index 4807d3c2..81cc0e45 100644 --- a/extras/codeToRun.R +++ b/extras/codeToRun.R @@ -19,11 +19,11 @@ library(DatabaseConnector) # fill out the connection details ----------------------------------------------------------------------- connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = "", - user = "", - password = "", - server = "", - port = "", + dbms = "", + user = "", + password = "", + server = "", + port = "", extraSettings = "", pathToDriver = "" ) @@ -31,30 +31,47 @@ connectionDetails <- DatabaseConnector::createConnectionDetails( cdmDatabaseSchema <- "yourCdmSchema" # the fully qualified database schema name of the CDM resultsDatabaseSchema <- "yourResultsSchema" # the fully qualified database schema name of the results schema (that you can write to) cdmSourceName <- "Your CDM Source" # a human readable 
name for your CDM source -cdmVersion <- "5.4" # the CDM version you are targetting. Currently supporst 5.2.2, 5.3.1, and 5.4 +cdmVersion <- "5.4" # the CDM version you are targeting. Currently supports 5.2, 5.3, and 5.4 # determine how many threads (concurrent SQL sessions) to use ---------------------------------------- numThreads <- 1 # on Redshift, 3 seems to work well # specify if you want to execute the queries or inspect them ------------------------------------------ -sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries. See codeToRun_sqlOnly.R for other sqlOnly parameters +sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries +sqlOnlyIncrementalInsert <- FALSE # set to TRUE if you want the generated SQL queries to calculate DQD results and insert them into a database table (@resultsDatabaseSchema.@writeTableName) +sqlOnlyUnionCount <- 1 # in sqlOnlyIncrementalInsert mode, the number of check sqls to union in a single query; higher numbers can improve performance in some DBMS (e.g. a value of 25 may be 25x faster) + +# NOTES specific to sqlOnly <- TRUE option ------------------------------------------------------------ +# 1. You do not need a live database connection. Instead, connectionDetails only needs these parameters: +# connectionDetails <- DatabaseConnector::createConnectionDetails( +# dbms = "", # specify your dbms +# pathToDriver = "/" +# ) +# 2. Since these are fully functional queries, this can help with debugging. +# 3. In the results output by the sqlOnlyIncrementalInsert queries, placeholders are populated for execution_time, query_text, and warnings/errors; and the NOT_APPLICABLE rules are not applied. +# 4. In order to use the generated SQL to insert metadata and check results into output table, you must set sqlOnlyIncrementalInsert = TRUE. 
Otherwise sqlOnly is backwards compatible with <= v2.2.0, generating queries which run the checks but don't store the results. # where should the results and logs go? ---------------------------------------------------------------- outputFolder <- "output" outputFile <- "results.json" + # logging type ------------------------------------------------------------------------------------- verboseMode <- TRUE # set to FALSE if you don't want the logs to be printed to the console -# write results to table? ----------------------------------------------------------------------- writeToTable <- FALSE # set to TRUE if you want to write to a SQL table in the results schema +# write results to table? ------------------------------------------------------------------------------ writeToTable <- TRUE # set to FALSE if you want to skip writing to a SQL table in the results schema + +# specify the name of the results table (used when writeToTable = TRUE and when sqlOnlyIncrementalInsert = TRUE) +writeTableName <- "dqdashboard_results" # write results to a csv file? ----------------------------------------------------------------------- writeToCsv <- FALSE # set to FALSE if you want to skip writing to csv file csvFile <- "" # only needed if writeToCsv is set to TRUE # if writing to table and using Redshift, bulk loading can be initialized ------------------------------- + # Sys.setenv("AWS_ACCESS_KEY_ID" = "", # "AWS_SECRET_ACCESS_KEY" = "", # "AWS_DEFAULT_REGION" = "", @@ -67,46 +84,46 @@ csvFile <- "" # only needed if writeToCsv is set to TRUE checkLevels <- c("TABLE", "FIELD", "CONCEPT") # which DQ checks to run? ------------------------------------ -checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3.1_Check_Desciptions.csv +checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv + +# want to EXCLUDE a pre-specified list of checks? 
run the following code: +# +# checksToExclude <- c() # Names of check types to exclude from your DQD run +# allChecks <- DataQualityDashboard::listDqChecks() +# checkNames <- allChecks$checkDescriptions %>% +# subset(!(checkName %in% checksToExclude)) %>% +# select(checkName) # which CDM tables to exclude? ------------------------------------ -tablesToExclude <- c() +tablesToExclude <- c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN") # list of CDM table names to skip evaluating checks against; by default DQD excludes the vocab tables # run the job -------------------------------------------------------------------------------------- -DataQualityDashboard::executeDqChecks( - connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - resultsDatabaseSchema = resultsDatabaseSchema, - cdmSourceName = cdmSourceName, - cdmVersion = cdmVersion - numThreads = numThreads, - sqlOnly = sqlOnly, - outputFolder = outputFolder, - outputFile = outputFile, - verboseMode = verboseMode, - writeToTable = writeToTable, - writeToCsv = writeToCsv, - csvFile = csvFile, - checkLevels = checkLevels, - tablesToExclude = tablesToExclude, - checkNames = checkNames -) +DataQualityDashboard::executeDqChecks(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + cdmSourceName = cdmSourceName, + cdmVersion = cdmVersion, + numThreads = numThreads, + sqlOnly = sqlOnly, + sqlOnlyUnionCount = sqlOnlyUnionCount, + sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, + outputFolder = outputFolder, + outputFile = outputFile, + verboseMode = verboseMode, + writeToTable = writeToTable, + writeToCsv = writeToCsv, + csvFile = csvFile, + checkLevels = checkLevels, + tablesToExclude = tablesToExclude, + checkNames = checkNames) # inspect logs ---------------------------------------------------------------------------- 
-ParallelLogger::launchLogViewer( - logFileName = file.path(outputFolder, - sprintf("log_DqDashboard_%s.txt", cdmSourceName)) -) - -# View the Data Quality Dashboard using the integrated shiny application ------------------------------------ -DataQualityDashboard::viewDqDashboard( - jsonPath = file.path(getwd(), outputFolder, outputFile) -) +ParallelLogger::launchLogViewer(logFileName = file.path(outputFolder, cdmSourceName, + sprintf("log_DqDashboard_%s.txt", cdmSourceName))) # (OPTIONAL) if you want to write the JSON file to the results table separately ----------------------------- -jsonFilePath <- "" # put the path to the outputted JSON file -DataQualityDashboard::writeJsonResultsToTable( - connectionDetails = connectionDetails, - resultsDatabaseSchema = resultsDatabaseSchema, - jsonFilePath = jsonFilePath -) +jsonFilePath <- "" +DataQualityDashboard::writeJsonResultsToTable(connectionDetails = connectionDetails, + resultsDatabaseSchema = resultsDatabaseSchema, + jsonFilePath = jsonFilePath) + diff --git a/inst/doc/AddNewCheck.pdf b/inst/doc/AddNewCheck.pdf index 50f6d97c..ff24ed57 100644 Binary files a/inst/doc/AddNewCheck.pdf and b/inst/doc/AddNewCheck.pdf differ diff --git a/inst/doc/CheckStatusDefinitions.pdf b/inst/doc/CheckStatusDefinitions.pdf index 0b2209f7..b158d7cb 100644 Binary files a/inst/doc/CheckStatusDefinitions.pdf and b/inst/doc/CheckStatusDefinitions.pdf differ diff --git a/inst/doc/CheckTypeDescriptions.pdf b/inst/doc/CheckTypeDescriptions.pdf index 6fddddff..4fc209fa 100644 Binary files a/inst/doc/CheckTypeDescriptions.pdf and b/inst/doc/CheckTypeDescriptions.pdf differ diff --git a/inst/doc/DataQualityDashboard.pdf b/inst/doc/DataQualityDashboard.pdf index 89042448..9d1faceb 100644 Binary files a/inst/doc/DataQualityDashboard.pdf and b/inst/doc/DataQualityDashboard.pdf differ diff --git a/inst/doc/DqdForCohorts.pdf b/inst/doc/DqdForCohorts.pdf index d692f689..dc381e31 100644 Binary files a/inst/doc/DqdForCohorts.pdf and 
b/inst/doc/DqdForCohorts.pdf differ diff --git a/inst/doc/SqlOnly.pdf b/inst/doc/SqlOnly.pdf index 4f9f5637..f9107177 100644 Binary files a/inst/doc/SqlOnly.pdf and b/inst/doc/SqlOnly.pdf differ diff --git a/inst/doc/Thresholds.pdf b/inst/doc/Thresholds.pdf index 6775e554..bb044544 100644 Binary files a/inst/doc/Thresholds.pdf and b/inst/doc/Thresholds.pdf differ diff --git a/man/executeDqChecks.Rd b/man/executeDqChecks.Rd index 1ae7d2e2..0b7b5f7f 100644 --- a/man/executeDqChecks.Rd +++ b/man/executeDqChecks.Rd @@ -92,5 +92,5 @@ with the fields cohort_definition_id and subject_id.} If sqlOnly = FALSE, a list object of results } \description{ -This function will connect to the database, generate the sql scripts, and run the data quality checks against the database. +This function will connect to the database, generate the sql scripts, and run the data quality checks against the database. By default, results will be written to a json file as well as a database table. } diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 16ae8c13..f1aa8636 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -6,17 +6,9 @@ if (Sys.getenv("DONT_DOWNLOAD_JDBC_DRIVERS", "") == "TRUE") { downloadJdbcDrivers("postgresql", jdbcDriverFolder) downloadJdbcDrivers("sql server", jdbcDriverFolder) downloadJdbcDrivers("oracle", jdbcDriverFolder) + downloadJdbcDrivers("redshift", jdbcDriverFolder) } connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails() cdmDatabaseSchemaEunomia <- "main" resultsDatabaseSchemaEunomia <- "main" - -remove_sql_comments <- function(sql) { - sql0 <- gsub("--.*?\\n|--.*?\\r", " ", sql) # remove single-line SQL comments - sql1 <- gsub("\\r|\\n|\\t", " ", sql0) # convert tabs and newlines to spaces - sql2 <- gsub("/*", "@@@@ ", sql1, fixed = TRUE) # must add spaces between multi-line comments for quote removal to work - sql3 <- gsub("*/", " @@@@", sql2, fixed = TRUE) # must add spaces between multi-line comments for quote removal to 
work - sql4 <- gsub("@@@@ .+? @@@@", " ", sql3, ) # remove multi-line comments - sql5 <- gsub("\\s+", " ", sql4) # remove multiple spaces -} diff --git a/tests/testthat/test-executeDqChecks.R b/tests/testthat/test-executeDqChecks.R index e4183cbd..01cb11b3 100644 --- a/tests/testthat/test-executeDqChecks.R +++ b/tests/testthat/test-executeDqChecks.R @@ -1,5 +1,5 @@ library(testthat) -local_edition(3) +testthat::local_edition(3) test_that("Execute a single DQ check on Synthea/Eunomia", { outputFolder <- tempfile("dqd_") @@ -119,7 +119,8 @@ test_that("Execute a single DQ check on remote databases", { dbTypes <- c( "oracle", "postgresql", - "sql server" + "sql server", + "redshift" ) for (dbType in dbTypes) { @@ -129,7 +130,7 @@ test_that("Execute a single DQ check on remote databases", { if (sysUser != "" & sysPassword != "" & sysServer != "") { - cdmDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_CDM_SCHEMA", toupper(gsub(" ", "_", dbType)))) + cdmDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_CDM54_SCHEMA", toupper(gsub(" ", "_", dbType)))) resultsDatabaseSchema <- Sys.getenv(sprintf("CDM5_%s_OHDSI_SCHEMA", toupper(gsub(" ", "_", dbType)))) connectionDetails <- createConnectionDetails( @@ -151,7 +152,8 @@ test_that("Execute a single DQ check on remote databases", { outputFolder = outputFolder, verboseMode = FALSE, writeToTable = FALSE, - checkNames = "measurePersonCompleteness" + checkNames = "measurePersonCompleteness", + cdmVersion = "5.4" ), regexp = "^Missing check names.*" ) diff --git a/vignettes/DataQualityDashboard.rmd b/vignettes/DataQualityDashboard.rmd index f9c4ada0..1a60e78a 100644 --- a/vignettes/DataQualityDashboard.rmd +++ b/vignettes/DataQualityDashboard.rmd @@ -111,6 +111,14 @@ checkLevels <- c("TABLE", "FIELD", "CONCEPT") # which DQ checks to run? ------------------------------------ checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv +# want to EXCLUDE a pre-specified list of checks?
run the following code: +# +# checksToExclude <- c() # Names of check types to exclude from your DQD run +# allChecks <- DataQualityDashboard::listDqChecks() +# checkNames <- allChecks$checkDescriptions %>% +# subset(!(checkName %in% checksToExclude)) %>% +# select(checkName) + # which CDM tables to exclude? ------------------------------------ tablesToExclude <- c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN") # list of CDM table names to skip evaluating checks against; by default DQD excludes the vocab tables @@ -119,11 +127,13 @@ DataQualityDashboard::executeDqChecks(connectionDetails = connectionDetails, cdmDatabaseSchema = cdmDatabaseSchema, resultsDatabaseSchema = resultsDatabaseSchema, cdmSourceName = cdmSourceName, + cdmVersion = cdmVersion, numThreads = numThreads, sqlOnly = sqlOnly, sqlOnlyUnionCount = sqlOnlyUnionCount, sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, outputFolder = outputFolder, + outputFile = outputFile, verboseMode = verboseMode, writeToTable = writeToTable, writeToCsv = writeToCsv,