Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NotApplicable fix for measureConditionEraCompleteness #527

Merged
merged 9 commits into from
Jul 12, 2024
65 changes: 34 additions & 31 deletions R/calculateNotApplicableStatus.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,48 @@
naCheckNames <- c("cdmTable", "cdmField", "measureValueCompleteness")
missingNAChecks <- !(naCheckNames %in% checkNames)
if (any(missingNAChecks)) {
missingNACheckNames <- paste(naCheckNames[missingNAChecks], collapse = ", ")
return(FALSE)
}
return(TRUE)
}

#' Applies the 'Not Applicable' status to a single check
#'
#' @param x Results from a single check
#'
#' @keywords internal
.applyNotApplicable <- function(x) {
# Errors precede all other statuses
if (x$isError == 1) {
return(0)
}

# No NA status for cdmTable and cdmField if missing
if (x$checkName == "cdmTable" || x$checkName == "cdmField") {
return(0)
}

if (any(x$tableIsMissing, x$fieldIsMissing, x$tableIsEmpty, na.rm = TRUE)) {
return(1)
}

# No NA status for measureValueCompleteness if empty
if (x$checkName == "measureValueCompleteness") {
return(0)
}

if (any(x$fieldIsEmpty, x$conceptIsMissing, x$conceptAndUnitAreMissing, na.rm = TRUE)) {
return(1)
}

return(0)
}

#' Determines if check should be notApplicable and the notApplicableReason
#'
#' @param checkResults A dataframe containing the results of the data quality checks
#'
#' @keywords internal

.calculateNotApplicableStatus <- function(checkResults) {
# Look up missing tables and add variable tableIsMissing to checkResults
missingTables <- checkResults %>%
Expand Down Expand Up @@ -81,7 +111,7 @@
) %>%
dplyr::distinct()

# Look up empty fields and add variable tableIsEmpty to checkResults
# Look up empty fields and add variable fieldIsEmpty to checkResults
emptyFields <- checkResults %>%
dplyr::filter(
.data$checkName == "measureValueCompleteness"
Expand Down Expand Up @@ -148,7 +178,7 @@
.data$tableIsEmpty ~ sprintf("Table %s is empty.", .data$cdmTableName),
.data$fieldIsEmpty ~ sprintf("Field %s.%s is not populated.", .data$cdmTableName, .data$cdmFieldName),
.data$conceptIsMissing ~ sprintf("%s=%s is missing from the %s table.", .data$cdmFieldName, .data$conceptId, .data$cdmTableName),
.data$conceptAndUnitAreMissing ~ sprintf("Combination of %s=%s, unitConceptId=%s and VALUE_AS_NUMBER IS NOT NULL is missing from the %s table.", .data$cdmFieldName, .data$conceptId, .data$unitConceptId, .data$cdmTableName)
.data$conceptAndUnitAreMissing ~ sprintf("Combination of %s=%s, unitConceptId=%s and VALUE_AS_NUMBER IS NOT NULL is missing from the %s table.", .data$cdmFieldName, .data$conceptId, .data$unitConceptId, .data$cdmTableName) #nolint
),
NA
),
Expand All @@ -159,30 +189,3 @@

return(checkResults)
}

.applyNotApplicable <- function(x) {
# Errors precede all other statuses
if (x$isError == 1) {
return(0)
}

# No NA status for cdmTable and cdmField if missing
if (x$checkName == "cdmTable" || x$checkName == "cdmField") {
return(0)
}

if (any(x$tableIsMissing, x$fieldIsMissing, x$tableIsEmpty, na.rm = TRUE)) {
return(1)
}

# No NA status for measureValueCompleteness if empty
if (x$checkName == "measureValueCompleteness") {
return(0)
}

if (any(x$fieldIsEmpty, x$conceptIsMissing, x$conceptAndUnitAreMissing, na.rm = TRUE)) {
return(1)
}

return(0)
}
3 changes: 3 additions & 0 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ if (Sys.getenv("DONT_DOWNLOAD_JDBC_DRIVERS", "") == "TRUE") {
connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails()
cdmDatabaseSchemaEunomia <- "main"
resultsDatabaseSchemaEunomia <- "main"

# Separate connection details for NA tests, as this requires removing records
connectionDetailsEunomiaNaChecks <- Eunomia::getEunomiaConnectionDetails()
26 changes: 15 additions & 11 deletions tests/testthat/test-calculateNotApplicableStatus.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,23 @@ test_that("Not Applicable status Table Empty", {
outputFolder <- tempfile("dqd_")
on.exit(unlink(outputFolder, recursive = TRUE))

# Make sure the device exposure table is empty
connection <- DatabaseConnector::connect(connectionDetailsEunomia)
MaximMoinat marked this conversation as resolved.
Show resolved Hide resolved
DatabaseConnector::executeSql(connection, "DELETE FROM DEVICE_EXPOSURE;")
DatabaseConnector::disconnect(connection)

results <- executeDqChecks(
connectionDetails = connectionDetailsEunomia,
connectionDetails = connectionDetailsEunomiaNaChecks,
cdmDatabaseSchema = cdmDatabaseSchemaEunomia,
resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
cdmSourceName = "Eunomia",
checkNames = c("cdmTable", "cdmField", "measureValueCompleteness"),
# Eunomia COST table has misspelled 'REVEUE_CODE_SOURCE_VALUE'
tablesToExclude = c("COST", "CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN"),
outputFolder = outputFolder,
writeToTable = F
writeToTable = FALSE
)

# Assumption that Eunomia has empty device_exposure table
r <- results$CheckResults[results$CheckResults$checkName == "measureValueCompleteness" &
results$CheckResults$tableName == "device_exposure", ]
expect_true(all(r$notApplicable == 1))
Expand All @@ -27,25 +31,25 @@ test_that("measureConditionEraCompleteness Not Applicable if condition_occurrenc
on.exit(unlink(outputFolder, recursive = TRUE))

# Remove records from Condition Occurrence
connection <- DatabaseConnector::connect(connectionDetailsEunomia)
connection <- DatabaseConnector::connect(connectionDetailsEunomiaNaChecks)
DatabaseConnector::executeSql(connection, "CREATE TABLE CONDITION_OCCURRENCE_BACK AS SELECT * FROM CONDITION_OCCURRENCE;")
DatabaseConnector::executeSql(connection, "DELETE FROM CONDITION_OCCURRENCE;")
DatabaseConnector::disconnect(connection)

results <- executeDqChecks(
connectionDetails = connectionDetailsEunomia,
connectionDetails = connectionDetailsEunomiaNaChecks,
cdmDatabaseSchema = cdmDatabaseSchemaEunomia,
resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
cdmSourceName = "Eunomia",
checkNames = c("cdmTable", "cdmField", "measureValueCompleteness", "measureConditionEraCompleteness"),
# Eunomia COST table has misspelled 'REVEUE_CODE_SOURCE_VALUE'
tablesToExclude = c("COST", "CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN"),
outputFolder = outputFolder,
writeToTable = F
writeToTable = FALSE
)

# Reinstate Condition Occurrence
connection <- DatabaseConnector::connect(connectionDetailsEunomia)
connection <- DatabaseConnector::connect(connectionDetailsEunomiaNaChecks)
DatabaseConnector::executeSql(connection, "INSERT INTO CONDITION_OCCURRENCE SELECT * FROM CONDITION_OCCURRENCE_BACK;")
DatabaseConnector::executeSql(connection, "DROP TABLE CONDITION_OCCURRENCE_BACK;")
disconnect(connection)
Expand All @@ -59,25 +63,25 @@ test_that("measureConditionEraCompleteness Fails if condition_era empty", {
on.exit(unlink(outputFolder, recursive = TRUE))

# Remove records from Condition Era
connection <- DatabaseConnector::connect(connectionDetailsEunomia)
connection <- DatabaseConnector::connect(connectionDetailsEunomiaNaChecks)
DatabaseConnector::executeSql(connection, "CREATE TABLE CONDITION_ERA_BACK AS SELECT * FROM CONDITION_ERA;")
DatabaseConnector::executeSql(connection, "DELETE FROM CONDITION_ERA;")
DatabaseConnector::disconnect(connection)

results <- executeDqChecks(
connectionDetails = connectionDetailsEunomia,
connectionDetails = connectionDetailsEunomiaNaChecks,
cdmDatabaseSchema = cdmDatabaseSchemaEunomia,
resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
cdmSourceName = "Eunomia",
checkNames = c("cdmTable", "cdmField", "measureValueCompleteness", "measureConditionEraCompleteness"),
# Eunomia COST table has misspelled 'REVEUE_CODE_SOURCE_VALUE'
tablesToExclude = c("COST", "CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN"),
outputFolder = outputFolder,
writeToTable = F
writeToTable = FALSE
)

# Reinstate the Condition Era
connection <- DatabaseConnector::connect(connectionDetailsEunomia)
connection <- DatabaseConnector::connect(connectionDetailsEunomiaNaChecks)
DatabaseConnector::executeSql(connection, "INSERT INTO CONDITION_ERA SELECT * FROM CONDITION_ERA_BACK;")
DatabaseConnector::executeSql(connection, "DROP TABLE CONDITION_ERA_BACK;")
DatabaseConnector::disconnect(connection)
Expand Down
Loading