Skip to content

Commit

Permalink
completing mincellcount
Browse files Browse the repository at this point in the history
- fixing bug where minCellCount wasn't being used by the aggregate covariates export
- adding tests for all mincellcount cells
- turning off the GHA Mac Spark tests due to an odd Java error on that runner (tested manually on my Mac)
  • Loading branch information
jreps committed Aug 7, 2024
1 parent 0da14c2 commit 638d24e
Show file tree
Hide file tree
Showing 4 changed files with 295 additions and 10 deletions.
23 changes: 20 additions & 3 deletions R/AggregateCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ computeTargetAggregateCovariateAnalyses <- function(
databaseId = "database 1",
outputFolder = file.path(getwd(),'characterization_results'),
minCharacterizationMean = 0,
minCellCount = 0,
...
) {

Expand Down Expand Up @@ -317,7 +318,8 @@ computeTargetAggregateCovariateAnalyses <- function(
cohortDetails = cohortDetails,
counts = counts,
databaseId = databaseId,
minCharacterizationMean = minCharacterizationMean
minCharacterizationMean = minCharacterizationMean,
minCellCount = minCellCount
)

return(invisible(T))
Expand All @@ -337,6 +339,7 @@ computeCaseAggregateCovariateAnalyses <- function(
databaseId = "database 1",
outputFolder = file.path(getwd(),'characterization_results'),
minCharacterizationMean = 0,
minCellCount = 0,
...
) {
# check inputs
Expand Down Expand Up @@ -550,7 +553,8 @@ computeCaseAggregateCovariateAnalyses <- function(
cohortDetails = cohortDetails,
counts = counts,
databaseId = databaseId,
minCharacterizationMean = minCharacterizationMean
minCharacterizationMean = minCharacterizationMean,
minCellCount = minCellCount
)
exportAndromedaToCsv(
andromeda = result2,
Expand All @@ -559,7 +563,8 @@ computeCaseAggregateCovariateAnalyses <- function(
counts = NULL, # previously added
databaseId = databaseId,
minCharacterizationMean = minCharacterizationMean,
includeSettings = F
includeSettings = F,
minCellCount = minCellCount
)

return(invisible(T))
Expand Down Expand Up @@ -654,6 +659,8 @@ exportAndromedaToCsv <- function(
ParallelLogger::logInfo(paste0("Removing sum_value counts less than ", minCellCount))
if (sum(removeInd) > 0) {
data$sum_value[removeInd] <- -1*minCellCount
# adding other calculated columns
data$average_value[removeInd] <- NA
}
}

Expand Down Expand Up @@ -686,6 +693,16 @@ exportAndromedaToCsv <- function(
ParallelLogger::logInfo(paste0("Removing count_value counts less than ", minCellCount))
if (sum(removeInd) > 0) {
data$count_value[removeInd] <- -1*minCellCount
# adding columns calculated from count
data$min_value[removeInd] <- NA
data$max_value[removeInd] <- NA
data$average_value[removeInd] <- NA
data$standard_deviation[removeInd] <- NA
data$median_value[removeInd] <- NA
data$p_10_value[removeInd] <- NA
data$p_25_value[removeInd] <- NA
data$p_75_value[removeInd] <- NA
data$p_90_value[removeInd] <- NA
}
}

Expand Down
29 changes: 22 additions & 7 deletions R/SaveLoad.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
exportTimeToEventToCsv <- function(
result,
saveDirectory,
minCellCount = 0) {
minCellCount = 0
) {
if (!dir.exists(saveDirectory)) {
dir.create(
path = saveDirectory,
Expand Down Expand Up @@ -120,53 +121,67 @@ exportDechallengeRechallengeToCsv <- function(
string = colnames(dat)
)

removeInd <- dat$num_exposure_eras < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Censoring num_exposure_eras counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$num_exposure_eras[removeInd] <- -minCellCount
}
}

removeInd <- dat$num_persons_exposed < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing num_persons_exposed counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring num_persons_exposed counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$num_persons_exposed[removeInd] <- -minCellCount
}
}

removeInd <- dat$num_cases < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing num_cases counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring num_cases counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$num_cases[removeInd] <- -minCellCount
}
}

removeInd <- dat$dechallenge_attempt < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing dechallenge_attempt counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring/removing dechallenge_attempt counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$dechallenge_attempt[removeInd] <- -minCellCount
dat$pct_dechallenge_attempt[removeInd] <- NA
}
}

removeInd <- dat$dechallenge_fail < minCellCount | dat$dechallenge_success < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing DECHALLENGE FAIL or SUCCESS counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring/removing DECHALLENGE FAIL or SUCCESS counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$dechallenge_fail[removeInd] <- -minCellCount
dat$dechallenge_success[removeInd] <- -minCellCount
dat$pct_dechallenge_fail[removeInd] <- NA
dat$pct_dechallenge_success[removeInd] <- NA
}
}

removeInd <- dat$rechallenge_attempt < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing rechallenge_attempt counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring/removing rechallenge_attempt counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$rechallenge_attempt[removeInd] <- -minCellCount
dat$pct_rechallenge_attempt[removeInd] <- NA
}
}

removeInd <- dat$rechallenge_fail < minCellCount | dat$rechallenge_success < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing rechallenge_fail or rechallenge_success counts less than ", minCellCount))
ParallelLogger::logInfo(paste0("Censoring/removing rechallenge_fail or rechallenge_success counts less than ", minCellCount))
if (sum(removeInd) > 0) {
dat$rechallenge_fail[removeInd] <- -minCellCount
dat$rechallenge_success[removeInd] <- -minCellCount
dat$pct_rechallenge_fail[removeInd] <- NA
dat$pct_rechallenge_success[removeInd] <- NA
}
}

Expand Down
5 changes: 5 additions & 0 deletions tests/testthat/test-dbs.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
cohortDatabaseSchema <- Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA")
options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA"))
} else if (dbmsPlatform == "spark") {
if (.Platform$OS.type == "windows") { # skipping Mac for GHA due to JAVA issue
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = dbmsPlatform,
user = Sys.getenv("CDM5_SPARK_USER"),
Expand All @@ -113,6 +114,10 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
vocabularyDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
cohortDatabaseSchema <- Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA")
options(sqlRenderTempEmulationSchema = Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA"))
}
else{
return(NULL)
}
} else if (dbmsPlatform == "sql server") {
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = dbmsPlatform,
Expand Down
Loading

0 comments on commit 638d24e

Please sign in to comment.