Skip to content

Commit

Permalink
prepare release (#439)
Browse files Browse the repository at this point in the history
* Update DESCRIPTION

* only restrict to first if many observations per subjectId

* fix assignment operator in configurePython (#421)

* Tibble dependency removal (#422)

* remove unnecessary remotes (#423)

* Study population improvements (#424)

* assign population if existing and added a test (#428)

* 429 save cdm database name (#430)

* save dev database name and schema in trainDetails (#434)

* preserve attributes when splitting data

* Prevent plpData from being evaluated during do.call (#436)

* test improvements (#438)

* fix duplicate vignette titles

---------

Co-authored-by: jreps <[email protected]>
Co-authored-by: Henrik <[email protected]>
  • Loading branch information
3 people authored Apr 5, 2024
1 parent 7a55c31 commit 9e37b51
Show file tree
Hide file tree
Showing 53 changed files with 373 additions and 333 deletions.
3 changes: 1 addition & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ compare_versions
.github
docs/*
_pkgdown.yml


^vignettes/articles$
6 changes: 3 additions & 3 deletions .github/workflows/R_CMD_check_Hades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ jobs:
path: check/*.tar.gz

- name: Install covr
if: runner.os == 'Windows'
if: runner.os == 'Linux'
run: |
remotes::install_cran("covr")
shell: Rscript {0}

- name: Test coverage
if: runner.os == 'Windows'
if: runner.os == 'Linux'
run: covr::codecov()
shell: Rscript {0}

Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*-Ex.R
# R data files from past sessions
.Rdata
# R environ
.Renviron
# RStudio files
.Rproj.user/
.Rproj.user
Expand All @@ -20,4 +22,4 @@ standalone/build/*
/plpmodels/*
/python_models/*
/mycache/*
/inst/shiny/DiagnosticsExplorer/rsconnect/*
/inst/shiny/DiagnosticsExplorer/rsconnect/*
11 changes: 4 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: PatientLevelPrediction
Type: Package
Title: Developing patient level prediction using data in the OMOP Common Data
Model
Version: 6.3.6
Date: 2023-10-09
Version: 6.3.7
Date: 2024-04-04
Authors@R: c(
person("Jenna", "Reps", email = "[email protected]", role = c("aut", "cre")),
person("Martijn", "Schuemie", role = c("aut")),
Expand Down Expand Up @@ -43,7 +43,6 @@ Imports:
rlang,
SqlRender (>= 1.1.3),
survival,
tibble,
tidyr,
utils
Suggests:
Expand Down Expand Up @@ -74,9 +73,7 @@ Remotes:
ohdsi/BigKnn,
ohdsi/Eunomia,
ohdsi/FeatureExtraction,
ohdsi/IterativeHardThresholding,
ohdsi/ParallelLogger,
ohdsi/ShinyAppBuilder,
ohdsi/ResultModelManager
RoxygenNote: 7.2.3
ohdsi/ResultModelManager,
RoxygenNote: 7.3.1
Encoding: UTF-8
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
PatientLevelPrediction 6.3.7
======================
- Clean up dependencies: tibble removed; IterativeHardThresholding and ParallelLogger now installed from CRAN
- Use cohortIds for cohortCovariates to comply with FeatureExtraction
- Add cdmDatabaseName from DatabaseDetails to model output
- Fix bug when attributes weren't preserved on trainData$covariateData after split
- Fix warnings in tests and speed them up
- Fix bug in assignment operator in configurePython
- Delay evaluation of plpData when using do.call like in learningCurves and
runMultiplePlp
- Speed up population generation when subjectIds are distinct
- Fix bug when population was still generated when provided to runPlp


PatientLevelPrediction 6.3.6
======================
- fix bug with ohdsi shiny modules version check (issue 415)
Expand Down
8 changes: 4 additions & 4 deletions R/AdditionalCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' @param cohortTable the table name that contains the target population cohort
#' @param rowIdField string representing the unique identifier in the target population cohort
#' @param aggregated whether the covariate should be aggregated
#' @param cohortId cohort id for the target cohort
#' @param cohortIds cohort id for the target cohort
#' @param covariateSettings settings for the covariate cohorts and time periods
#'
#' @return
Expand All @@ -45,7 +45,7 @@ getCohortCovariateData <- function(
cohortTable = "#cohort_person",
rowIdField = "row_id",
aggregated,
cohortId,
cohortIds,
covariateSettings
){

Expand All @@ -69,7 +69,7 @@ getCohortCovariateData <- function(
sql,
covariate_cohort_schema = covariateSettings$cohortDatabaseSchema,
covariate_cohort_table = covariateSettings$cohortTable,
covariate_cohort_id = covariateSettings$cohortId,
covariate_cohort_id = covariateSettings$cohortIds,
cohort_temp_table = cohortTable,
row_id_field = rowIdField,
startDay = covariateSettings$startDay,
Expand Down Expand Up @@ -191,7 +191,7 @@ createCohortCovariateSettings <- function(
covariateId = cohortId*100000+settingId*1000+analysisId,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
cohortId = cohortId,
cohortIds = cohortId,
startDay = startDay,
endDays = endDay,
count = count,
Expand Down
2 changes: 1 addition & 1 deletion R/AndromedaHelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ calculatePrevs <- function(plpData, population){
#===========================

# add population to sqllite
population <- tibble::as_tibble(population)
population <- dplyr::as_tibble(population)
plpData$covariateData$population <- population %>%
dplyr::select("rowId", "outcomeCount")

Expand Down
3 changes: 2 additions & 1 deletion R/CyclopsModels.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ fitCyclopsModel <- function(
trainDetails = list(
analysisId = analysisId,
analysisSource = '', #TODO add from model
developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName,
developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema,
attrition = attr(trainData, "metaData")$attrition,
trainingTime = paste(as.character(abs(comp)), attr(comp,'units')),
trainingDate = Sys.Date(),
Expand Down
7 changes: 2 additions & 5 deletions R/DataSplitting.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ splitData <- function(plpData = plpData,
outcomeId = attr(population, "metaData")$outcomeId,
targetId = attr(population, "metaData")$targetId,
cdmDatabaseSchema = plpData$metaData$databaseDetails$cdmDatabaseSchema,
cdmDatabaseName = plpData$metaData$databaseDetails$cdmDatabaseName,
cdmDatabaseId = plpData$metaData$databaseDetails$cdmDatabaseId,
restrictPlpDataSettings = attr(population, "metaData")$restrictPlpDataSettings,
covariateSettings = plpData$metaData$covariateSettings,
Expand Down Expand Up @@ -186,6 +187,7 @@ splitData <- function(plpData = plpData,
outcomeId = attr(population, "metaData")$outcomeId,
targetId = attr(population, "metaData")$targetId,
cdmDatabaseSchema = plpData$metaData$databaseDetails$cdmDatabaseSchema,
cdmDatabaseName = plpData$metaData$databaseDetails$cdmDatabaseName,
cdmDatabaseId = plpData$metaData$databaseDetails$cdmDatabaseId,
restrictPlpDataSettings = attr(population, "metaData")$restrictPlpDataSettings,
covariateSettings = plpData$metaData$covariateSettings,
Expand All @@ -195,10 +197,6 @@ splitData <- function(plpData = plpData,
populationSize = nrow(trainData$labels)
)

# add pop size to covariateData as used in tidyCovariates
attr(trainData$covariateData, "metaData") <- list(populationSize = nrow(trainData$labels))
class(trainData$covariateData) <- "CovariateData"

testId <- splitId[splitId$index<0,]
testData <- list()
class(testData) <- 'plpData'
Expand All @@ -214,7 +212,6 @@ splitData <- function(plpData = plpData,
data.frame(rowId = testId$rowId),
sizeN = 10000000)
}
class(testData$covariateData) <- "CovariateData"

result <- list(
Train = trainData,
Expand Down
20 changes: 10 additions & 10 deletions R/DemographicSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,30 +111,30 @@ getDemographicSummary_survival <- function(prediction, evalColumn, timepoint = N
tempDemo <- demographicSum %>%
dplyr::filter( .data$genGroup == gen & .data$ageGroup == age )

if(nrow(tempDemo)>0){
t1 <- tempDemo %>% dplyr::select("t")
y1 <- tempDemo %>% dplyr::select("y")
p1 <- tempDemo %>% dplyr::select("value")
if (nrow(tempDemo) > 1 & length(unique(tempDemo$y)) > 1) {
t <- tempDemo$t
y <- tempDemo$y
value <- tempDemo$value

out <- tryCatch(
{
summary(
survival::survfit(survival::Surv(t1$t, y1$y) ~ 1),
survival::survfit(survival::Surv(t, y) ~ 1),
times = timepoint
)
},
error = function(e){ParallelLogger::logError(e); return(NULL)}
error = function(e){ParallelLogger::logError(e);return(NULL)}
)

if(!is.null(out)){
demoTemp <- c(
genGroup = gen,
ageGroup = age,
PersonCountAtRisk = length(p1$value),
PersonCountWithOutcome = round(length(p1$value)*(1-out$surv)),
PersonCountAtRisk = length(value),
PersonCountWithOutcome = round(length(value)*(1-out$surv)),
observedRisk = 1-out$surv,
averagePredictedProbability = mean(p1$value, na.rm = T),
StDevPredictedProbability = stats::sd(p1$value, na.rm = T)
averagePredictedProbability = mean(value, na.rm = T),
StDevPredictedProbability = stats::sd(value, na.rm = T)
)

demographicData <- rbind(demographicData, demoTemp)
Expand Down
2 changes: 1 addition & 1 deletion R/EvaluationSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ getEvaluationStatistics_binary <- function(prediction, evalColumn, ...){

# auc
ParallelLogger::logInfo(paste0('Calculating Performance for ', evalType))
ParallelLogger::logInfo('=============')
ParallelLogger::logInfo('=============')

ParallelLogger::logTrace('Calculating AUC')
auc <- computeAuc(predictionOfInterest, confidenceInterval = T)
Expand Down
39 changes: 19 additions & 20 deletions R/FeatureImportance.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,37 +105,36 @@ pfi <- function(plpResult, population, plpData, repeats = 1,
ParallelLogger::logInfo(paste0('Using all ', cores))
ParallelLogger::logInfo(paste0('Set cores input to use fewer...'))
}

getVpiSettings <- function(i) {
result <- list(plpModel = plpResult$model,
population = population,
plpDataLocation = plpDataLocation,
covariateId = covariates[i],
repeats = repeats)
return(result)
}
if (cores > 1) {
cluster <- ParallelLogger::makeCluster(numberOfThreads = cores)
ParallelLogger::clusterRequire(cluster, c("PatientLevelPrediction", "Andromeda"))


getVpiSettings <- function(i){
result <-list(plpModel = plpResult$model,
population = population,
plpDataLocation = plpDataLocation,
covariateId = covariates[i],
repeats = repeats)
return(result)
}
vpiSettings <- lapply(1:length(covariates), getVpiSettings)


#lapply(vpiSettings, function(x) do.call(permutePerf, x))
aucP <- ParallelLogger::clusterApply(cluster = cluster,
x = vpiSettings,
fun = permutePerf,
stopOnError = FALSE,
progressBar = TRUE)
ParallelLogger::stopCluster(cluster)

} else {
ParallelLogger::logInfo("Running in serial")
aucP <- lapply(1:length(covariates), function(i) {
permutePerf(getVpiSettings(i))
})
}
aucP <- do.call(c, aucP)

# do this in parellel

varImp <- data.frame(covariateId = covariates,
pfi = auc-aucP)

pfi = auc - aucP)
return(varImp)

}
Expand Down Expand Up @@ -200,7 +199,7 @@ permute <- function(plpDataLocation,cId,population){

# find a new random selection of people and give them the covariate and value
newPlp <- sample(population$rowId,nSamp)
newData <- tibble::as_tibble(cbind(rowId = newPlp,coi[,-1]))
newData <- dplyr::as_tibble(cbind(rowId = newPlp,coi[,-1]))

# swap old covariate data with new
plpData$covariateData$covariates <- plpData$covariateData$covariates %>% dplyr::filter(.data$covariateId != !!cId) %>% dplyr::collect()
Expand All @@ -215,7 +214,7 @@ permute <- function(plpDataLocation,cId,population){

# sample the pop to replace
swapPlp <- sample(population$rowId,nSamp)
haveCidDataSwapped <- tibble::as_tibble(cbind(rowId = swapPlp,haveCidData[,-1]))
haveCidDataSwapped <- dplyr::as_tibble(cbind(rowId = swapPlp,haveCidData[,-1]))

# find the swapped people to switch
connectedCovs <- plpData$covariateData$covariateRef %>%
Expand All @@ -228,7 +227,7 @@ permute <- function(plpDataLocation,cId,population){
dplyr::filter(.data$rowId %in% swapPlp) %>%
dplyr::collect()

swappedForCid <- tibble::as_tibble(cbind(rowId = haveCidData$rowId[1:nrow(plpToSwap)],plpToSwap[,-1]))
swappedForCid <- dplyr::as_tibble(cbind(rowId = haveCidData$rowId[1:nrow(plpToSwap)],plpToSwap[,-1]))


# swap old covariate data with new
Expand Down
6 changes: 4 additions & 2 deletions R/HelperFunctions.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# fix issue with nrow - temp fix for me locally
nrow <- function(x){UseMethod("nrow",x)}
#' @exportS3Method NULL
nrow.default <- base::nrow
#' @exportS3Method NULL
nrow.tbl <- function(x){x %>% dplyr::tally() %>% dplyr::pull()}


Expand Down Expand Up @@ -101,9 +103,9 @@ configurePython <- function(envname='PLP', envtype=NULL, condaPythonVersion="3.1

if(is.null(envtype)){
if(getOs()=='windows'){
envtype=='conda'
envtype <- "conda"
} else {
envtype=='python'
envtype <- "python"
}
}

Expand Down
3 changes: 2 additions & 1 deletion R/KNN.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ fitKNN <- function(trainData, modelSettings, search = 'none', analysisId, ...){

trainDetails = list(
analysisId = analysisId,
developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema,
developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName,
developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema,
attrition = attr(trainData, "metaData")$attrition,
trainingTime = paste(as.character(abs(comp)), attr(comp,'units')),
trainingDate = Sys.Date(),
Expand Down
8 changes: 4 additions & 4 deletions R/LearningCurve.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ createLearningCurve <- function(
nRuns <- length(trainFractions)

settings = list(
plpData = plpData,
plpData = quote(plpData),
outcomeId = outcomeId,
analysisId = analysisId,
populationSettings = populationSettings,
Expand Down Expand Up @@ -238,7 +238,7 @@ createLearningCurve <- function(

lcWrapper <- function(settings){
plpData <- PatientLevelPrediction::loadPlpData(settings$plpData)
settings$plpData <- plpData
settings$plpData <- quote(plpData)
result <- tryCatch({do.call(runPlp, settings)},
warning = function(war) {
ParallelLogger::logInfo(paste0('a warning: ', war))
Expand Down Expand Up @@ -470,8 +470,8 @@ plotLearningCurve <- function(learningCurve,

# create plot object
plot <- tidyLearningCurve %>%
ggplot2::ggplot(ggplot2::aes_string(x = abscissa, y= 'value',
col = "Dataset")) +
ggplot2::ggplot(ggplot2::aes(x = .data[[abscissa]], y = .data[['value']],
col = .data[["Dataset"]])) +
ggplot2::geom_line() +
ggplot2::coord_cartesian(ylim = yAxisRange, expand = FALSE) +
ggplot2::labs(title = plotTitle, subtitle = plotSubtitle,
Expand Down
3 changes: 1 addition & 2 deletions R/PatientLevelPrediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
#'
#' @description A package for running predictions using data in the OMOP CDM
#'
#' @docType package
#' @name PatientLevelPrediction
#' @keywords internal
#' @importFrom dplyr %>%
#' @importFrom rlang .data
NULL
"_PACKAGE"

#' A simulation profile
#' @docType data
Expand Down
Loading

0 comments on commit 9e37b51

Please sign in to comment.