diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml index b85c0c946..f7c4ea17e 100644 --- a/.github/workflows/R_CMD_check_Hades.yaml +++ b/.github/workflows/R_CMD_check_Hades.yaml @@ -67,8 +67,6 @@ jobs: - name: Setup conda uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: "r-reticulate" - uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index a16e3f37e..dde325a53 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: PatientLevelPrediction Type: Package Title: Developing patient level prediction using data in the OMOP Common Data Model -Version: 6.3.5 -Date: 2023-08-15 +Version: 6.3.6 +Date: 2023-10-09 Authors@R: c( person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut", "cre")), person("Martijn", "Schuemie", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index be5c6e18f..ea37b5ad1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +PatientLevelPrediction 6.3.6 +====================== +- fix bug with ohdsi shiny modules version check (issue 415) + PatientLevelPrediction 6.3.5 ====================== - Fix sklearnToJson to be compatible with scikit-learn>=1.3 diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index e3b939ff6..bf5f71b1b 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -94,9 +94,10 @@ listAppend <- function(a, b){ #' #' @param envname A string for the name of the virtual environment (default is 'PLP') #' @param envtype An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users +#' @param condaPythonVersion String, Python version to use when creating a conda environment #' #' @export -configurePython <- function(envname='PLP', envtype=NULL){ +configurePython <- function(envname='PLP', envtype=NULL, condaPythonVersion="3.11"){ if(is.null(envtype)){ if(getOs()=='windows'){ @@ -113,7 +114,7 @@ configurePython <- function(envname='PLP', envtype=NULL){ warning(paste0('Conda environment ', envname,' exists. You can use reticulate::conda_remove() to remove if you want to fresh config')) } else { ParallelLogger::logInfo(paste0('Creating virtual conda environment called ', envname)) - location <- reticulate::conda_create(envname=envname, packages = "python", conda = "auto") + location <- reticulate::conda_create(envname=envname, packages = paste0("python==", condaPythonVersion), conda = "auto") } packages <- c('numpy','scipy','scikit-learn', 'pandas','pydotplus','joblib') ParallelLogger::logInfo(paste0('Adding python dependancies to ', envname)) diff --git a/R/ViewShinyPlp.R b/R/ViewShinyPlp.R index eb4f5c2cf..65d60b8b0 100644 --- a/R/ViewShinyPlp.R +++ b/R/ViewShinyPlp.R @@ -154,7 +154,7 @@ viewPlps <- function(databaseSettings){ ) } else{ ohdsiModulesVersion <- strsplit(x = as.character(utils::packageVersion('OhdsiShinyModules')), split = '\\.')[[1]] - if(ohdsiModulesVersion[1]>=1 & ohdsiModulesVersion[2]>= 2){ + if(paste0(ohdsiModulesVersion[1], ".", ohdsiModulesVersion[2])>= 1.2){ config <- ParallelLogger::loadSettingsFromJson( fileName = system.file( 'shinyConfigUpdate.json', diff --git a/R/uploadToDatabase.R b/R/uploadToDatabase.R index eef4f2021..a00c61e5c 100644 --- a/R/uploadToDatabase.R +++ b/R/uploadToDatabase.R @@ -1145,7 +1145,7 @@ addCohort <- function( targetDialect = targetDialect, tableName = 'cohort_definition', columnNames = c('cohort_name'), - values = c(paste0("'",cohortDefinition$cohortName,"'")), + values = c(paste0("'",gsub('\'', '', 
cohortDefinition$cohortName),"'")), tempEmulationSchema = tempEmulationSchema ) @@ -1157,12 +1157,12 @@ addCohort <- function( if(addNew){ cohortDefinitionId <- result$cohortDefinitionId[result$json %in% json] - ParallelLogger::logInfo(paste0('Cohort ',cohortDefinition$cohortName,' exists in cohort_definition with cohort id', result$cohortDefinitionId[result$json %in% json])) + ParallelLogger::logInfo(paste0('Cohort ',gsub('\'', '', cohortDefinition$cohortName),' exists in cohort_definition with cohort id', result$cohortDefinitionId[result$json %in% json])) } else{ - ParallelLogger::logInfo(paste0('Adding cohort ',cohortDefinition$cohortName)) + ParallelLogger::logInfo(paste0('Adding cohort ',gsub('\'', '', cohortDefinition$cohortName))) data <- data.frame( - cohortName = cohortDefinition$cohortName, + cohortName = gsub('\'', '', cohortDefinition$cohortName), cohortDefinitionId = cohortDefinition$cohortId, json = json ) @@ -1187,7 +1187,7 @@ addCohort <- function( targetDialect = targetDialect, tableName = 'cohort_definition', columnNames = c('cohort_name', 'cohort_definition_id'), - values = c(paste0("'",cohortDefinition$cohortName,"'"), cohortDefinition$cohortId), + values = c(paste0("'",gsub('\'', '', cohortDefinition$cohortName),"'"), cohortDefinition$cohortId), tempEmulationSchema = tempEmulationSchema ) @@ -1202,18 +1202,18 @@ addCohort <- function( targetDialect = targetDialect, tableName = 'cohorts', columnNames = c('cohort_definition_id','cohort_name'), - values = c(cohortDefinitionId, paste0("'",cohortDefinition$cohortName,"'")), + values = c(cohortDefinitionId, paste0("'",gsub('\'', '', cohortDefinition$cohortName),"'")), tempEmulationSchema = tempEmulationSchema ) if(nrow(result)>0){ - ParallelLogger::logInfo(paste0('Cohort ',cohortDefinition$cohortName,' exists in cohorts with cohort id', result$cohortId)) + ParallelLogger::logInfo(paste0('Cohort ',gsub('\'', '', cohortDefinition$cohortName),' exists in cohorts with cohort id', result$cohortId)) } 
else{ - ParallelLogger::logInfo(paste0('Adding cohort ',cohortDefinition$cohortName)) + ParallelLogger::logInfo(paste0('Adding cohort ',gsub('\'', '', cohortDefinition$cohortName))) data <- data.frame( cohortDefinitionId = cohortDefinitionId, - cohortName = cohortDefinition$cohortName + cohortName = gsub('\'', '', cohortDefinition$cohortName) ) DatabaseConnector::insertTable( connection = conn, @@ -1235,7 +1235,7 @@ addCohort <- function( targetDialect = targetDialect, tableName = 'cohorts', columnNames = c('cohort_definition_id','cohort_name'), - values = c(cohortDefinitionId, paste0("'",cohortDefinition$cohortName,"'")), + values = c(cohortDefinitionId, paste0("'",gsub('\'', '', cohortDefinition$cohortName),"'")), tempEmulationSchema = tempEmulationSchema ) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ad97fef6..d3f9ef550 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -20,6 +20,8 @@ navbar: - reference - articles - tutorial + - benchmarks + - predictors - bestpractice - news right: [hades, github] @@ -39,6 +41,12 @@ navbar: bestpractice: text: Best Practices href: articles/BestPractices.html + benchmarks: + text: Benchmarks + href: articles/BenchmarkTasks.html + predictors: + text: Predictors + href: articles/ConstrainedPredictors.html news: text: Changelog href: news/index.html diff --git a/man/configurePython.Rd b/man/configurePython.Rd index 9d014fa94..99e8d79aa 100644 --- a/man/configurePython.Rd +++ b/man/configurePython.Rd @@ -4,12 +4,14 @@ \alias{configurePython} \title{Sets up a virtual environment to use for PLP (can be conda or python)} \usage{ -configurePython(envname = "PLP", envtype = NULL) +configurePython(envname = "PLP", envtype = NULL, condaPythonVersion = "3.11") } \arguments{ \item{envname}{A string for the name of the virtual environment (default is 'PLP')} \item{envtype}{An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users} + +\item{condaPythonVersion}{String, Python version to use when creating a conda environment} } \description{ Sets up a virtual environment to use for PLP (can be conda or python) diff --git a/vignettes/BenchmarkTasks.Rmd b/vignettes/BenchmarkTasks.Rmd new file mode 100644 index 000000000..13eb511ab --- /dev/null +++ b/vignettes/BenchmarkTasks.Rmd @@ -0,0 +1,306 @@ +--- +title: "Benchmark Tasks" +author: "Jenna Reps, Ross Williams, Peter R. Rijnbeek" +date: '`r Sys.Date()`' +header-includes: + - \usepackage{fancyhdr} + - \pagestyle{fancy} + - \fancyhead{} + - \fancyhead[CO,CE]{Installation Guide} + - \fancyfoot[CO,CE]{PatientLevelPrediction Package Version `r utils::packageVersion("PatientLevelPrediction")`} + - \fancyfoot[LE,RO]{\thepage} + - \renewcommand{\headrulewidth}{0.4pt} + - \renewcommand{\footrulewidth}{0.4pt} +output: + pdf_document: + includes: + in_header: preamble.tex + number_sections: yes + toc: yes + word_document: + toc: yes + html_document: + number_sections: yes + toc: yes +--- + + +## Benchmark Tasks For Large-Scale Empirical Analyses + +Here we provide a set of diverse prediction tasks that can be used when evaluating the impact of the model design choice when developing models using observational data. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Target Cohort (index) + +Outcome + +Time-at-risk + +Link +
+Patients with an outpatient visit in 2017 with no prior cancer (first visit in 2017) + +Lung cancer + +1 day - 3 years after index + +
+Patients newly diagnosed with major depressive disorder (date of first record) + +Bipolar + +1 day - 365 days after index + +
+Patients with an outpatient visit in 2019 + +Dementia + +1 day - 3 years after index + +
+Patients with an outpatient visit and a positive COVID test + +Hospitalization with pneumonia + +1 day - 30 days after index + +
+Patients with an outpatient visit and a positive COVID test + +Hospitalization with pneumonia that required intensive services (ventilation, intubation, tracheotomy, or extracorporeal membrane oxygenation) or death + +1 day - 30 days after index + +
+Patients with an outpatient visit and a positive COVID test + +Death + +1 day - 30 days after index + +
+Patients with T2DM who were treated with metformin and who became new adult users of one of sulfonylureas, thiazolidinediones, dipeptidyl peptidase-4 inhibitors, glucagon-like peptide-1 receptor agonists, or sodium-glucose co-transporter-2 inhibitors (date of secondary drug). Patients with HF or patients treated with insulin on or prior to the index date were excluded from the analysis. Patients were required to have been enrolled for at least 365 days before cohort entry. + +Heart Failure + +1 to 365 days + +
+Patients newly diagnosed with atrial fibrillation (date of initial afib record) + +Ischemic stroke + +1 to 365 days + +
+Patients undergoing elective major non-cardiac surgery (date of surgery). Patients were required to have been enrolled for at least 365 days before cohort entry. + +Earliest of AMI, cardiac arrest, or death (MACE) + +0 to 30 days + +
+Patients starting intravitreal Anti-VEGF (date of administration) + +Kidney Failure + +1 to 365 days + +
+Pregnant women (start of pregnancy) + +Preeclampsia + +During pregnancy + +
+Pregnant women (start of pregnancy) + +Stillbirth + +During pregnancy + +
+Patients with COPD (first record) + +Cardiovascular event and death + +1-30 days and 1-90 days + +
+Patients starting menopause (first record) + +Depression + +1 day - 3 years + +
+Patients with anemia (date of first anemia record) + +Colorectal cancer + +1 day - 1 year + +
+Patients with quadriplegia (date of first quadriplegia record) + +Death + +1 day - 1 year + +
+Patient undergoing + + + + + +
+ + + + + + +
+ \ No newline at end of file diff --git a/vignettes/BestPractices.rmd b/vignettes/BestPractices.rmd index 9c27f3f7c..b59ba6c5e 100644 --- a/vignettes/BestPractices.rmd +++ b/vignettes/BestPractices.rmd @@ -101,7 +101,7 @@ Data Creation Impact of over/under-sampling -Study being developed +Paper under review @@ -125,7 +125,7 @@ Model development How much data do we need for prediction - Learning curves at scale -Preprint link +International Journal of Medical Informatics @@ -165,6 +165,18 @@ Study needs to be done + + +Model development + + +Can we use ensembles to combine different algorithm models within a database to improve models transportability? + + +Study Complete + + + Model development @@ -173,7 +185,7 @@ Model development Can we use ensembles to combine models developed using different databases to improve models transportability? - Paper under review at BMC + BMC Medical Informatics and Decision Making diff --git a/vignettes/ConstrainedPredictors.Rmd b/vignettes/ConstrainedPredictors.Rmd new file mode 100644 index 000000000..2776122cb --- /dev/null +++ b/vignettes/ConstrainedPredictors.Rmd @@ -0,0 +1,883 @@ +--- +title: "Constrained predictors" +author: "Jenna Reps" +date: '`r Sys.Date()`' +header-includes: + - \usepackage{fancyhdr} + - \pagestyle{fancy} + - \fancyhead{} + - \fancyhead[CO,CE]{Installation Guide} + - \fancyfoot[CO,CE]{PatientLevelPrediction Package Version `r utils::packageVersion("PatientLevelPrediction")`} + - \fancyfoot[LE,RO]{\thepage} + - \renewcommand{\headrulewidth}{0.4pt} + - \renewcommand{\footrulewidth}{0.4pt} +output: + pdf_document: + includes: + in_header: preamble.tex + number_sections: yes + toc: yes + word_document: + toc: yes + html_document: + number_sections: yes + toc: yes +--- + + +## Constrained Predictors + + +### How to use the PhenotypeLibrary R package +Here we provide a set of phenotypes that can be used as predictors in prediction models or best practice research. 
+ +These phenotypes can be extracted from the PhenotypeLibrary R package. To install the R package run: + +```{r echo = T} +remotes::install_github('ohdsi/PhenotypeLibrary') +``` + + +To extract the cohort definition for Alcoholism with an id of 1165, just run: + +```{r echo = T} +PhenotypeLibrary::getPlCohortDefinitionSet(1165) +``` + +in general you can extract all the cohorts by running: + +```{r echo = T} +phenotypeDefinitions <- PhenotypeLibrary::getPlCohortDefinitionSet(1152:1215) +``` + +### The full set of predictor phenotypes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Phenotype Name + +Disorder classification + +OHDSI Phenotype library ID +
+Alcoholism + +Behavioral + +1165 +
+Smoking + +Behavioral + +1166 +
+Anemia + +Blood + +1188 +
+Osteoarthritis + +Bone + +1184 +
+Osteoporosis + +Bone + +1185 +
+Cancer + +Cancer + +1215 +
+Atrial fibrillation + +Cardiovascular + +1160 +
+Congestive heart failure + +Cardiovascular + +1154 +
+Coronary artery disease + +Cardiovascular + +1162 +
+Heart valve disorder + +Cardiovascular + +1172 +
+Hyperlipidemia + +Cardiovascular + +1170 +
+Hypertension + +Cardiovascular + +1198 +
+Angina + +Cardiovascular + +1159 +
+Skin Ulcer + +Debility + +1168 +
+Diabetes type 1 + +Endocrine + +1193 +
+Diabetes type 2 + +Endocrine + +1194 +
+Hypothyroidism + +Endocrine + +1171 +
+Obesity + +Endocrine + +1179 +
+Gastroesophageal reflux disease (GERD) + +GI + +1178 +
+Gastrointestinal (GI) bleed + +GI + +1197 +
+Inflammatory bowel disorder (IBD) + +GI/Rheumatology + +1180 +
+Hormonal contraceptives + +Gynecologic + +1190 +
+Antibiotics Aminoglycosides + +Infection + +1201 +
+Antibiotics Carbapenems + +Infection + +1202 +
+Antibiotics Cephalosporins + +Infection + +1203 +
+Antibiotics Fluoroquinolones + +Infection + +1204 +
+Antibiotics Glycopeptides and lipoglycopeptides + +Infection + +1205 +
+Antibiotics Macrolides + +Infection + +1206 +
+Antibiotics Monobactams + +Infection + +1207 +
+Antibiotics Oxazolidinones + +Infection + +1208 +
+Antibiotics Penicillins + +Infection + +1209 +
+Antibiotics Polypeptides + +Infection + +1210 +
+Antibiotics Rifamycins + +Infection + +1211 +
+Antibiotics Sulfonamides + +Infection + +1212 +
+Antibiotics Streptogramins + +Infection + +1213 +
+Antibiotics Tetracyclines + +Infection + +1214 +
+Pneumonia + +Infection/Respiratory + +1199 +
+Sepsis + +Infection + +1176 +
+Urinary tract infection (UTI) + +Infection + +1186 +
+Hepatitis + +Liver + +1169 +
+Anxiety + +Mood + +1189 +
+Depression (MDD) + +Mood + +1161 +
+Psychotic disorder + +Mood + +1175 +
+Antiepileptics (pain) + +Neurology/Pain + +1183 +
+Seizure + +Neurology + +1153 +
+Hemorrhagic stroke + +Neurology/Vascular + +1156 +
+Non-hemorrhagic stroke + +Neurology/Vascular + +1155 +
+Acetaminophen prescription + +Pain/Infection + +1187 +
+Low back pain + +Pain + +1173 +
+Neuropathy + +Pain/Neurology + +1174 +
+Opioids + +Pain + +1182 +
+Acute kidney injury + +Kidney + +1163 +
+Chronic kidney disease + +Kidney + +1191 +
+Asthma + +Respiratory + +1164 +
+Chronic obstructive pulmonary disorder (COPD) + +Respiratory + +1192 +
+Dyspnea + +Respiratory + +1195 +
+Respiratory failure + +Respiratory + +1177 +
+Sleep apnea + +Respiratory + +1167 +
+Rheumatoid arthritis + +Rheumatology + +1200 +
+Steroids + +Rheumatology/Pain/Pulmonary + +1181 +
+Peripheral vascular disease + +Vascular + +1157 +
+Aspirin + +Vascular + +1158 +
+Deep vein thrombosis (DVT) + +Vascular + +1152 +
+Edema + +Vascular + +1196 +
+Inpatient visit + +NA + +NA +
+ \ No newline at end of file