diff --git a/.Rbuildignore b/.Rbuildignore index 3fbf93c69..4fac5d027 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,5 +10,4 @@ compare_versions .github docs/* _pkgdown.yml - - +^vignettes/articles$ diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml index f7c4ea17e..8f940f131 100644 --- a/.github/workflows/R_CMD_check_Hades.yaml +++ b/.github/workflows/R_CMD_check_Hades.yaml @@ -87,13 +87,13 @@ jobs: path: check/*.tar.gz - name: Install covr - if: runner.os == 'Windows' + if: runner.os == 'Linux' run: | remotes::install_cran("covr") shell: Rscript {0} - + - name: Test coverage - if: runner.os == 'Windows' + if: runner.os == 'Linux' run: covr::codecov() shell: Rscript {0} diff --git a/.gitignore b/.gitignore index c7ce48620..41469fa63 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ *-Ex.R # R data files from past sessions .Rdata +# R environ +.Renviron # RStudio files .Rproj.user/ .Rproj.user @@ -20,4 +22,4 @@ standalone/build/* /plpmodels/* /python_models/* /mycache/* -/inst/shiny/DiagnosticsExplorer/rsconnect/* \ No newline at end of file +/inst/shiny/DiagnosticsExplorer/rsconnect/* diff --git a/DESCRIPTION b/DESCRIPTION index dde325a53..951f8e056 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: PatientLevelPrediction Type: Package Title: Developing patient level prediction using data in the OMOP Common Data Model -Version: 6.3.6 -Date: 2023-10-09 +Version: 6.3.7 +Date: 2024-04-04 Authors@R: c( person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut", "cre")), person("Martijn", "Schuemie", role = c("aut")), @@ -43,7 +43,6 @@ Imports: rlang, SqlRender (>= 1.1.3), survival, - tibble, tidyr, utils Suggests: @@ -74,9 +73,7 @@ Remotes: ohdsi/BigKnn, ohdsi/Eunomia, ohdsi/FeatureExtraction, - ohdsi/IterativeHardThresholding, - ohdsi/ParallelLogger, ohdsi/ShinyAppBuilder, - ohdsi/ResultModelManager -RoxygenNote: 7.2.3 + ohdsi/ResultModelManager, +RoxygenNote: 7.3.1 Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index ea37b5ad1..f81eb0a3d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +PatientLevelPrediction 6.3.7 +====================== +- Clean up dependencies: remove tibble, and install IterativeHardThresholding and ParallelLogger from CRAN +- Use cohortIds instead of cohortId for cohort covariates to comply with FeatureExtraction +- Add cdmDatabaseName from DatabaseDetails to the model output +- Fix bug where attributes were not preserved on trainData$covariateData after splitting +- Fix warnings in tests and speed the tests up +- Fix bug in the assignment operator in configurePython +- Delay evaluation of plpData when using do.call, as in createLearningCurve and +runMultiplePlp +- Speed up population generation when subjectIds are distinct +- Fix bug where the population was regenerated even when one was supplied to runPlp + + PatientLevelPrediction 6.3.6 ====================== - fix bug with ohdsi shiny modules version check (issue 415) diff --git a/R/AdditionalCovariates.R b/R/AdditionalCovariates.R index 790fe9df4..77204d1de 100644 --- a/R/AdditionalCovariates.R +++ b/R/AdditionalCovariates.R @@ -30,7 +30,7 @@ #' @param cohortTable the table name that contains the target population cohort #' @param rowIdField string representing the unique identifier in the target population cohort #' @param aggregated whether the covariate should be aggregated -#' @param cohortId cohort id for the target cohort +#' @param cohortIds cohort id for the target cohort #' @param covariateSettings settings for the covariate cohorts and time periods #' #' @return #' @@ -45,7 +45,7 @@ getCohortCovariateData
<- function( cohortTable = "#cohort_person", rowIdField = "row_id", aggregated, - cohortId, + cohortIds, covariateSettings ){ @@ -69,7 +69,7 @@ getCohortCovariateData <- function( sql, covariate_cohort_schema = covariateSettings$cohortDatabaseSchema, covariate_cohort_table = covariateSettings$cohortTable, - covariate_cohort_id = covariateSettings$cohortId, + covariate_cohort_id = covariateSettings$cohortIds, cohort_temp_table = cohortTable, row_id_field = rowIdField, startDay = covariateSettings$startDay, @@ -191,7 +191,7 @@ createCohortCovariateSettings <- function( covariateId = cohortId*100000+settingId*1000+analysisId, cohortDatabaseSchema = cohortDatabaseSchema, cohortTable = cohortTable, - cohortId = cohortId, + cohortIds = cohortId, startDay = startDay, endDays = endDay, count = count, diff --git a/R/AndromedaHelperFunctions.R b/R/AndromedaHelperFunctions.R index bf70bdb06..b6182610c 100644 --- a/R/AndromedaHelperFunctions.R +++ b/R/AndromedaHelperFunctions.R @@ -97,7 +97,7 @@ calculatePrevs <- function(plpData, population){ #=========================== # add population to sqllite - population <- tibble::as_tibble(population) + population <- dplyr::as_tibble(population) plpData$covariateData$population <- population %>% dplyr::select("rowId", "outcomeCount") diff --git a/R/CyclopsModels.R b/R/CyclopsModels.R index 83c61f098..16f035164 100644 --- a/R/CyclopsModels.R +++ b/R/CyclopsModels.R @@ -211,7 +211,8 @@ fitCyclopsModel <- function( trainDetails = list( analysisId = analysisId, analysisSource = '', #TODO add from model - developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema, + developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName, + developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, attrition = attr(trainData, "metaData")$attrition, trainingTime = paste(as.character(abs(comp)), attr(comp,'units')), trainingDate = Sys.Date(), diff --git a/R/DataSplitting.R b/R/DataSplitting.R index 35519801d..195e2680c 100644 --- a/R/DataSplitting.R +++ b/R/DataSplitting.R @@ -148,6 +148,7 @@ splitData <- function(plpData = plpData, outcomeId = attr(population, "metaData")$outcomeId, targetId = attr(population, "metaData")$targetId, cdmDatabaseSchema = plpData$metaData$databaseDetails$cdmDatabaseSchema, + cdmDatabaseName = plpData$metaData$databaseDetails$cdmDatabaseName, cdmDatabaseId = plpData$metaData$databaseDetails$cdmDatabaseId, restrictPlpDataSettings = attr(population, "metaData")$restrictPlpDataSettings, covariateSettings = plpData$metaData$covariateSettings, @@ -186,6 +187,7 @@ splitData <- function(plpData = plpData, outcomeId = attr(population, "metaData")$outcomeId, targetId = attr(population, "metaData")$targetId, cdmDatabaseSchema = plpData$metaData$databaseDetails$cdmDatabaseSchema, + cdmDatabaseName = plpData$metaData$databaseDetails$cdmDatabaseName, cdmDatabaseId = plpData$metaData$databaseDetails$cdmDatabaseId, restrictPlpDataSettings = attr(population, "metaData")$restrictPlpDataSettings, covariateSettings = plpData$metaData$covariateSettings, @@ -195,10 +197,6 @@ splitData <- function(plpData = plpData, populationSize = nrow(trainData$labels) ) - # add pop size to covariateData as used in tidyCovariates - attr(trainData$covariateData, "metaData") <- list(populationSize = nrow(trainData$labels)) - class(trainData$covariateData) <- "CovariateData" - testId <- splitId[splitId$index<0,] testData <- list() class(testData) <- 'plpData' @@ -214,7 +212,6 @@ splitData <- function(plpData = plpData, data.frame(rowId = testId$rowId), 
sizeN = 10000000) } - class(testData$covariateData) <- "CovariateData" result <- list( Train = trainData, diff --git a/R/DemographicSummary.R b/R/DemographicSummary.R index 7157690a7..b3df14305 100644 --- a/R/DemographicSummary.R +++ b/R/DemographicSummary.R @@ -111,30 +111,30 @@ getDemographicSummary_survival <- function(prediction, evalColumn, timepoint = N tempDemo <- demographicSum %>% dplyr::filter( .data$genGroup == gen & .data$ageGroup == age ) - if(nrow(tempDemo)>0){ - t1 <- tempDemo %>% dplyr::select("t") - y1 <- tempDemo %>% dplyr::select("y") - p1 <- tempDemo %>% dplyr::select("value") + if (nrow(tempDemo) > 1 & length(unique(tempDemo$y)) > 1) { + t <- tempDemo$t + y <- tempDemo$y + value <- tempDemo$value out <- tryCatch( { summary( - survival::survfit(survival::Surv(t1$t, y1$y) ~ 1), + survival::survfit(survival::Surv(t, y) ~ 1), times = timepoint ) }, - error = function(e){ParallelLogger::logError(e); return(NULL)} + error = function(e){ParallelLogger::logError(e);return(NULL)} ) if(!is.null(out)){ demoTemp <- c( genGroup = gen, ageGroup = age, - PersonCountAtRisk = length(p1$value), - PersonCountWithOutcome = round(length(p1$value)*(1-out$surv)), + PersonCountAtRisk = length(value), + PersonCountWithOutcome = round(length(value)*(1-out$surv)), observedRisk = 1-out$surv, - averagePredictedProbability = mean(p1$value, na.rm = T), - StDevPredictedProbability = stats::sd(p1$value, na.rm = T) + averagePredictedProbability = mean(value, na.rm = T), + StDevPredictedProbability = stats::sd(value, na.rm = T) ) demographicData <- rbind(demographicData, demoTemp) diff --git a/R/EvaluationSummary.R b/R/EvaluationSummary.R index 4e02d7cf4..a318cb923 100644 --- a/R/EvaluationSummary.R +++ b/R/EvaluationSummary.R @@ -35,7 +35,7 @@ getEvaluationStatistics_binary <- function(prediction, evalColumn, ...){ # auc ParallelLogger::logInfo(paste0('Calculating Performance for ', evalType)) - ParallelLogger::logInfo('=============') + ParallelLogger::logInfo('=============') ParallelLogger::logTrace('Calculating AUC') auc <- computeAuc(predictionOfInterest, confidenceInterval = T) diff --git a/R/FeatureImportance.R b/R/FeatureImportance.R index f47f4de25..6c73b1223 100644 --- a/R/FeatureImportance.R +++ b/R/FeatureImportance.R @@ -105,23 +105,20 @@ pfi <- function(plpResult, population, plpData, repeats = 1, ParallelLogger::logInfo(paste0('Using all ', cores)) ParallelLogger::logInfo(paste0('Set cores input to use fewer...')) } - + getVpiSettings <- function(i) { + result <- list(plpModel = plpResult$model, + population = population, + plpDataLocation = plpDataLocation, + covariateId = covariates[i], + repeats = repeats) + return(result) + } + if (cores > 1) { cluster <- ParallelLogger::makeCluster(numberOfThreads = cores) ParallelLogger::clusterRequire(cluster, c("PatientLevelPrediction", "Andromeda")) - - getVpiSettings <- function(i){ - result <-list(plpModel = plpResult$model, - population = population, - plpDataLocation = plpDataLocation, - covariateId = covariates[i], - repeats = repeats) - return(result) - } vpiSettings <- lapply(1:length(covariates), getVpiSettings) - - #lapply(vpiSettings, function(x) do.call(permutePerf, x)) aucP <- ParallelLogger::clusterApply(cluster = cluster, x = vpiSettings, fun = permutePerf, @@ -129,13 +126,15 @@ pfi <- function(plpResult, population, plpData, repeats = 1, progressBar = TRUE) ParallelLogger::stopCluster(cluster) + } else { + ParallelLogger::logInfo("Running in serial") + aucP <- lapply(1:length(covariates), function(i) { + 
permutePerf(getVpiSettings(i)) + }) + } aucP <- do.call(c, aucP) - - # do this in parellel - varImp <- data.frame(covariateId = covariates, - pfi = auc-aucP) - + pfi = auc - aucP) return(varImp) } @@ -200,7 +199,7 @@ permute <- function(plpDataLocation,cId,population){ # find a new random selection of people and give them the covariate and value newPlp <- sample(population$rowId,nSamp) - newData <- tibble::as_tibble(cbind(rowId = newPlp,coi[,-1])) + newData <- dplyr::as_tibble(cbind(rowId = newPlp,coi[,-1])) # swap old covariate data with new plpData$covariateData$covariates <- plpData$covariateData$covariates %>% dplyr::filter(.data$covariateId != !!cId) %>% dplyr::collect() @@ -215,7 +214,7 @@ permute <- function(plpDataLocation,cId,population){ # sample the pop to replace swapPlp <- sample(population$rowId,nSamp) - haveCidDataSwapped <- tibble::as_tibble(cbind(rowId = swapPlp,haveCidData[,-1])) + haveCidDataSwapped <- dplyr::as_tibble(cbind(rowId = swapPlp,haveCidData[,-1])) # find the swapped people to switch connectedCovs <- plpData$covariateData$covariateRef %>% @@ -228,7 +227,7 @@ permute <- function(plpDataLocation,cId,population){ dplyr::filter(.data$rowId %in% swapPlp) %>% dplyr::collect() - swappedForCid <- tibble::as_tibble(cbind(rowId = haveCidData$rowId[1:nrow(plpToSwap)],plpToSwap[,-1])) + swappedForCid <- dplyr::as_tibble(cbind(rowId = haveCidData$rowId[1:nrow(plpToSwap)],plpToSwap[,-1])) # swap old covariate data with new diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index bf5f71b1b..c6fd07d13 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -1,6 +1,8 @@ # fix issue with nrow - temp fix for me locally nrow <- function(x){UseMethod("nrow",x)} +#' @exportS3Method NULL nrow.default <- base::nrow +#' @exportS3Method NULL nrow.tbl <- function(x){x %>% dplyr::tally() %>% dplyr::pull()} @@ -101,9 +103,9 @@ configurePython <- function(envname='PLP', envtype=NULL, condaPythonVersion="3.1 if(is.null(envtype)){ if(getOs()=='windows'){ - envtype=='conda' + envtype <- "conda" } else { - envtype=='python' + envtype <- "python" } } diff --git a/R/KNN.R b/R/KNN.R index 54ef1528d..834b26eaa 100644 --- a/R/KNN.R +++ b/R/KNN.R @@ -145,7 +145,8 @@ fitKNN <- function(trainData, modelSettings, search = 'none', analysisId, ...){ trainDetails = list( analysisId = analysisId, - developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema, + developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName, + developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, attrition = attr(trainData, "metaData")$attrition, trainingTime = paste(as.character(abs(comp)), attr(comp,'units')), trainingDate = Sys.Date(), diff --git a/R/LearningCurve.R b/R/LearningCurve.R index 21c89d5d3..9b3ea9498 100644 --- a/R/LearningCurve.R +++ b/R/LearningCurve.R @@ -188,7 +188,7 @@ createLearningCurve <- function( nRuns <- length(trainFractions) settings = list( - plpData = plpData, + plpData = quote(plpData), outcomeId = outcomeId, analysisId = analysisId, populationSettings = populationSettings, @@ -238,7 +238,7 @@ createLearningCurve <- function( lcWrapper <- function(settings){ plpData <- PatientLevelPrediction::loadPlpData(settings$plpData) - settings$plpData <- plpData + settings$plpData <- quote(plpData) result <- tryCatch({do.call(runPlp, settings)}, warning = function(war) { ParallelLogger::logInfo(paste0('a warning: ', war)) @@ -470,8 +470,8 @@ plotLearningCurve <- function(learningCurve, # create plot object plot <- tidyLearningCurve %>% - 
ggplot2::ggplot(ggplot2::aes_string(x = abscissa, y= 'value', - col = "Dataset")) + + ggplot2::ggplot(ggplot2::aes(x = .data[[abscissa]], y = .data[['value']], + col = .data[["Dataset"]])) + ggplot2::geom_line() + ggplot2::coord_cartesian(ylim = yAxisRange, expand = FALSE) + ggplot2::labs(title = plotTitle, subtitle = plotSubtitle, diff --git a/R/PatientLevelPrediction.R b/R/PatientLevelPrediction.R index b4e2a82b5..848c42705 100644 --- a/R/PatientLevelPrediction.R +++ b/R/PatientLevelPrediction.R @@ -20,12 +20,11 @@ #' #' @description A package for running predictions using data in the OMOP CDM #' -#' @docType package #' @name PatientLevelPrediction #' @keywords internal #' @importFrom dplyr %>% #' @importFrom rlang .data -NULL +"_PACKAGE" #' A simulation profile #' @docType data diff --git a/R/Plotting.R b/R/Plotting.R index cfa5026d8..f2fe6bf0d 100644 --- a/R/Plotting.R +++ b/R/Plotting.R @@ -293,7 +293,7 @@ plotSparseRoc <- function( plots <- list() length(plots) <- length(evalTypes) - for(i in 1:length(evalTypes)){ + for (i in 1:length(evalTypes)){ evalType <- evalTypes[i] x <- plpResult$performanceEvaluation$thresholdSummary %>% dplyr::filter(.data[[typeColumn]] == evalType) %>% @@ -316,14 +316,14 @@ plotSparseRoc <- function( ) ) + ggplot2::geom_polygon(fill = "blue", alpha = 0.2) + - ggplot2::geom_line(size=1) + + ggplot2::geom_line(linewidth = 1) + ggplot2::geom_abline(intercept = 0, slope = 1,linetype = 2) + - ggplot2::scale_x_continuous("1 - specificity", limits=c(0,1)) + - ggplot2::scale_y_continuous("Sensitivity", limits=c(0,1)) + + ggplot2::scale_x_continuous("1 - specificity", limits = c(0,1)) + + ggplot2::scale_y_continuous("Sensitivity", limits = c(0,1)) + ggplot2::ggtitle(evalType) } - plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1) + plot <- gridExtra::marrangeGrob(plots, nrow =length(plots), ncol = 1) if (!is.null(saveLocation)){ if(!dir.exists(saveLocation)){ @@ -567,11 +567,11 @@ plotPrecisionRecall <- function( dplyr::select("positivePredictiveValue", "sensitivity") plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(.data$sensitivity, .data$positivePredictiveValue)) + - ggplot2::geom_line(size=1) + + ggplot2::geom_line(linewidth=1) + ggplot2::scale_x_continuous("Recall")+#, limits=c(0,1)) + ggplot2::scale_y_continuous("Precision") + #, limits=c(0,1)) ggplot2::geom_hline(yintercept = inc, linetype="dashed", - color = "red", size=1) + + color = "red", linewidth = 1) + ggplot2::ggtitle(evalType) } @@ -630,8 +630,8 @@ plotF1Measure <- function( } plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(.data$predictionThreshold, .data$f1Score)) + - ggplot2::geom_line(size=1) + - ggplot2::geom_point(size=1) + + ggplot2::geom_line(linewidth = 1) + + ggplot2::geom_point(size = 1) + ggplot2::scale_x_continuous("predictionThreshold")+#, limits=c(0,1)) + ggplot2::scale_y_continuous("F1Score") +#, limits=c(0,1)) ggplot2::ggtitle(evalType) @@ -844,9 +844,9 @@ plotSparseCalibration <- function( )) + ggplot2::geom_ribbon(ggplot2::aes(ymin=.data$lci,ymax=.data$uci, x=x), fill="blue", alpha=0.2) + - ggplot2::geom_point(size=1, color='darkblue') + + ggplot2::geom_point(size = 1, color='darkblue') + ggplot2::coord_cartesian(ylim = c(0, maxVal), xlim =c(0,maxVal)) + - ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 2, size=1, + ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 2, linewidth = 1, show.legend = TRUE) + ggplot2::geom_abline(intercept = res['Intercept'], slope = res['Gradient'], linetype = 1,show.legend = TRUE, @@ -919,7 +919,7 @@ plotSparseCalibration2 
<- function( ggplot2::geom_point(size=2, color='black') + ggplot2::geom_errorbar(limits) + ggplot2::geom_line(colour='darkgrey') + - ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 5, size=0.4, + ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 5, linewidth=0.4, show.legend = TRUE) + ggplot2::scale_x_continuous("Average Predicted Probability") + ggplot2::scale_y_continuous("Observed Fraction With Outcome") + @@ -1248,15 +1248,14 @@ plotSmoothCalibrationLoess <- function(data, span = 0.75) { fill = "blue", alpha = 0.2 ) + - ggplot2::geom_segment( - ggplot2::aes( - x = 0, - xend = 1, - y = 0, - yend = 1, - color = "Ideal", - linetype = "Ideal" - ) + ggplot2::annotate( + geom = "segment", + x = 0, + xend = 1, + y = 0, + yend = 1, + color = "red", + linetype = "dashed" ) + ggplot2::scale_linetype_manual( name = "Models", @@ -1442,43 +1441,44 @@ plotPredictionDistribution <- function( plots <- list() length(plots) <- length(evalTypes) - for(i in 1:length(evalTypes)){ + for (i in 1:length(evalTypes)) { evalType <- evalTypes[i] x <- plpResult$performanceEvaluation$predictionDistribution %>% dplyr::filter(.data[[typeColumn]] == evalType) - non05 <- x$P05PredictedProbability[x$class==0] - non95 <- x$P95PredictedProbability[x$class==0] - one05 <- x$P05PredictedProbability[x$class==1] - one95 <- x$P95PredictedProbability[x$class==1] - - plots[[i]] <- ggplot2::ggplot(x, ggplot2::aes(x=as.factor(.data$class), - ymin=.data$MinPredictedProbability, - lower=.data$P25PredictedProbability, - middle=.data$MedianPredictedProbability, - upper=.data$P75PredictedProbability, - ymax=.data$MaxPredictedProbability, - color=as.factor(.data$class))) + + non05 <- x$P05PredictedProbability[x$class == 0] + non95 <- x$P95PredictedProbability[x$class == 0] + one05 <- x$P05PredictedProbability[x$class == 1] + one95 <- x$P95PredictedProbability[x$class == 1] + + plots[[i]] <- ggplot2::ggplot(x, + ggplot2::aes(x = as.factor(.data$class), + ymin = .data$MinPredictedProbability, + lower = .data$P25PredictedProbability, + middle = .data$MedianPredictedProbability, + upper = .data$P75PredictedProbability, + ymax = .data$MaxPredictedProbability, + color = as.factor(.data$class))) + ggplot2::coord_flip() + - ggplot2::geom_boxplot(stat="identity") + + ggplot2::geom_boxplot(stat = "identity") + ggplot2::scale_x_discrete("Class") + ggplot2::scale_y_continuous("Predicted Probability") + - ggplot2::theme(legend.position="none") + - ggplot2::geom_segment(ggplot2::aes(x = 0.9, y = non05, - xend = 1.1, yend = non05), color='red') + - ggplot2::geom_segment(ggplot2::aes(x = 0.9, y = non95, - xend = 1.1, yend = non95), color='red') + - ggplot2::geom_segment(ggplot2::aes(x = 1.9, y = one05, - xend = 2.1, yend = one05)) + - ggplot2::geom_segment(ggplot2::aes(x = 1.9, y = one95, - xend = 2.1, yend = one95)) + - ggplot2::ggtitle(evalType) + ggplot2::theme(legend.position = "none") + + ggplot2::annotate("segment", x = 0.9, xend = 1.1, y = non05, yend = non05, + color = "red") + + ggplot2::annotate("segment", x = 0.9, xend = 1.1, y = non95, yend = non95, + color = "red") + + ggplot2::annotate("segment", x = 1.9, xend = 2.1, y = one05, yend = one05, + color = "#00BFC4") + + ggplot2::annotate("segment", x = 1.9, xend = 2.1, y = one95, yend = one95, + color = "#00BFC4") + + ggplot2::ggtitle(evalType) } - plot <- gridExtra::marrangeGrob(plots, nrow=length(plots), ncol=1) + plot <- gridExtra::marrangeGrob(plots, nrow = length(plots), ncol = 1) - if (!is.null(saveLocation)){ - if(!dir.exists(saveLocation)){ + if (!is.null(saveLocation)) {
if (!dir.exists(saveLocation)) { dir.create(saveLocation, recursive = T) } ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400) diff --git a/R/PopulationSettings.R b/R/PopulationSettings.R index e35df3b67..098a784f7 100644 --- a/R/PopulationSettings.R +++ b/R/PopulationSettings.R @@ -215,6 +215,9 @@ createStudyPopulation <- function( if (is.null(population)) { population <- plpData$cohorts + } else { + population <- plpData$cohorts %>% + dplyr::filter(.data$rowId %in% (population %>% dplyr::pull(.data$rowId))) } # save the metadata (should have the ?targetId, outcomeId, plpDataSettings and population settings) @@ -259,9 +262,9 @@ createStudyPopulation <- function( # get the outcomes during TAR - outcomeTAR <- population %>% - dplyr::inner_join(plpData$outcomes, by ='rowId') %>% - dplyr::filter(.data$outcomeId == get('oId')) %>% + outcomeTAR <- plpData$outcomes %>% + dplyr::filter(.data$outcomeId == get("oId")) %>% + dplyr::inner_join(population, by = "rowId") %>% dplyr::select("rowId", "daysToEvent", "tarStart", "tarEnd") %>% dplyr::filter(.data$daysToEvent >= .data$tarStart & .data$daysToEvent <= .data$tarEnd) @@ -294,10 +297,12 @@ createStudyPopulation <- function( if (firstExposureOnly) { ParallelLogger::logTrace(paste("Restricting to first exposure")) - population <- population %>% - dplyr::arrange(.data$subjectId,.data$cohortStartDate) %>% - dplyr::group_by(.data$subjectId) %>% - dplyr::filter(dplyr::row_number(.data$subjectId)==1) + if (nrow(population) > dplyr::n_distinct(population$subjectId)) { + population <- population %>% + dplyr::arrange(.data$subjectId,.data$cohortStartDate) %>% + dplyr::group_by(.data$subjectId) %>% + dplyr::filter(dplyr::row_number(.data$subjectId)==1) + } attrRow <- population %>% dplyr::group_by() %>% dplyr::summarise(outcomeId = get('oId'), @@ -329,9 +334,9 @@ createStudyPopulation <- function( ParallelLogger::logTrace("Removing subjects with prior outcomes (if any)") # get the outcomes during TAR - outcomeBefore <- population %>% - dplyr::inner_join(plpData$outcomes, by ='rowId') %>% + outcomeBefore <- plpData$outcomes %>% dplyr::filter(outcomeId == get('oId')) %>% + dplyr::inner_join(population, by = 'rowId') %>% dplyr::select("rowId", "daysToEvent", "tarStart") %>% dplyr::filter(.data$daysToEvent < .data$tarStart & .data$daysToEvent > -get('priorOutcomeLookback') ) diff --git a/R/Predict.R b/R/Predict.R index e84d48eec..14eff8787 100644 --- a/R/Predict.R +++ b/R/Predict.R @@ -168,7 +168,7 @@ applyTidyCovariateData <- function( if(!is.null(maxs)){ if('bins'%in%colnames(maxs)){ - covariateData$maxes <- tibble::as_tibble(maxs) %>% dplyr::rename(covariateId = "bins") %>% + covariateData$maxes <- dplyr::as_tibble(maxs) %>% dplyr::rename(covariateId = "bins") %>% dplyr::rename(maxValue = "maxs") } else{ covariateData$maxes <- maxs diff --git a/R/RClassifier.R b/R/RClassifier.R index 69b575193..a74e2358b 100644 --- a/R/RClassifier.R +++ b/R/RClassifier.R @@ -83,7 +83,8 @@ fitRclassifier <- function( trainDetails = list( analysisId = analysisId, analysisSource = '', #TODO add from model - developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema, + developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName, + developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, attrition = attr(trainData, "metaData")$attrition, trainingTime = paste(as.character(abs(comp)), attr(comp,'units')), trainingDate = Sys.Date(), diff --git a/R/RunMultiplePlp.R b/R/RunMultiplePlp.R index eff802409..9e0e6e5aa 
100644 --- a/R/RunMultiplePlp.R +++ b/R/RunMultiplePlp.R @@ -175,7 +175,7 @@ runMultiplePlp <- function( if(!analysisExists){ plpData <- PatientLevelPrediction::loadPlpData(file.path(saveDirectory, settings$dataLocation)) runPlpSettings <- list( - plpData = plpData, + plpData = quote(plpData), outcomeId = modelDesign$outcomeId, analysisId = settings$analysisId, populationSettings = modelDesign$populationSettings, diff --git a/R/RunPlp.R b/R/RunPlp.R index a711101e6..3d94c4725 100644 --- a/R/RunPlp.R +++ b/R/RunPlp.R @@ -268,21 +268,23 @@ runPlp <- function( }) # create the population - population <- tryCatch( - { - do.call( - createStudyPopulation, - list( - plpData = plpData, - outcomeId = outcomeId, - populationSettings = populationSettings, - population = plpData$population - ) - ) - }, + if(!is.null(plpData$population)) { + ParallelLogger::logInfo('Using existing population') + population <- plpData$population + } else { + ParallelLogger::logInfo('Creating population') + population <- tryCatch({ + do.call(createStudyPopulation, + list(plpData = plpData, + outcomeId = outcomeId, + populationSettings = populationSettings, + population = plpData$population + ) + )}, error = function(e){ParallelLogger::logError(e); return(NULL)} - ) - + ) + } + if(is.null(population)){ stop('population NULL') } diff --git a/R/Simulation.R b/R/Simulation.R index ae9c8e34a..b1d115c14 100644 --- a/R/Simulation.R +++ b/R/Simulation.R @@ -162,7 +162,8 @@ simulatePlpData <- function(plpDataSimulationProfile, n = 10000) { metaData = list() metaData$databaseDetails <- list( - cdmDatabaseSchema = 'Profile', + cdmDatabaseSchema = 'CDM_SCHEMA', + cdmDatabaseName = "CDM_NAME", outcomeDatabaseSchema = NULL, cohortDatabaseSchema = NULL, connectionDetails = NULL, diff --git a/R/SklearnClassifier.R b/R/SklearnClassifier.R index 4e386b705..8d4a2603c 100644 --- a/R/SklearnClassifier.R +++ b/R/SklearnClassifier.R @@ -115,7 +115,8 @@ fitSklearn <- function( trainDetails = list( analysisId = analysisId, analysisSource = '', #TODO add from model - developmentDatabase = attr(trainData, "metaData")$cdmDatabaseSchema, + developmentDatabase = attr(trainData, "metaData")$cdmDatabaseName, + developmentDatabaseSchema = attr(trainData, "metaData")$cdmDatabaseSchema, attrition = attr(trainData, "metaData")$attrition, trainingTime = paste(as.character(abs(comp)), attr(comp,'units')), trainingDate = Sys.Date(), diff --git a/R/ThresholdSummary.R b/R/ThresholdSummary.R index b35b0e152..b36d86014 100644 --- a/R/ThresholdSummary.R +++ b/R/ThresholdSummary.R @@ -205,17 +205,19 @@ getThresholdSummary_survival <- function(prediction, evalColumn, timepoint, ...) 
) nbSummary <- tryCatch( - { + { xstart <- max(min(preddat$p),0.001); + xstop <- min(max(preddat$p),0.99); stdca( data = preddat, outcome = "y", ttoutcome = "t", timepoint = timepoint, predictors = "p", - xstart = max(min(preddat$p),0.001), #0.001, - xstop = min(max(preddat$p),0.99), - xby = 0.001, - smooth=F + xstart = xstart, + xstop = xstop, + xby = (xstop - xstart)/100, + smooth = FALSE, + graph = FALSE ) }, error = function(e){ParallelLogger::logError(e); return(NULL)} diff --git a/inst/doc/BuildingDeepLearningModels.pdf b/inst/doc/BuildingDeepLearningModels.pdf deleted file mode 100644 index a496daccb..000000000 Binary files a/inst/doc/BuildingDeepLearningModels.pdf and /dev/null differ diff --git a/inst/doc/BuildingEnsembleModels.pdf b/inst/doc/BuildingEnsembleModels.pdf deleted file mode 100644 index 37e714ebf..000000000 Binary files a/inst/doc/BuildingEnsembleModels.pdf and /dev/null differ diff --git a/man/PatientLevelPrediction.Rd b/man/PatientLevelPrediction.Rd index c3d7c5c47..8bc15fc71 100644 --- a/man/PatientLevelPrediction.Rd +++ b/man/PatientLevelPrediction.Rd @@ -2,9 +2,32 @@ % Please edit documentation in R/PatientLevelPrediction.R \docType{package} \name{PatientLevelPrediction} +\alias{PatientLevelPrediction-package} \alias{PatientLevelPrediction} \title{PatientLevelPrediction} \description{ A package for running predictions using data in the OMOP CDM +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://ohdsi.github.io/PatientLevelPrediction} + \item \url{https://github.com/OHDSI/PatientLevelPrediction} + \item Report bugs at \url{https://github.com/OHDSI/PatientLevelPrediction/issues} +} + +} +\author{ +\strong{Maintainer}: Jenna Reps \email{jreps@its.jnj.com} + +Authors: +\itemize{ + \item Martijn Schuemie + \item Marc Suchard + \item Patrick Ryan + \item Peter Rijnbeek + \item Egill Fridgeirsson +} + } \keyword{internal} diff --git a/man/getCohortCovariateData.Rd b/man/getCohortCovariateData.Rd index ff152f84b..d5333d646 100644 --- a/man/getCohortCovariateData.Rd +++ b/man/getCohortCovariateData.Rd @@ -12,7 +12,7 @@ getCohortCovariateData( cohortTable = "#cohort_person", rowIdField = "row_id", aggregated, - cohortId, + cohortIds, covariateSettings ) } @@ -31,7 +31,7 @@ getCohortCovariateData( \item{aggregated}{whether the covariate should be aggregated} -\item{cohortId}{cohort id for the target cohort} +\item{cohortIds}{cohort id for the target cohort} \item{covariateSettings}{settings for the covariate cohorts and time periods} } diff --git a/tests/testthat/helper-expectations.R b/tests/testthat/helper-expectations.R index 5405c008d..25210fdf6 100644 --- a/tests/testthat/helper-expectations.R +++ b/tests/testthat/helper-expectations.R @@ -7,13 +7,14 @@ expect_correct_fitPlp <- function(plpModel, trainData) { expect_equal(NROW(trainData$labels)*multiplicativeFactor, NROW(plpModel$prediction)) # predictions are all between 0 and 1 - expect_true(all((plpModel$prediction$value>=0) & (plpModel$prediction$value<=1))) + expect_true(all((plpModel$prediction$value >= 0) & + (plpModel$prediction$value <= 1))) # model directory exists expect_true(dir.exists(plpModel$model)) - expect_equal(plpModel$modelDesign$outcomeId,2) - expect_equal(plpModel$modelDesign$targetId,1) + expect_equal(plpModel$modelDesign$outcomeId, outcomeId) + expect_equal(plpModel$modelDesign$targetId, 1) # structure of plpModel is correct expect_equal(names(plpModel), c("model", "preprocessing", "prediction", @@ -23,8 +24,6 @@ expect_correct_fitPlp <- function(plpModel, trainData) { 
expect_correct_predictions <- function(predictions, testData) { # predictions are all between 0 and 1 - expect_true(all((predictions$value>=0) & (predictions$value<=1))) + expect_true(all((predictions$value >= 0) & (predictions$value <= 1))) expect_equal(NROW(testData$labels), NROW(predictions)) - - } diff --git a/tests/testthat/helper-functions.R b/tests/testthat/helper-functions.R index edcbffb8a..3a44091e8 100644 --- a/tests/testthat/helper-functions.R +++ b/tests/testthat/helper-functions.R @@ -12,11 +12,12 @@ copyTrainData <- function(trainData) { } # create tiny dataset with subset of covariates based on lasso fit -createTinyPlpData <- function(plpData, plpResult) { +createTinyPlpData <- function(plpData, plpResult, n= 20) { covariates <- plpResult$model$covariateImportance %>% - dplyr::slice_max(order_by = abs(covariateValue),n = 20, with_ties=F) %>% - dplyr::pull(covariateId) + dplyr::slice_max(order_by = abs(.data$covariateValue), + n = n, with_ties = F) %>% + dplyr::pull(.data$covariateId) tinyPlpData <- plpData tinyPlpData$covariateData <- Andromeda::copyAndromeda(plpData$covariateData) diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index 29cf1b5c7..d3bd579cb 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -1,6 +1,6 @@ # this files contains the objects used in the tests: -if(Sys.getenv('GITHUB_ACTIONS') == 'true'){ +if (Sys.getenv('GITHUB_ACTIONS') == 'true') { # Download the PostreSQL driver --------------------------- # If DATABASECONNECTOR_JAR_FOLDER exists, assume driver has been downloaded jarFolder <- Sys.getenv("DATABASECONNECTOR_JAR_FOLDER", unset = "") @@ -21,7 +21,7 @@ if(Sys.getenv('GITHUB_ACTIONS') == 'true'){ PatientLevelPrediction::setPythonEnvironment(envname = 'r-reticulate', envtype = "conda") # if mac install nomkl -- trying to fix github actions - if(ifelse(is.null(Sys.info()), F, Sys.info()['sysname'] == 'Darwin')){ + if (ifelse(is.null(Sys.info()), F, Sys.info()['sysname'] == 'Darwin')){ reticulate::conda_install(envname = 'r-reticulate', packages = c('nomkl'), forge = TRUE, pip = FALSE, pip_ignore_installed = TRUE, conda = "auto") @@ -36,12 +36,34 @@ dir.create(saveLoc) #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # simulated data Tests #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - -data(plpDataSimulationProfile, envir = environment()) +data("plpDataSimulationProfile") # PLPDATA -sampleSize <- 1500+sample(300,1) -plpData <- simulatePlpData(plpDataSimulationProfile, n = sampleSize) +connectionDetails <- Eunomia::getEunomiaConnectionDetails() +Eunomia::createCohorts(connectionDetails) +outcomeId <- 3 # GIbleed + +databaseDetails <- createDatabaseDetails( + connectionDetails = connectionDetails, + cdmDatabaseSchema = "main", + cdmDatabaseName = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + outcomeDatabaseSchema = "main", + outcomeTable = "cohort", + targetId = 1, + outcomeIds = outcomeId, + cdmVersion = 5) + +covariateSettings <- FeatureExtraction::createCovariateSettings( + useDemographicsAge = TRUE, + useDemographicsGender = TRUE, + useConditionOccurrenceAnyTimePrior = TRUE +) + +plpData <- getPlpData(databaseDetails = databaseDetails, + covariateSettings = covariateSettings, + restrictPlpDataSettings = createRestrictPlpDataSettings()) # POPULATION populationSettings <- createStudyPopulationSettings( @@ -50,7 +72,7 @@ populationSettings <- createStudyPopulationSettings( removeSubjectsWithPriorOutcome = FALSE, priorOutcomeLookback = 99999, requireTimeAtRisk = T, - 
minTimeAtRisk=10, + minTimeAtRisk = 10, riskWindowStart = 0, startAnchor = 'cohort start', riskWindowEnd = 365, @@ -59,12 +81,12 @@ populationSettings <- createStudyPopulationSettings( # MODEL SETTINGS -lrSet <- setLassoLogisticRegression() +lrSet <- setLassoLogisticRegression(seed = 42) # RUNPLP - LASSO LR plpResult <- runPlp( plpData = plpData, - outcomeId = 2, + outcomeId = outcomeId, analysisId = 'Test', analysisName = 'Testing analysis', populationSettings = populationSettings, @@ -80,7 +102,7 @@ plpResult <- runPlp( # now diagnose diagnoseResult <- diagnosePlp( plpData = plpData, - outcomeId = 2, + outcomeId = outcomeId, analysisId = 'Test', populationSettings = populationSettings, splitSettings = createDefaultSplitSetting(splitSeed = 12), @@ -101,12 +123,13 @@ diagnoseResult <- diagnosePlp( population <- createStudyPopulation( plpData = plpData, - outcomeId = 2, + outcomeId = outcomeId, populationSettings = populationSettings ) createTrainData <- function(plpData, population){ - data <- PatientLevelPrediction::splitData(plpData = plpData, population=population, + data <- PatientLevelPrediction::splitData(plpData = plpData, + population = population, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(splitSeed = 12)) trainData <- data$Train return(trainData) @@ -115,7 +138,8 @@ createTrainData <- function(plpData, population){ trainData <- createTrainData(plpData, population) createTestData <- function(plpData, population){ - data <- PatientLevelPrediction::splitData(plpData = plpData, population=population, + data <- PatientLevelPrediction::splitData(plpData = plpData, + population = population, splitSettings = PatientLevelPrediction::createDefaultSplitSetting(splitSeed = 12)) testData <- data$Test return(testData) @@ -123,11 +147,11 @@ createTestData <- function(plpData, population){ testData <- createTestData(plpData, population) -# reduced trainData to only use 10 most important features (as decided by LR) -reduceTrainData <- function(trainData) { +# reduced trainData to only use n most important features (as decided by LR) +reduceTrainData <- function(trainData, n=20) { covariates <- plpResult$model$covariateImportance %>% - dplyr::slice_max(order_by = abs(covariateValue),n = 20, with_ties = F) %>% - dplyr::pull(covariateId) + dplyr::slice_max(order_by = abs(.data$covariateValue),n = n, with_ties = F) %>% + dplyr::pull(.data$covariateId) reducedTrainData <- list(labels = trainData$labels, folds = trainData$folds, @@ -151,3 +175,16 @@ tinyTrainData <- reduceTrainData(trainData) tinyPlpData <- createTinyPlpData(plpData, plpResult) +nanoData <- createTinyPlpData(plpData, plpResult, n = 2) +tinyResults <- runPlp(plpData = nanoData, + populationSettings = populationSettings, + outcomeId = outcomeId, + analysisId = 'tinyFit', + executeSettings = createExecuteSettings( + runSplitData = T, + runSampleData = F, + runfeatureEngineering = F, + runPreprocessData = T, + runModelDevelopment = T, + runCovariateSummary = F + )) diff --git a/tests/testthat/test-KNN.R b/tests/testthat/test-KNN.R index 421a55fc2..8383b451a 100644 --- a/tests/testthat/test-KNN.R +++ b/tests/testthat/test-KNN.R @@ -1,16 +1,20 @@ test_that('KNN fit works', { - modelSettings = setKNN(k=5) - + modelSettings = setKNN(k = 2) + nanoTrainData <- reduceTrainData(tinyTrainData, n = 2) + subjectToKeep <- nanoTrainData$labels[sample.int(nrow(nanoTrainData$labels), 50),"rowId"] + nanoTrainData$labels <- nanoTrainData$labels[nanoTrainData$labels$rowId %in% subjectToKeep,] + nanoTrainData$folds <- 
nanoTrainData$folds[nanoTrainData$folds$rowId %in% subjectToKeep,] + nanoTrainData$covariateData$covariates <- nanoTrainData$covariateData$covariates %>% dplyr::filter(.data$rowId %in% subjectToKeep) plpModel <- fitPlp( - trainData = tinyTrainData, + trainData = nanoTrainData, modelSettings = modelSettings, analysisId = 'KNN', analysisPath = tempdir() ) - expect_correct_fitPlp(plpModel, tinyTrainData) + expect_correct_fitPlp(plpModel, nanoTrainData) }) diff --git a/tests/testthat/test-LightGBM.R b/tests/testthat/test-LightGBM.R index f3ddef8f6..35e742cfb 100644 --- a/tests/testthat/test-LightGBM.R +++ b/tests/testthat/test-LightGBM.R @@ -111,7 +111,7 @@ test_that("LightGBM working checks", { expect_lte(nrow(fitModel$covariateImportance), trainData$covariateData$covariateRef %>% dplyr::tally() %>% dplyr::pull()) - expect_equal(fitModel$modelDesign$outcomeId, 2) + expect_equal(fitModel$modelDesign$outcomeId, outcomeId) expect_equal(fitModel$modelDesign$targetId, 1) # TODO check other model design values? diff --git a/tests/testthat/test-UploadToDatabase.R b/tests/testthat/test-UploadToDatabase.R index 3184ddf13..ca30694c6 100644 --- a/tests/testthat/test-UploadToDatabase.R +++ b/tests/testthat/test-UploadToDatabase.R @@ -129,7 +129,7 @@ test_that("results uploaded to database", { } plpResult$model$validationDetails <- list( targetId = 1, - outcomeId = 3, + outcomeId = outcomeId, developmentDatabase = 'test', validationDatabase = 'test', populationSettings = plpResult$model$modelDesign$populationSettings, diff --git a/tests/testthat/test-covariateExtras.R b/tests/testthat/test-covariateExtras.R index a454761d7..17c9e04ec 100644 --- a/tests/testthat/test-covariateExtras.R +++ b/tests/testthat/test-covariateExtras.R @@ -18,10 +18,6 @@ library("testthat") context("CovariateExtras") -connectionDetails <- Eunomia::getEunomiaConnectionDetails() -Eunomia::createCohorts(connectionDetails) - - test_that("settings creation", { covSet <- createCohortCovariateSettings( @@ -113,7 +109,7 @@ covs <- FeatureExtraction::getDbCovariateData( cohortTable = "cohort", cohortDatabaseSchema = "main", cohortTableIsTemp = F, - cohortId = 1, + cohortIds = c(1), rowIdField = 'rowId', covariateSettings = covSet, aggregated = F diff --git a/tests/testthat/test-dataSplitting.R b/tests/testthat/test-dataSplitting.R index bc05e4729..b8ce628bb 100644 --- a/tests/testthat/test-dataSplitting.R +++ b/tests/testthat/test-dataSplitting.R @@ -22,7 +22,7 @@ context("Data splitting") populationT <- plpData$cohorts populationT$outcomeCount <- sample(c(0,1), nrow(populationT), replace = T) -attr(populationT, "metaData")$outcomeId <- 2 +attr(populationT, "metaData")$outcomeId <- outcomeId attr(populationT, "metaData")$populationSettings <- list(madeup = T) attr(populationT, "metaData")$restrictPlpDataSettings <- list(madeup = T) attr(populationT, "metaData")$attrition <- c(1,2,3) diff --git a/tests/testthat/test-diagnostic.R b/tests/testthat/test-diagnostic.R index 3acb2f5ae..2c261dd16 100644 --- a/tests/testthat/test-diagnostic.R +++ b/tests/testthat/test-diagnostic.R @@ -62,8 +62,8 @@ test_that("getMaxEndDaysFromCovariates works", { test_that("test diagnosePlp works", { test <- diagnosePlp( - plpData = plpData, - outcomeId = 2, + plpData = tinyPlpData, + outcomeId = outcomeId, analysisId = 'diagnoseTest', populationSettings = createStudyPopulationSettings( riskWindowStart = 1, @@ -112,28 +112,14 @@ test_that("test diagnosePlp works", { test_that("test diagnoseMultiplePlp works", { - connectionDetails <- 
Eunomia::getEunomiaConnectionDetails() - Eunomia::createCohorts(connectionDetails) - - databaseDetails <- createDatabaseDetails( - connectionDetails = connectionDetails, - cdmDatabaseSchema = "main", - cdmDatabaseName = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - outcomeDatabaseSchema = "main", - outcomeTable = "cohort", - targetId = 1, - outcomeIds = 3, #make this ids - cdmVersion = 5 - ) - analysis1 <- createModelDesign( targetId = 1, - outcomeId = 3, - restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 0), + outcomeId = outcomeId, + restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, + washoutPeriod = 0, + sampleSize = 100), populationSettings = createStudyPopulationSettings(), - covariateSettings = FeatureExtraction::createDefaultCovariateSettings(), + covariateSettings = covariateSettings, featureEngineeringSettings = NULL, sampleSettings = NULL, splitSettings = createDefaultSplitSetting(), @@ -143,10 +129,12 @@ test_that("test diagnoseMultiplePlp works", { analysis2 <- createModelDesign( targetId = 1, - outcomeId = 3, - restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 0), + outcomeId = outcomeId, + restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, + washoutPeriod = 0, + sampleSize = 100), populationSettings = createStudyPopulationSettings(washoutPeriod = 400), - covariateSettings = FeatureExtraction::createDefaultCovariateSettings(), + covariateSettings = covariateSettings, featureEngineeringSettings = NULL, sampleSettings = NULL, splitSettings = createDefaultSplitSetting(), @@ -161,7 +149,7 @@ test_that("test diagnoseMultiplePlp works", { analysis2 ), cohortDefinitions = data.frame( - cohortId = c(1,3), + cohortId = c(1, outcomeId), cohortName = c('target', 'outcome') ), saveDirectory = file.path(saveLoc, 'diagnosticsMultiple') diff --git a/tests/testthat/test-evaluation.R b/tests/testthat/test-evaluation.R index 0133caed5..6669aa488 100644 --- a/tests/testthat/test-evaluation.R +++ b/tests/testthat/test-evaluation.R @@ -38,15 +38,15 @@ test_that("modelBasedConcordance", { }) test_that("evaluatePlp_survival", { - + N <- 100 plpResultSurvivalPred <- data.frame( - rowId = 1:300, - ageYear = sample(100, 300, replace = T), - gender = sample(c('8507','8532'), 300, replace = T), - outcomeCount = c(rep(1,40), rep(0,260)), - value = runif(300), - evaluationType = rep('Train', 300), - survivalTime = sample(2000, 300, replace = T) + rowId = 1:N, + ageYear = sample(100, N, replace = T), + gender = sample(c('8507','8532'), N, replace = T), + outcomeCount = c(rep(1,N*0.1), rep(0,N*0.9)), + value = runif(N, max=0.1), + evaluationType = rep('Train', N), + survivalTime = sample(2000, N, replace = T) ) attr(plpResultSurvivalPred, "metaData")$modelType <- 'survival' attr(plpResultSurvivalPred, 'metaData')$timepoint <- 365 diff --git a/tests/testthat/test-extractData.R b/tests/testthat/test-extractData.R index 45596858f..0e15eb37a 100644 --- a/tests/testthat/test-extractData.R +++ b/tests/testthat/test-extractData.R @@ -17,7 +17,7 @@ context("extractPlp") test_that("summary.plpData", { - attr(plpData$outcomes, "metaData")$outcomeIds <- c(2,3) + attr(plpData$outcomes, "metaData")$outcomeIds <- c(outcomeId) sum <- summary.plpData(plpData) testthat::expect_equal(class(sum),'summary.plpData') }) @@ -55,7 +55,7 @@ test_that("createDatabaseDetails with NULL cdmDatabaseId errors", { cdmDatabaseSchema = 'main', cdmDatabaseId = NULL, targetId 
= 1, - outcomeIds = 2 + outcomeIds = outcomeId )) }) diff --git a/tests/testthat/test-featureImportance.R b/tests/testthat/test-featureImportance.R index 193f43b41..d0140adda 100644 --- a/tests/testthat/test-featureImportance.R +++ b/tests/testthat/test-featureImportance.R @@ -21,21 +21,24 @@ context("FeatureImportance") test_that("pfi feature importance returns data.frame", { - # limit to a sample of 10 covariates for faster test + # limit to a sample of 2 covariates for faster test covariates <- plpResult$model$covariateImportance %>% dplyr::filter("covariateValue" != 0) %>% dplyr::select("covariateId") %>% + dplyr::arrange(desc("covariateValue")) %>% dplyr::pull() # if the model had non-zero covariates if(length(covariates) > 0){ - covariates <- sample(covariates, min(10,length(covariates))) + if (length(covariates) > 2) { + covariates <- covariates[1:2] + } pfiTest <- pfi(plpResult, population, plpData, repeats = 1, - covariates = covariates, cores = NULL, log = NULL, + covariates = covariates, cores = 1, log = NULL, logthreshold = "INFO") expect_equal(class(pfiTest), 'data.frame') - expect_equal(sum(names(pfiTest)%in%c("covariateId", "pfi")), 2) + expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) expect_true(all(!is.nan(pfiTest$pfi))) } @@ -43,25 +46,12 @@ test_that("pfi feature importance returns data.frame", { }) test_that('pfi feature importance works with logger or without covariates', { - tinyResults <- runPlp(plpData = tinyPlpData, - populationSettings = populationSettings, - outcomeId = 2, - analysisId = 'tinyFit', - featureEngineeringSettings = createUnivariateFeatureSelection(k=20), - executeSettings = createExecuteSettings( - runSplitData = T, - runSampleData = F, - runfeatureEngineering = T, - runPreprocessData = T, - runModelDevelopment = T, - runCovariateSummary = F - )) - - pfiTest <- pfi(tinyResults, population, tinyPlpData, + + pfiTest <- pfi(tinyResults, population, nanoData, cores = 1, covariates = NULL, log = file.path(tempdir(), 'pfiLog')) expect_equal(class(pfiTest), 'data.frame') - expect_equal(sum(names(pfiTest)%in%c("covariateId", "pfi")), 2) + expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) expect_true(all(!is.nan(pfiTest$pfi))) }) diff --git a/tests/testthat/test-formatting.R b/tests/testthat/test-formatting.R index 63e47dd60..0e3ac0121 100644 --- a/tests/testthat/test-formatting.R +++ b/tests/testthat/test-formatting.R @@ -115,7 +115,7 @@ test_that("toSparseM", { outcomes=2)) outcomes <- data.frame(rowId=c(1,2), - outcomeId=rep(2,2), + outcomeId=rep(outcomeId,2), daysToEvent=c(150,40)) FplpData <- list(cohorts=cohorts, diff --git a/tests/testthat/test-learningCurves.R b/tests/testthat/test-learningCurves.R index a51537209..15da60bcf 100644 --- a/tests/testthat/test-learningCurves.R +++ b/tests/testthat/test-learningCurves.R @@ -19,7 +19,7 @@ context("LearningCurves") # learningCurve learningCurve <- PatientLevelPrediction::createLearningCurve( plpData = plpData, - outcomeId = 2, parallel = F, cores = -1, + outcomeId = outcomeId, parallel = F, cores = -1, modelSettings = setLassoLogisticRegression(), saveDirectory = file.path(saveLoc, 'lcc'), splitSettings = createDefaultSplitSetting(testFraction = 0.2, nfold=2), @@ -68,10 +68,11 @@ test_that("getTrainFractions works", { learningCurve <- PatientLevelPrediction::createLearningCurve( plpData = tinyPlpData, - outcomeId = 2, parallel = F, cores = -1, - modelSettings = setLassoLogisticRegression(), + outcomeId = outcomeId, parallel = F, cores = -1, + modelSettings = 
setLassoLogisticRegression(seed = 42), saveDirectory = file.path(saveLoc, 'lcc'), - splitSettings = createDefaultSplitSetting(testFraction = 0.33, nfold=2), + splitSettings = createDefaultSplitSetting(testFraction = 0.33, nfold = 2, + splitSeed = 42), trainEvents = c(150,200), preprocessSettings = createPreprocessSettings( minFraction = 0.001, @@ -79,7 +80,7 @@ test_that("getTrainFractions works", { ) ) testthat::expect_true(is.data.frame(learningCurve)) - testthat::expect_equal(sum(colnames(learningCurve)%in%c( + testthat::expect_equal(sum(colnames(learningCurve) %in% c( "trainFraction", "Train_AUROC", "nPredictors", diff --git a/tests/testthat/test-multiplePlp.R b/tests/testthat/test-multiplePlp.R index 8b4a0aad6..14cf651dc 100644 --- a/tests/testthat/test-multiplePlp.R +++ b/tests/testthat/test-multiplePlp.R @@ -17,29 +17,12 @@ library("testthat") context("MultiplePlp") -connectionDetails <- Eunomia::getEunomiaConnectionDetails() -Eunomia::createCohorts(connectionDetails) - -databaseDetails <- createDatabaseDetails( - connectionDetails = connectionDetails, - cdmDatabaseSchema = "main", - cdmDatabaseName = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - outcomeDatabaseSchema = "main", - outcomeTable = "cohort", - targetId = 1, - outcomeIds = 3, #make this ids - cdmVersion = 5 - ) - - analysis1 <- createModelDesign( targetId = 1, - outcomeId = 3, + outcomeId = outcomeId, restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 0), populationSettings = createStudyPopulationSettings(), - covariateSettings = FeatureExtraction::createDefaultCovariateSettings(), + covariateSettings = covariateSettings, featureEngineeringSettings = NULL, sampleSettings = NULL, splitSettings = createDefaultSplitSetting(splitSeed = 1), @@ -50,9 +33,9 @@ analysis1 <- createModelDesign( test_that("createModelDesign - test working", { expect_equal(analysis1$targetId, 1) - expect_equal(analysis1$outcomeId, 3) + expect_equal(analysis1$outcomeId, outcomeId) expect_equal(analysis1$restrictPlpDataSettings, createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 0)) - expect_equal(analysis1$covariateSettings, FeatureExtraction::createDefaultCovariateSettings()) + expect_equal(analysis1$covariateSettings, covariateSettings) expect_equal(analysis1$featureEngineeringSettings, list(createFeatureEngineeringSettings(type= "none"))) expect_equal(analysis1$sampleSettings, list(createSampleSettings(type = 'none'))) expect_equal(analysis1$preprocessSettings, createPreprocessSettings()) @@ -103,27 +86,14 @@ test_that("loading analyses settings", { } ) -analysis2 <- createModelDesign( - targetId = 10, - outcomeId = 2, - restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 9999), - populationSettings = createStudyPopulationSettings(), - covariateSettings = FeatureExtraction::createCovariateSettings(useDemographicsAge = T), - featureEngineeringSettings = NULL, - sampleSettings = NULL, - preprocessSettings = createPreprocessSettings(), - modelSettings = setLassoLogisticRegression(seed = 12) -) - - test_that("test run multiple", { analysis3 <- createModelDesign( targetId = 1, - outcomeId = 3, + outcomeId = outcomeId, restrictPlpDataSettings = createRestrictPlpDataSettings(firstExposureOnly = F, washoutPeriod = 0), populationSettings = createStudyPopulationSettings(), - covariateSettings = FeatureExtraction::createDefaultCovariateSettings(), + covariateSettings = covariateSettings, featureEngineeringSettings = 
createFeatureEngineeringSettings(), sampleSettings = createSampleSettings(), preprocessSettings = createPreprocessSettings(), @@ -134,7 +104,8 @@ test_that("test run multiple", { trainFraction = 0.75, splitSeed = 123, nfold = 3 - ) + ), + runCovariateSummary = FALSE ) runMultiplePlp( diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index a3989c9b6..17039166e 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -38,7 +38,7 @@ test_that("plots", { test <- plotF1Measure(plpResult, typeColumn = 'evaluation') testthat::expect_s3_class(test, 'arrangelist') - if(!is.null(plpResult$performanceEvaluation$demographicSummary)){ + if (!is.null(plpResult$performanceEvaluation$demographicSummary)) { test <- plotDemographicSummary(plpResult, typeColumn = 'evaluation') testthat::expect_s3_class(test, 'arrangelist') } @@ -52,7 +52,7 @@ test_that("plots", { test <- plotVariableScatterplot(plpResult$covariateSummary) testthat::expect_s3_class(test, 'ggplot') - test <- plotGeneralizability(plpResult$covariateSummary, fileName=NULL) + test <- plotGeneralizability(plpResult$covariateSummary, fileName = NULL) testthat::expect_s3_class(test, 'grob') }) @@ -61,7 +61,7 @@ test_that("plots", { test_that("outcomeSurvivalPlot", { # test the plot works - test <- outcomeSurvivalPlot(plpData = plpData, outcomeId = 2) + test <- outcomeSurvivalPlot(plpData = plpData, outcomeId = outcomeId) testthat::expect_s3_class(test, 'ggsurvplot') testthat::expect_error(outcomeSurvivalPlot()) @@ -82,7 +82,7 @@ test_that("plotPlp", { testthat::expect_equal(dir.exists(file.path(saveLoc,'plots')), T) # expect plots to be there - expect_true(length(dir(file.path(saveLoc,'plots')))>0) + expect_true(length(dir(file.path(saveLoc,'plots'))) > 0) }) @@ -134,10 +134,4 @@ test_that("plotSmoothCalibration", { ) ) -}) - - - - - - +}) \ No newline at end of file diff --git a/tests/testthat/test-population.R b/tests/testthat/test-population.R index 91f533c6e..6add83fee 100644 --- a/tests/testthat/test-population.R +++ b/tests/testthat/test-population.R @@ -293,7 +293,7 @@ test_that("population creation parameters", { studyPopulation <- createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings() ) @@ -306,7 +306,7 @@ test_that("population creation parameters", { #firstExposureOnly test (should have no effect on simulated data) studyPopulation <- createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings(firstExposureOnly = T) ) @@ -317,7 +317,7 @@ test_that("population creation parameters", { #requireTimeAtRisk studyPopulation <- createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings(requireTimeAtRisk = T) ) @@ -330,7 +330,7 @@ test_that("population creation parameters", { expect_warning( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings(requireTimeAtRisk = T, minTimeAtRisk = 99999) ) ) @@ -338,7 +338,7 @@ test_that("population creation parameters", { #washoutPeriod = 365, studyPopulation <- createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings(washoutPeriod = 365) ) nrOutcomes4 <- sum(studyPopulation$outcomeCount) @@ -349,7 +349,7 @@ test_that("population creation parameters", { expect_error( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = 
outcomeId, populationSettings = defaultSettings(washoutPeriod = -1) ) ) @@ -358,7 +358,7 @@ expect_error( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings( priorOutcomeLookback = -1, removeSubjectsWithPriorOutcome = T ) ) ) @@ -370,7 +370,7 @@ expect_error( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings( minTimeAtRisk = -1, requireTimeAtRisk = T ) ) ) @@ -382,7 +382,7 @@ expect_error( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings( startAnchor = 'cohort stard' ) ) ) @@ -394,7 +394,7 @@ expect_error( createStudyPopulation( plpData = plpData, - outcomeId = 3, + outcomeId = outcomeId, populationSettings = defaultSettings( endAnchor = 'cohort ent' ) ) ) @@ -495,4 +495,31 @@ }) +testthat::test_that("Providing an existing population and skipping population creation works", { + popSize <- 400 + newPopulation <- population[sample.int(nrow.default(population), popSize), ] + + tinyPlpData$population <- newPopulation + + plpResults <- runPlp( + plpData = tinyPlpData, + outcomeId = outcomeId, + analysisId = "1", + analysisName = "existing population", + populationSettings = createStudyPopulationSettings(), + splitSettings = createDefaultSplitSetting(), + modelSettings = setLassoLogisticRegression(), + executeSettings = createExecuteSettings( + runSplitData = TRUE, + runPreprocessData = FALSE, + runModelDevelopment = TRUE + ) + ) + + trainPredictions <- plpResults$prediction %>% + dplyr::filter(.data$evaluationType == "Train") %>% nrow.default() + testPredictions <- plpResults$prediction %>% + dplyr::filter(.data$evaluationType == "Test") %>% nrow.default() + expect_equal(popSize, trainPredictions + testPredictions) +}) diff --git a/tests/testthat/test-rclassifier.R b/tests/testthat/test-rclassifier.R index 54195ca08..5a0dadc31 100644 --- a/tests/testthat/test-rclassifier.R +++ b/tests/testthat/test-rclassifier.R @@ -108,7 +108,7 @@ test_that("GBM working checks", { expect_lte(nrow(fitModel$covariateImportance), trainData$covariateData$covariateRef %>% dplyr::tally() %>% dplyr::pull()) - expect_equal(fitModel$modelDesign$outcomeId, 2) + expect_equal(fitModel$modelDesign$outcomeId, outcomeId) expect_equal(fitModel$modelDesign$targetId, 1) # TODO check other model design values?
diff --git a/tests/testthat/test-recalibration.R b/tests/testthat/test-recalibration.R index 2cc4a9257..33e717197 100644 --- a/tests/testthat/test-recalibration.R +++ b/tests/testthat/test-recalibration.R @@ -32,7 +32,7 @@ prediction <- data.frame( metaData <- list( modelType = "binary", targetId = 1, - outcomeId = 2, + outcomeId = outcomeId, timepoint = 365 ) @@ -66,7 +66,7 @@ test_that("recalibratePlpRefit", { newPop <- plpResult$prediction %>% dplyr::select(-"value") %>% dplyr::filter(.data$evaluationType %in% c('Test','Train')) attr(newPop, 'metaData') <- list( targetId = 1, - outcomeId = 2, + outcomeId = outcomeId, restrictPlpDataSettings = PatientLevelPrediction::createRestrictPlpDataSettings(), populationSettings = PatientLevelPrediction::createStudyPopulationSettings() ) @@ -95,7 +95,7 @@ test_that("survival", { metaData <- list( modelType = "survival", targetId = 1, - outcomeId = 2, + outcomeId = outcomeId, timepoint = 365 ) diff --git a/tests/testthat/test-runPlpHelpers.R b/tests/testthat/test-runPlpHelpers.R index 2256a6450..4d361410d 100644 --- a/tests/testthat/test-runPlpHelpers.R +++ b/tests/testthat/test-runPlpHelpers.R @@ -22,7 +22,7 @@ test_that("check printHeader runs", { header <- printHeader( plpData = plpData, targetId = 1, - outcomeId = 2, + outcomeId = outcomeId, analysisId = 123, analysisName = 'test', ExecutionDateTime = Sys.time() @@ -36,7 +36,7 @@ test_that("checkInputs", { check <- checkInputs( list( plpData = plpData, - outcomeId = 2, + outcomeId = outcomeId, populationSettings = populationSettings ) ) @@ -49,7 +49,7 @@ test_that("checkInputs", { checkInputs( list( plpData = NULL, - outcomeId = 2, + outcomeId = outcomeId, populationSettings = populationSettings ) ) @@ -71,7 +71,7 @@ test_that("checkInputs", { checkInputs( list( plpData = plpData, - outcomeId = 2, + outcomeId = outcomeId, populationSettings = 'populationSettings' ) ) diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R index db3480392..84ee78516 100644 --- a/tests/testthat/test-validation.R +++ b/tests/testthat/test-validation.R @@ -17,22 +17,6 @@ context("Validation") # Test unit for the creation of the study externalValidatePlp - -connectionDetails <- Eunomia::getEunomiaConnectionDetails() -Eunomia::createCohorts(connectionDetails) - -databaseDetails <- createDatabaseDetails( - connectionDetails = connectionDetails, - cdmDatabaseSchema = "main", - cdmDatabaseName = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - outcomeDatabaseSchema = "main", - outcomeTable = "cohort", - targetId = 1, - outcomeIds = 3, #make this ids - cdmVersion = 5) - modelVal <- loadPlpModel(file.path(saveLoc, 'Test', 'plpResult', 'model')) validationDatabaseDetailsVal <- databaseDetails # from run multiple tests validationRestrictPlpDataSettingsVal <- createRestrictPlpDataSettings(washoutPeriod = 0, sampleSize = NULL) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 000000000..097b24163 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/BenchmarkTasks.Rmd b/vignettes/BenchmarkTasks.Rmd index 13eb511ab..4c452aa99 100644 --- a/vignettes/BenchmarkTasks.Rmd +++ b/vignettes/BenchmarkTasks.Rmd @@ -25,7 +25,7 @@ output: --- ## Benchmark Tasks For Large-Scale Empirical Analyses diff --git a/vignettes/ConstrainedPredictors.Rmd b/vignettes/ConstrainedPredictors.Rmd index 2776122cb..8c1daadec 100644 --- a/vignettes/ConstrainedPredictors.Rmd +++ b/vignettes/ConstrainedPredictors.Rmd @@ -25,7 +25,7 @@ output: 
--- ## Constrained Predictors @@ -36,20 +36,20 @@ Here we provide a set of phenotypes that can be used as predictors in prediction These phenotypes can be extracted from the PhenotypeLibrary R package. To install the R package run: -```{r echo = T} +```{r, echo = TRUE, message = FALSE, warning = FALSE, tidy = FALSE, eval=FALSE} remotes::install_github('ohdsi/PhenotypeLibrary') ``` To extract the cohort definition for Alcoholism with an id of 1165, just run: -```{r echo = T} +```{r echo = TRUE, message = FALSE, warning = FALSE, tidy = FALSE, eval = FALSE} PhenotypeLibrary::getPlCohortDefinitionSet(1165) ``` in general you can extract all the cohorts by running: -```{r echo = T} +```{r echo = TRUE, message = FALSE, warning = FALSE, tidy = FALSE, eval = FALSE} phenotypeDefinitions <- PhenotypeLibrary::getPlCohortDefinitionSet(1152:1215) ```