remove version from is_installed (#444)
* remove version from is_installed

* update GA versions

* fix NOTES about documentation

* preserve randomness in table generation in tests
egillax authored Apr 26, 2024
1 parent 29c3d0b commit 1a1f620
Showing 102 changed files with 3,521 additions and 2,075 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -11,3 +11,5 @@ compare_versions
docs/*
_pkgdown.yml
^vignettes/articles$
^doc$
^Meta$
12 changes: 6 additions & 6 deletions .github/workflows/R_CMD_check_Hades.yaml
@@ -45,7 +45,7 @@ jobs:
CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
@@ -66,7 +66,7 @@ jobs:
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "22.04"))')
- name: Setup conda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3

- uses: r-lib/actions/setup-r-dependencies@v2
with:
@@ -81,7 +81,7 @@ jobs:

- name: Upload source package
if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: package_tarball
path: check/*.tar.gz
@@ -110,7 +110,7 @@ jobs:

steps:

- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
fetch-depth: 0

@@ -136,7 +136,7 @@ jobs:
draft: false
prerelease: false

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
if: ${{ env.new_version != '' }}

- name: Install drat
@@ -152,7 +152,7 @@ jobs:
- name: Download package tarball
if: ${{ env.new_version != '' }}
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: package_tarball

2 changes: 1 addition & 1 deletion .github/workflows/R_CMD_check_main_weekly.yaml
@@ -45,7 +45,7 @@ jobs:
CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
2 changes: 2 additions & 0 deletions .gitignore
@@ -23,3 +23,5 @@ standalone/build/*
/python_models/*
/mycache/*
/inst/shiny/DiagnosticsExplorer/rsconnect/*
/doc/
/Meta/
24 changes: 12 additions & 12 deletions R/DataSplitting.R
@@ -17,7 +17,7 @@


#' Create the settings for defining how the plpData are split into test/validation/train sets using
#' default splitting functions (either random stratified by outcome, time or subject splitting).
#' default splitting functions (either random stratified by outcome, time or subject splitting)
#'
#' @details
#' Returns an object of class \code{splitSettings} that specifies the splitting function that will be called and the settings
@@ -28,9 +28,9 @@
#' @param nfold (numeric) An integer > 1 specifying the number of folds used in cross validation
#' @param splitSeed (numeric) A seed to use when splitting the data for reproducibility (if not set a random number will be generated)
#' @param type (character) Choice of: \itemize{
#' \item{'stratified'}{ Each data point is randomly assigned into the test or a train fold set but this is done stratified such that the outcome rate is consistent in each partition }
#' \item{'time')}{ Older data are assigned into the training set and newer data are assigned into the test set}
#' \item{'subject'}{ Data are partitioned by subject, if a subject is in the data more than once, all the data points for the subject are assigned either into the test data or into the train data (not both).}
#' \item'stratified' Each data point is randomly assigned into the test or a train fold set but this is done stratified such that the outcome rate is consistent in each partition
#' \item'time' Older data are assigned into the training set and newer data are assigned into the test set
#' \item'subject' Data are partitioned by subject, if a subject is in the data more than once, all the data points for the subject are assigned either into the test data or into the train data (not both).
#' }
#'
#' @return
@@ -87,17 +87,17 @@ createDefaultSplitSetting <- function(testFraction=0.25,
#'
#' @details
#' Returns a list containing the training data (Train) and optionally the test data (Test). Train is an Andromeda object containing
#' \itemize{\item{covariates}{ a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the train data }
#' \item{covariateRef}{ a table with the covariate information}
#' \item{labels)}{ a table (rowId, outcomeCount, ...) for each data point in the train data (outcomeCount is the class label) }
#' \item{folds}{ a table (rowId, index) specifying which training fold each data point is in.}
#' \itemize{\item covariates: a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the train data
#' \item covariateRef: a table with the covariate information
#' \item labels: a table (rowId, outcomeCount, ...) for each data point in the train data (outcomeCount is the class label)
#' \item folds: a table (rowId, index) specifying which training fold each data point is in.
#' }
#' Test is an Andromeda object containing
#' \itemize{\item{covariates}{ a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the test data }
#' \item{covariateRef}{ a table with the covariate information}
#' \item{labels)}{ a table (rowId, outcomeCount, ...) for each data point in the test data (outcomeCount is the class label) }
#' \itemize{\item covariates: a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the test data
#' \item covariateRef: a table with the covariate information
#' \item labels: a table (rowId, outcomeCount, ...) for each data point in the test data (outcomeCount is the class label)
#' }
#'
#'
#'
#'
#' @param plpData An object of type \code{plpData} - the patient level prediction
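For reference, a minimal sketch of how the split settings documented above are created. Only the arguments shown in this file are used, the values are illustrative, and the package is assumed to be PatientLevelPrediction (not named in the diff itself).

library(PatientLevelPrediction)  # assumed package; not named in this diff

# Stratified split: 25% test set, 3-fold cross validation on the remainder,
# with a fixed seed so the partitioning is reproducible.
splitSettings <- createDefaultSplitSetting(
  testFraction = 0.25,
  nfold = 3,
  splitSeed = 42,
  type = "stratified"   # alternatives per the docs: "time", "subject"
)
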
16 changes: 6 additions & 10 deletions R/DiagnosePlp.R
@@ -213,16 +213,12 @@ diagnoseMultiplePlp <- function(
#' and whether to normalise the covariates before training
#' @param modelSettings An object of class \code{modelSettings} created using one of the function:
#' \itemize{
#' \item{setLassoLogisticRegression()}{ A lasso logistic regression model}
#' \item{setGradientBoostingMachine()}{ A gradient boosting machine}
#' \item{setAdaBoost()}{ An ada boost model}
#' \item{setRandomForest()}{ A random forest model}
#' \item{setDecisionTree()}{ A decision tree model}
#' \item{setCovNN())}{ A convolutional neural network model}
#' \item{setCIReNN()}{ A recurrent neural network model}
#' \item{setMLP()}{ A neural network model}
#' \item{setDeepNN()}{ A deep neural network model}
#' \item{setKNN()}{ A KNN model}
#' \item setLassoLogisticRegression() A lasso logistic regression model
#' \item setGradientBoostingMachine() A gradient boosting machine
#' \item setAdaBoost() An ada boost model
#' \item setRandomForest() A random forest model
#' \item setDecisionTree() A decision tree model
#' \item setKNN() A KNN model
#'
#' }
#' @param logSettings An object of \code{logSettings} created using \code{createLogSettings}
2 changes: 1 addition & 1 deletion R/FeatureEngineering.R
@@ -47,7 +47,7 @@ featureEngineer <- function(data, featureEngineeringSettings){
#' Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings
#'
#' @param type (character) Choice of: \itemize{
#' \item{'none'}{ No feature engineering - this is the default }
#' \item'none' No feature engineering - this is the default
#' }
#'
#' @return
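A one-line sketch of the corresponding settings constructor; the name createFeatureEngineeringSettings() is not visible in this hunk and is an assumption.

# 'none' is the documented default: no feature engineering is applied.
featureEngineeringSettings <- createFeatureEngineeringSettings(type = "none")
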
9 changes: 4 additions & 5 deletions R/Fit.R
@@ -29,11 +29,10 @@
#' data extracted from the CDM.
#' @param modelSettings An object of class \code{modelSettings} created using one of the function:
#' \itemize{
#' \item{logisticRegressionModel()}{ A lasso logistic regression model}
#' \item{GBMclassifier()}{ A gradient boosting machine}
#' \item{RFclassifier()}{ A random forest model}
#' \item{GLMclassifier ()}{ A generalised linear model}
#' \item{KNNclassifier()}{ A KNN model}
#' \item setLassoLogisticRegression() A lasso logistic regression model
#' \item setGradientBoostingMachine() A gradient boosting machine
#' \item setRandomForest() A random forest model
#' \item setKNN() A KNN model
#' }
#' @param search The search strategy for the hyper-parameter selection (currently not used)
#' @param analysisId The id of the analysis
4 changes: 2 additions & 2 deletions R/HelperFunctions.R
@@ -16,10 +16,10 @@ removeInvalidString <- function(string){


# Borrowed from devtools: https://github.com/hadley/devtools/blob/ba7a5a4abd8258c52cb156e7b26bb4bf47a79f0b/R/utils.r#L44
is_installed <- function (pkg, version = 0) {
is_installed <- function (pkg) {
installed_version <- tryCatch(utils::packageVersion(pkg),
error = function(e) NA)
!is.na(installed_version) && installed_version >= version
!is.na(installed_version)
}

# Borrowed and adapted from devtools: https://github.com/hadley/devtools/blob/ba7a5a4abd8258c52cb156e7b26bb4bf47a79f0b/R/utils.r#L74
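A short usage sketch for the simplified is_installed() helper; the package name and version threshold below are illustrative only.

# is_installed() now only reports whether a package is present. If a minimum
# version still matters, the caller has to compare versions explicitly:
if (is_installed("ggplot2") &&
    utils::packageVersion("ggplot2") >= "3.0.0") {  # example threshold
  message("ggplot2 (>= 3.0.0) is available")
}
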
19 changes: 7 additions & 12 deletions R/LearningCurve.R
@@ -40,25 +40,20 @@
#' \code{trainFractions}. Note, providing \code{trainEvents} will override
#' your input to \code{trainFractions}. The format should be as follows:
#' \itemize{
#' \item{ \code{c(500, 1000, 1500) } - a list of training events}
#' \item \code{c(500, 1000, 1500) } - a list of training events
#' }
#' @param featureEngineeringSettings An object of \code{featureEngineeringSettings} specifying any feature engineering to be learned (using the train data)
#' @param preprocessSettings An object of \code{preprocessSettings}. This setting specifies the minimum fraction of
#' target population who must have a covariate for it to be included in the model training
#' and whether to normalise the covariates before training
#' @param modelSettings An object of class \code{modelSettings} created using one of the function:
#' \itemize{
#' \item{setLassoLogisticRegression()}{ A lasso logistic regression model}
#' \item{setGradientBoostingMachine()}{ A gradient boosting machine}
#' \item{setAdaBoost()}{ An ada boost model}
#' \item{setRandomForest()}{ A random forest model}
#' \item{setDecisionTree()}{ A decision tree model}
#' \item{setCovNN())}{ A convolutional neural network model}
#' \item{setCIReNN()}{ A recurrent neural network model}
#' \item{setMLP()}{ A neural network model}
#' \item{setDeepNN()}{ A deep neural network model}
#' \item{setKNN()}{ A KNN model}
#'
#' \item \code{setLassoLogisticRegression()} A lasso logistic regression model
#' \item \code{setGradientBoostingMachine()} A gradient boosting machine
#' \item \code{setAdaBoost()} An ada boost model
#' \item \code{setRandomForest()} A random forest model
#' \item \code{setDecisionTree()} A decision tree model
#' \item \code{setKNN()} A KNN model
#' }
#' @param logSettings An object of \code{logSettings} created using \code{createLogSettings}
#' specifying how the logging is done
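A hedged sketch of a learning-curve call wiring together the inputs documented above. The surrounding function name is not visible in this excerpt, so createLearningCurve() and the plpData argument are assumptions, and other required arguments are omitted for brevity.

learningCurve <- createLearningCurve(
  plpData       = plpData,                       # assumed to exist already
  trainEvents   = c(500, 1000, 1500),            # overrides trainFractions
  modelSettings = setLassoLogisticRegression(),  # one of the listed options
  logSettings   = createLogSettings(verbosity = "INFO")
  # ... additional arguments (outcome/population settings, etc.) omitted
)
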
12 changes: 6 additions & 6 deletions R/Logging.R
@@ -22,12 +22,12 @@
#'
#' @param verbosity Sets the level of the verbosity. If the log level is at or higher in priority than the logger threshold, a message will print. The levels are:
#' \itemize{
#' \item{DEBUG}{Highest verbosity showing all debug statements}
#' \item{TRACE}{Showing information about start and end of steps}
#' \item{INFO}{Show informative information (Default)}
#' \item{WARN}{Show warning messages}
#' \item{ERROR}{Show error messages}
#' \item{FATAL}{Be silent except for fatal errors}
#' \item DEBUG Highest verbosity showing all debug statements
#' \item TRACE Showing information about start and end of steps
#' \item INFO Show informative information (Default)
#' \item WARN Show warning messages
#' \item ERROR Show error messages
#' \item FATAL Be silent except for fatal errors
#' }
#' @param timeStamp If TRUE a timestamp will be added to each logging statement. Automatically switched on for TRACE level.
#' @param logName A string reference for the logger
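A small sketch of building the log settings with the documented arguments; the values are illustrative.

logSettings <- createLogSettings(
  verbosity = "INFO",        # one of DEBUG, TRACE, INFO (default), WARN, ERROR, FATAL
  timeStamp = TRUE,          # prefix each log message with a timestamp
  logName   = "runPlp Log"   # reference name for the logger
)
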
30 changes: 12 additions & 18 deletions R/RunPlp.R
@@ -49,17 +49,12 @@
#' and whether to normalise the covariates before training
#' @param modelSettings An object of class \code{modelSettings} created using one of the function:
#' \itemize{
#' \item{setLassoLogisticRegression()}{ A lasso logistic regression model}
#' \item{setGradientBoostingMachine()}{ A gradient boosting machine}
#' \item{setAdaBoost()}{ An ada boost model}
#' \item{setRandomForest()}{ A random forest model}
#' \item{setDecisionTree()}{ A decision tree model}
#' \item{setCovNN())}{ A convolutional neural network model}
#' \item{setCIReNN()}{ A recurrent neural network model}
#' \item{setMLP()}{ A neural network model}
#' \item{setDeepNN()}{ A deep neural network model}
#' \item{setKNN()}{ A KNN model}
#'
#' \item setLassoLogisticRegression() A lasso logistic regression model
#' \item setGradientBoostingMachine() A gradient boosting machine
#' \item setAdaBoost() An ada boost model
#' \item setRandomForest() A random forest model
#' \item setDecisionTree() A decision tree model
#' \item setKNN() A KNN model
#' }
#' @param logSettings An object of \code{logSettings} created using \code{createLogSettings}
#' specifying how the logging is done
@@ -71,13 +66,12 @@
#' An object containing the following:
#'
#' \itemize{
#' \item{inputSettings}{A list containing all the settings used to develop the model}
#' \item{model}{ The developed model of class \code{plpModel}}
#' \item{executionSummary}{ A list containing the hardward details, R package details and execution time}
#' \item{performanceEvaluation}{ Various internal performance metrics in sparse format}
#' \item{prediction}{ The plpData cohort table with the predicted risks added as a column (named value)}
#' \item{covariateSummary)}{ A characterization of the features for patients with and without the outcome during the time at risk}
#' \item{analysisRef}{ A list with details about the analysis}
#' \item model The developed model of class \code{plpModel}
#' \item executionSummary A list containing the hardward details, R package details and execution time
#' \item performanceEvaluation Various internal performance metrics in sparse format
#' \item prediction The plpData cohort table with the predicted risks added as a column (named value)
#' \item covariateSummary A characterization of the features for patients with and without the outcome during the time at risk
#' \item analysisRef A list with details about the analysis
#' }
#'
#'
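For context, a minimal sketch of a model-development call using the settings objects documented above. runPlp() and the argument names other than modelSettings and logSettings are assumptions about the full signature, and the remaining required arguments are omitted.

result <- runPlp(
  plpData       = plpData,                       # assumed to exist already
  modelSettings = setLassoLogisticRegression(),  # one of the listed options
  logSettings   = createLogSettings(verbosity = "INFO"),
  saveDirectory = file.path(tempdir(), "plpExample")
  # ... population, split, sampling and preprocessing settings omitted
)
result$model                  # the developed plpModel
result$performanceEvaluation  # internal performance metrics (sparse format)
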
6 changes: 3 additions & 3 deletions R/Sampling.R
@@ -22,9 +22,9 @@
#' Returns an object of class \code{sampleSettings} that specifies the sampling function that will be called and the settings
#'
#' @param type (character) Choice of: \itemize{
#' \item{'none'}{ No sampling is applied - this is the default }
#' \item{'underSample')}{Undersample the non-outcome class to make the data more ballanced}
#' \item{'overSample'}{Oversample the outcome class by adding in each outcome multiple times}
#' \item 'none' No sampling is applied - this is the default
#' \item 'underSample' Undersample the non-outcome class to make the data more ballanced
#' \item 'overSample' Oversample the outcome class by adding in each outcome multiple times
#' }
#' @param numberOutcomestoNonOutcomes (numeric) An numeric specifying the require number of non-outcomes per outcome
#' @param sampleSeed (numeric) A seed to use when splitting the data for reproducibility (if not set a random number will be generated)
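A sketch of the corresponding sampling settings; the constructor name createSampleSettings() is assumed, and only the arguments documented above are used.

sampleSettings <- createSampleSettings(
  type = "underSample",             # default is "none"; "overSample" also allowed
  numberOutcomestoNonOutcomes = 1,  # require one non-outcome per outcome
  sampleSeed = 42                   # fixed seed for reproducibility
)
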
(Diffs for the remaining changed files are not shown.)
