From 7bddc3871bf4385241a0db892956e72c47ffa655 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Mon, 18 Nov 2024 21:09:33 +0100
Subject: [PATCH] docs: export createExistingSplitSettings and re-wrap split Rd files

---
 NAMESPACE                          |  1 +
 man/createDefaultSplitSetting.Rd   | 42 +++++++++++++++++++-----------
 man/createExistingSplitSettings.Rd | 23 ++++++++++++++++
 man/splitData.Rd                   | 36 ++++++++++++++++---------
 4 files changed, 74 insertions(+), 28 deletions(-)
 create mode 100644 man/createExistingSplitSettings.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 5ce71cbba..ad98ff62d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -22,6 +22,7 @@ export(createDatabaseSchemaSettings)
 export(createDefaultExecuteSettings)
 export(createDefaultSplitSetting)
 export(createExecuteSettings)
+export(createExistingSplitSettings)
 export(createFeatureEngineeringSettings)
 export(createGlmModel)
 export(createLearningCurve)
diff --git a/man/createDefaultSplitSetting.Rd b/man/createDefaultSplitSetting.Rd
index b9020bd45..e63e74375 100644
--- a/man/createDefaultSplitSetting.Rd
+++ b/man/createDefaultSplitSetting.Rd
@@ -2,8 +2,9 @@
 % Please edit documentation in R/DataSplitting.R
 \name{createDefaultSplitSetting}
 \alias{createDefaultSplitSetting}
-\title{Create the settings for defining how the plpData are split into test/validation/train sets using 
-default splitting functions (either random stratified by outcome, time or subject splitting)}
+\title{Create the settings for defining how the plpData are split into
+test/validation/train sets using default splitting functions 
+(either random stratified by outcome, time or subject splitting)}
 \usage{
 createDefaultSplitSetting(
   testFraction = 0.25,
@@ -14,28 +15,39 @@ createDefaultSplitSetting(
 )
 }
 \arguments{
-\item{testFraction}{(numeric) A real number between 0 and 1 indicating the test set fraction of the data}
+\item{testFraction}{(numeric) A real number between 0 and 1
+indicating the test set fraction of the data}
 
-\item{trainFraction}{(numeric) A real number between 0 and 1 indicating the train set fraction of the data.
-If not set train is equal to 1 - test}
+\item{trainFraction}{(numeric) A real number between 0 and 1 indicating the 
+train set fraction of the data. If not set, train is equal to 1 - test}
 
-\item{splitSeed}{(numeric) A seed to use when splitting the data for reproducibility (if not set a random number will be generated)}
+\item{splitSeed}{(numeric) A seed to use when splitting the data for 
+reproducibility (if not set a random number will be generated)}
 
-\item{nfold}{(numeric) An integer > 1 specifying the number of folds used in cross validation}
+\item{nfold}{(numeric) An integer > 1 specifying the number of
+folds used in cross validation}
 
-\item{type}{(character) Choice of:  \itemize{
-\item'stratified' Each data point is randomly assigned into the test or a train fold set but this is done stratified such that the outcome rate is consistent in each partition 
-\item'time' Older data are assigned into the training set and newer data are assigned into the test set
-\item'subject' Data are partitioned by subject, if a subject is in the data more than once, all the data points for the subject are assigned either into the test data or into the train data (not both).
-}}
+\item{type}{(character) Choice of: \itemize{
+                                     \item 'stratified' Each data point is
+randomly assigned into the test or a train fold set but this is done
+stratified such that the outcome rate is consistent in each partition
+                                     \item 'time' Older data are assigned
+into the training set and newer data are assigned into the test set
+                                     \item 'subject' Data are partitioned by
+subject: if a subject is in the data more than once, all the data points for
+the subject are assigned either into the test data or into the train data
+(not both).
+                                        }}
 }
 \value{
 An object of class \code{splitSettings}
 }
 \description{
-Create the settings for defining how the plpData are split into test/validation/train sets using 
-default splitting functions (either random stratified by outcome, time or subject splitting)
+Create the settings for defining how the plpData are split into
+test/validation/train sets using default splitting functions 
+(either random stratified by outcome, time or subject splitting)
 }
 \details{
-Returns an object of class \code{splitSettings} that specifies the splitting function that will be called and the settings
+Returns an object of class \code{splitSettings} that specifies the 
+splitting function that will be called and the settings
 }
diff --git a/man/createExistingSplitSettings.Rd b/man/createExistingSplitSettings.Rd
new file mode 100644
index 000000000..18ffda495
--- /dev/null
+++ b/man/createExistingSplitSettings.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DataSplitting.R
+\name{createExistingSplitSettings}
+\alias{createExistingSplitSettings}
+\title{Create the settings for defining how the plpData are split into
+test/validation/train sets using an existing split - good to use for 
+reproducing results from a different run}
+\usage{
+createExistingSplitSettings(splitIds)
+}
+\arguments{
+\item{splitIds}{(data.frame) A data frame with rowId and index columns of 
+type integer/numeric. The index is -1 for the test set and a positive integer
+for the train set folds}
+}
+\value{
+An object of class \code{splitSettings}
+}
+\description{
+Create the settings for defining how the plpData are split into
+test/validation/train sets using an existing split - good to use for 
+reproducing results from a different run
+}
diff --git a/man/splitData.Rd b/man/splitData.Rd
index 4c03f61b8..a4368cf4e 100644
--- a/man/splitData.Rd
+++ b/man/splitData.Rd
@@ -2,7 +2,8 @@
 % Please edit documentation in R/DataSplitting.R
 \name{splitData}
 \alias{splitData}
-\title{Split the plpData into test/train sets using a splitting settings of class \code{splitSettings}}
+\title{Split the plpData into test/train sets using splitting settings of class 
+\code{splitSettings}}
 \usage{
 splitData(
   plpData = plpData,
@@ -11,29 +12,38 @@ splitData(
 )
 }
 \arguments{
-\item{plpData}{An object of type \code{plpData} - the patient level prediction
-data extracted from the CDM.}
+\item{plpData}{An object of type \code{plpData} - the patient level 
+prediction data extracted from the CDM.}
 
-\item{population}{The population created using \code{createStudyPopulation} that define who will be used to develop the model}
+\item{population}{The population created using \code{createStudyPopulation} 
+that defines who will be used to develop the model}
 
-\item{splitSettings}{An object of type \code{splitSettings} specifying the split - the default can be created using \code{createDefaultSplitSetting}}
+\item{splitSettings}{An object of type \code{splitSettings} specifying the 
+split - the default can be created using \code{createDefaultSplitSetting}}
 }
 \value{
 An object of class \code{splitSettings}
 }
 \description{
-Split the plpData into test/train sets using a splitting settings of class \code{splitSettings}
+Split the plpData into test/train sets using splitting settings of class 
+\code{splitSettings}
 }
 \details{
-Returns a list containing the training data (Train) and optionally the test data (Test).  Train is an Andromeda object containing
-\itemize{\item covariates: a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the train data
+Returns a list containing the training data (Train) and optionally the test
+data (Test). Train is an Andromeda object containing
+\itemize{\item covariates: a table (rowId, covariateId, covariateValue)
+containing the covariates for each data point in the train data
          \item covariateRef: a table with the covariate information
-         \item labels: a table (rowId, outcomeCount, ...) for each data point in the train data (outcomeCount is the class label) 
-         \item folds: a table (rowId, index) specifying which training fold each data point is in.
-         } 
+         \item labels: a table (rowId, outcomeCount, ...) for each data point
+in the train data (outcomeCount is the class label)
+         \item folds: a table (rowId, index) specifying which training 
+fold each data point is in.
+         }
 Test is an Andromeda object containing
-\itemize{\item covariates: a table (rowId, covariateId, covariateValue) containing the covariates for each data point in the test data 
+\itemize{\item covariates: a table (rowId, covariateId, covariateValue)
+containing the covariates for each data point in the test data
          \item covariateRef: a table with the covariate information
-         \item labels: a table (rowId, outcomeCount, ...) for each data point in the test data (outcomeCount is the class label) 
+         \item labels: a table (rowId, outcomeCount, ...) for each data
+point in the test data (outcomeCount is the class label)
          }
 }