diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 7a5e8ac..d22bb32 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, develop] + branches: [main] release: types: [published] workflow_dispatch: diff --git a/DESCRIPTION b/DESCRIPTION index 8fa61a7..cdb11c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: DeepPatientLevelPrediction Type: Package Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model Version: 1.0.0 -Date: 29-08-2022 +Date: 09-10-2022 Authors@R: c( person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), @@ -23,7 +23,7 @@ Imports: data.table, FeatureExtraction (>= 3.0.0), ParallelLogger (>= 2.0.0), - PatientLevelPrediction, + PatientLevelPrediction (>= 6.0.4), rlang, torch (>= 0.8.0) Suggests: @@ -34,7 +34,7 @@ Suggests: plyr, testthat Remotes: - ohdsi/PatientLevelPrediction@develop, + ohdsi/PatientLevelPrediction, ohdsi/FeatureExtraction, ohdsi/Eunomia RoxygenNote: 7.2.1 diff --git a/NAMESPACE b/NAMESPACE index a102062..a65b67a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ export(Estimator) export(fitEstimator) export(gridCvDeep) export(predictDeepEstimator) +export(setDefaultResNet) +export(setDefaultTransformer) export(setMultiLayerPerceptron) export(setResNet) export(setTransformer) diff --git a/NEWS.md b/NEWS.md index c0bdb3a..08ed504 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,4 +3,5 @@ DeepPatientLevelPrediction 1.0.0 - created an Estimator R6 class to handle the model fitting - Added three non-temporal models. An MLP, a ResNet and a Transformer +- ResNet and Transformer have default versions of hyperparameters - Created tests and documentation for the package diff --git a/R/Estimator-class.R b/R/Estimator-class.R new file mode 100644 index 0000000..33980e1 --- /dev/null +++ b/R/Estimator-class.R @@ -0,0 +1,403 @@ +# @file Estimator-class.R +# +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Estimator +#' @description +#' A generic R6 class that wraps around a torch nn module and can be used to +#' fit and predict the model defined in that module. 
+#' @export +Estimator <- R6::R6Class( + classname = "Estimator", + lock_objects = FALSE, + public = list( + #' @description + #' Creates a new estimator + #' @param baseModel The torch nn module to use as model + #' @param modelParameters Parameters to initialize the baseModel + #' @param fitParameters Parameters required for the estimator fitting + #' @param optimizer A torch optimizer to use, default is Adam + #' @param criterion The torch loss function to use, defaults to + #' binary cross entropy with logits + #' @param scheduler learning rate scheduler to use + #' @param device Which device to use for fitting, default is cpu + #' @param patience Patience to use for early stopping + initialize = function(baseModel, + modelParameters, + fitParameters, + optimizer = torch::optim_adam, + criterion = torch::nn_bce_with_logits_loss, + scheduler = torch::lr_reduce_on_plateau, + device = "cpu", + patience = 4) { + self$device <- device + self$model <- do.call(baseModel, modelParameters) + self$modelParameters <- modelParameters + self$fitParameters <- fitParameters + self$epochs <- self$itemOrDefaults(fitParameters, "epochs", 10) + self$learningRate <- self$itemOrDefaults(fitParameters, "learningRate", 1e-3) + self$l2Norm <- self$itemOrDefaults(fitParameters, "weightDecay", 1e-5) + self$batchSize <- self$itemOrDefaults(fitParameters, "batchSize", 1024) + self$prefix <- self$itemOrDefaults(fitParameters, "prefix", self$model$name) + + self$previousEpochs <- self$itemOrDefaults(fitParameters, "previousEpochs", 0) + self$model$to(device = self$device) + + self$optimizer <- optimizer( + params = self$model$parameters, + lr = self$learningRate, + weight_decay = self$l2Norm + ) + self$criterion <- criterion() + + self$scheduler <- scheduler(self$optimizer, + patience = 1, + verbose = FALSE, mode = "max" + ) + + # gradient accumulation is useful when training large numbers where + # you can only fit few samples on the GPU in each batch. 
+ self$gradAccumulationIter <- 1 + + if (!is.null(patience)) { + self$earlyStopper <- EarlyStopping$new(patience = patience) + } else { + self$earlyStopper <- NULL + } + + self$bestScore <- NULL + self$bestEpoch <- NULL + }, + + #' @description fits the estimator + #' @param dataset a torch dataset to use for model fitting + #' @param testDataset a torch dataset to use for early stopping + fit = function(dataset, testDataset) { + valLosses <- c() + valAUCs <- c() + batchIndex <- torch::torch_randperm(length(dataset)) + 1L + batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) + + testBatchIndex <- 1:length(testDataset) + testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / self$batchSize)) + + modelStateDict <- list() + epoch <- list() + times <- list() + learnRates <- list() + for (epochI in 1:self$epochs) { + startTime <- Sys.time() + trainLoss <- self$fitEpoch(dataset, batchIndex) + endTime <- Sys.time() + + # predict on test data + scores <- self$score(testDataset, testBatchIndex) + delta <- endTime - startTime + currentEpoch <- epochI + self$previousEpochs + lr <- self$optimizer$param_groups[[1]]$lr + ParallelLogger::logInfo( + "Epochs: ", currentEpoch, + " | Val AUC: ", round(scores$auc, 3), + " | Val Loss: ", round(scores$loss, 3), + " | Train Loss: ", round(trainLoss, 3), + " | Time: ", round(delta, 3), " ", + units(delta), + " | LR: ", lr + ) + self$scheduler$step(scores$auc) + valLosses <- c(valLosses, scores$loss) + valAUCs <- c(valAUCs, scores$auc) + learnRates <- c(learnRates, lr) + times <- c(times, round(delta, 3)) + if (!is.null(self$earlyStopper)) { + self$earlyStopper$call(scores$auc) + if (self$earlyStopper$improved) { + # here it saves the results to lists rather than files + modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) + epoch[[epochI]] <- currentEpoch + } + if (self$earlyStopper$earlyStop) { + ParallelLogger::logInfo("Early stopping, validation AUC stopped improving") + ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) + self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) + return(invisible(self)) + } + } else { + modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) + epoch[[epochI]] <- currentEpoch + } + } + ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) + self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) + invisible(self) + }, + + #' @description + #' fits estimator for one epoch (one round through the data) + #' @param dataset torch dataset to use for fitting + #' @param batchIndex indices of batches + fitEpoch = function(dataset, batchIndex) { + trainLosses <- torch::torch_empty(length(batchIndex)) + ix <- 1 + self$model$train() + progressBar <- utils::txtProgressBar(style = 3) + coro::loop(for (b in batchIndex) { + self$optimizer$zero_grad() + batch <- self$batchToDevice(dataset[b]) + out <- self$model(batch[[1]]) + loss <- self$criterion(out, batch[[2]]) + loss$backward() + + self$optimizer$step() + trainLosses[ix] <- loss$detach() + utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) + ix <- ix + 1 + }) + close(progressBar) + trainLosses$mean()$item() + }, + + #' @description + #' calculates loss and auc after training for one epoch + #' @param dataset The torch dataset to use to evaluate loss and auc + #' @param batchIndex Indices of batches in the dataset + #' 
@return list with average loss and auc in the dataset + score = function(dataset, batchIndex) { + torch::with_no_grad({ + loss <- torch::torch_empty(c(length(batchIndex))) + predictions <- list() + targets <- list() + self$model$eval() + ix <- 1 + coro::loop(for (b in batchIndex) { + batch <- self$batchToDevice(dataset[b]) + pred <- self$model(batch[[1]]) + predictions <- c(predictions, pred) + targets <- c(targets, batch[[2]]) + loss[ix] <- self$criterion(pred, batch[[2]]) + ix <- ix + 1 + }) + mean_loss <- loss$mean()$item() + predictionsClass <- data.frame( + value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())), + outcomeCount = as.matrix(torch::torch_cat(targets)$cpu()) + ) + attr(predictionsClass, "metaData")$modelType <- "binary" + auc <- PatientLevelPrediction::computeAuc(predictionsClass) + }) + return(list(loss = mean_loss, auc = auc)) + }, + + #' @description + #' operations that run when fitting is finished + #' @param valAUCs validation AUC values + #' @param modelStateDict fitted model parameters + #' @param valLosses validation losses + #' @param epoch list of epochs fit + #' @param learnRates learning rate sequence used so far + finishFit = function(valAUCs, modelStateDict, valLosses, epoch, learnRates) { + bestEpochInd <- which.max(valAUCs) # change this if a different metric is used + + bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device = self$device)) + self$model$load_state_dict(bestModelStateDict) + + bestEpoch <- epoch[[bestEpochInd]] + self$bestEpoch <- bestEpoch + self$bestScore <- list( + loss = valLosses[bestEpochInd], + auc = valAUCs[bestEpochInd] + ) + self$learnRateSchedule <- learnRates[1:bestEpochInd] + + ParallelLogger::logInfo("Loaded best model (based on AUC) from epoch ", bestEpoch) + ParallelLogger::logInfo("ValLoss: ", self$bestScore$loss) + ParallelLogger::logInfo("valAUC: ", self$bestScore$auc) + }, + + #' @description + #' Fits whole training set on a specific number of epochs + #' TODO What happens when learning rate changes per epochs? + #' Ideally I would copy the learning rate strategy from before + #' and adjust for different sizes ie more iterations/updates??? 
+ #' @param dataset torch dataset + #' @param learnRates learnRateSchedule from CV + fitWholeTrainingSet = function(dataset, learnRates = NULL) { + if (is.null(self$bestEpoch)) { + self$bestEpoch <- self$epochs + } + + batchIndex <- torch::torch_randperm(length(dataset)) + 1L + batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) + for (epoch in 1:self$bestEpoch) { + self$optimizer$param_groups[[1]]$lr <- learnRates[[epoch]] + self$fitEpoch(dataset, batchIndex) + } + }, + + #' @description + #' save model and those parameters needed to reconstruct it + #' @param path where to save the model + #' @param name name of file + #' @return the path to saved model + save = function(path, name) { + savePath <- file.path(path, name) + torch::torch_save( + list( + modelStateDict = self$model$state_dict(), + modelParameters = self$modelParameters, + fitParameters = self$fitParameters, + epoch = self$epochs + ), + savePath + ) + return(savePath) + }, + + + #' @description + #' predicts and outputs the probabilities + #' @param dataset Torch dataset to create predictions for + #' @return predictions as probabilities + predictProba = function(dataset) { + batchIndex <- 1:length(dataset) + batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) + torch::with_no_grad({ + predictions <- torch::torch_empty(length(dataset), device=self$device) + self$model$eval() + progressBar <- utils::txtProgressBar(style = 3) + ix <- 1 + coro::loop(for (b in batchIndex) { + batch <- self$batchToDevice(dataset[b]) + target <- batch$target + pred <- self$model(batch$batch) + predictions[b] <- torch::torch_sigmoid(pred) + utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) + ix <- ix + 1 + }) + predictions <- as.array(predictions$cpu()) + close(progressBar) + }) + return(predictions) + }, + + #' @description + #' predicts and outputs the class + #' @param dataset A torch dataset to create predictions for + #' @param threshold Which threshold to use for predictions + #' @return The predicted class for the data in the dataset + predict = function(dataset, threshold = NULL) { + predictions <- self$predictProba(dataset) + + if (is.null(threshold)) { + # use outcome rate + threshold <- dataset$target$sum()$item() / length(dataset) + } + predicted_class <- as.integer(predictions > threshold) + return(predicted_class) + }, + + #' @description + #' sends a batch of data to device + #' assumes batch includes lists of tensors to arbitrary nested depths + #' @param batch the batch to send, usually a list of torch tensors + #' @return the batch on the required device + batchToDevice = function(batch) { + if (class(batch)[1] == "torch_tensor") { + batch <- batch$to(device = self$device) + } else { + ix <- 1 + for (b in batch) { + if (class(b)[1] == "torch_tensor") { + b <- b$to(device = self$device) + } else { + b <- self$batchToDevice(b) + } + if (!is.null(b)) { + batch[[ix]] <- b + } + ix <- ix + 1 + } + } + return(batch) + }, + + #' @description + #' select item from list, and if it's null sets a default + #' @param list A list with items + #' @param item Which list item to retrieve + #' @param default The value to return if list doesn't have item + #' @return the list item or default + itemOrDefaults = function(list, item, default = NULL) { + value <- list[[item]] + if (is.null(value)) default else value + } + ) +) + +#' Earlystopping class +#' @description +#' Stops training if a loss or metric has stopped improving +EarlyStopping <- R6::R6Class( + classname = 
"EarlyStopping", + lock_objects = FALSE, + public = list( + #' @description + #' Creates a new earlystopping object + #' @param patience Stop after this number of epochs if loss doesn't improve + #' @param delta How much does the loss need to improve to count as improvement + #' @param verbose If information should be printed out + #' @return a new earlystopping object + initialize = function(patience = 3, delta = 0, verbose = TRUE) { + self$patience <- patience + self$counter <- 0 + self$verbose <- verbose + self$bestScore <- NULL + self$earlyStop <- FALSE + self$improved <- FALSE + self$delta <- delta + self$previousScore <- 0 + }, + #' @description + #' call the earlystopping object and increment a counter if loss is not + #' improving + #' @param metric the current metric value + call = function(metric) { + score <- metric + if (is.null(self$bestScore)) { + self$bestScore <- score + self$improved <- TRUE + } else if (score < self$bestScore + self$delta) { + self$counter <- self$counter + 1 + self$improved <- FALSE + if (self$verbose) { + ParallelLogger::logInfo( + "EarlyStopping counter: ", self$counter, + " out of ", self$patience + ) + } + if (self$counter >= self$patience) { + self$earlyStop <- TRUE + } + } else { + self$bestScore <- score + self$counter <- 0 + self$improved <- TRUE + } + self$previousScore <- score + } + ) +) diff --git a/R/Estimator.R b/R/Estimator.R index 8abb048..00d78c5 100644 --- a/R/Estimator.R +++ b/R/Estimator.R @@ -27,7 +27,6 @@ #' @param optimizer which optimizer to use #' @param scheduler which learning rate scheduler to use #' @param criterion loss function to use -#' @param posWeight If more weight should be added to positive labels during training - will result in miscalibrated models #' @param earlyStopping If earlyStopping should be used which stops the training of your metric is not improving #' @param earlyStoppingMetric Which parameter to use for early stopping #' @param patience patience for earlyStopper @@ -247,7 +246,6 @@ gridCvDeep <- function(mappedData, ParallelLogger::logInfo(paste0("Fold ", i)) trainDataset <- torch::dataset_subset(dataset, indices = which(fold != i)) testDataset <- torch::dataset_subset(dataset, indices = which(fold == i)) - # fitParams$posWeight <- trainDataset$dataset$posWeight estimator <- Estimator$new( baseModel = baseModel, modelParameters = modelParams, @@ -301,7 +299,6 @@ gridCvDeep <- function(mappedData, fitParams <- finalParam[fitParamNames] fitParams$epochs <- finalParam$learnSchedule$bestEpoch fitParams$batchSize <- batchSize - fitParams$posWeight <- dataset$posWeight # create the dir if (!dir.exists(file.path(modelLocation))) { dir.create(file.path(modelLocation), recursive = T) @@ -350,387 +347,4 @@ gridCvDeep <- function(mappedData, numericalIndex = numericalIndex ) ) -} - -#' Estimator -#' @description -#' A generic R6 class that wraps around a torch nn module and can be used to -#' fit and predict the model defined in that module. 
-#' @export -Estimator <- R6::R6Class( - classname = "Estimator", - lock_objects = FALSE, - public = list( - #' @description - #' Creates a new estimator - #' @param baseModel The torch nn module to use as model - #' @param modelParameters Parameters to initialize the baseModel - #' @param fitParameters Parameters required for the estimator fitting - #' @param optimizer A torch optimizer to use, default is Adam - #' @param criterion The torch loss function to use, defaults to - #' binary cross entropy with logits - #' @param scheduler learning rate scheduler to use - #' @param device Which device to use for fitting, default is cpu - #' @param patience Patience to use for early stopping - initialize = function(baseModel, - modelParameters, - fitParameters, - optimizer = torch::optim_adam, - criterion = torch::nn_bce_with_logits_loss, - scheduler = torch::lr_reduce_on_plateau, - device = "cpu", - patience = 4) { - self$device <- device - self$model <- do.call(baseModel, modelParameters) - self$modelParameters <- modelParameters - self$fitParameters <- fitParameters - self$epochs <- self$itemOrDefaults(fitParameters, "epochs", 10) - self$learningRate <- self$itemOrDefaults(fitParameters, "learningRate", 1e-3) - self$l2Norm <- self$itemOrDefaults(fitParameters, "weightDecay", 1e-5) - self$batchSize <- self$itemOrDefaults(fitParameters, "batchSize", 1024) - self$posWeight <- self$itemOrDefaults(fitParameters, "posWeight", 1) - self$prefix <- self$itemOrDefaults(fitParameters, "prefix", self$model$name) - - self$previousEpochs <- self$itemOrDefaults(fitParameters, "previousEpochs", 0) - self$model$to(device = self$device) - - self$optimizer <- optimizer( - params = self$model$parameters, - lr = self$learningRate, - weight_decay = self$l2Norm - ) - self$criterion <- criterion(torch::torch_tensor(self$posWeight, - device = self$device - )) - - self$scheduler <- scheduler(self$optimizer, - patience = 1, - verbose = FALSE, mode = "max" - ) - - # gradient accumulation is useful when training large numbers where - # you can only fit few samples on the GPU in each batch. 
- self$gradAccumulationIter <- 1 - - if (!is.null(patience)) { - self$earlyStopper <- EarlyStopping$new(patience = patience) - } else { - self$earlyStopper <- NULL - } - - self$bestScore <- NULL - self$bestEpoch <- NULL - }, - - #' @description fits the estimator - #' @param dataset a torch dataset to use for model fitting - #' @param testDataset a torch dataset to use for early stopping - fit = function(dataset, testDataset) { - valLosses <- c() - valAUCs <- c() - batchIndex <- torch::torch_randperm(length(dataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - - testBatchIndex <- 1:length(testDataset) - testBatchIndex <- split(testBatchIndex, ceiling(seq_along(testBatchIndex) / self$batchSize)) - - modelStateDict <- list() - epoch <- list() - times <- list() - learnRates <- list() - for (epochI in 1:self$epochs) { - startTime <- Sys.time() - trainLoss <- self$fitEpoch(dataset, batchIndex) - endTime <- Sys.time() - - # predict on test data - scores <- self$score(testDataset, testBatchIndex) - delta <- endTime - startTime - currentEpoch <- epochI + self$previousEpochs - lr <- self$optimizer$param_groups[[1]]$lr - ParallelLogger::logInfo( - "Epochs: ", currentEpoch, - " | Val AUC: ", round(scores$auc, 3), - " | Val Loss: ", round(scores$loss, 3), - " | Train Loss: ", round(trainLoss, 3), - " | Time: ", round(delta, 3), " ", - units(delta), - " | LR: ", lr - ) - self$scheduler$step(scores$auc) - valLosses <- c(valLosses, scores$loss) - valAUCs <- c(valAUCs, scores$auc) - learnRates <- c(learnRates, lr) - times <- c(times, round(delta, 3)) - if (!is.null(self$earlyStopper)) { - self$earlyStopper$call(scores$auc) - if (self$earlyStopper$improved) { - # here it saves the results to lists rather than files - modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) - epoch[[epochI]] <- currentEpoch - } - if (self$earlyStopper$earlyStop) { - ParallelLogger::logInfo("Early stopping, validation AUC stopped improving") - ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) - return(invisible(self)) - } - } else { - modelStateDict[[epochI]] <- lapply(self$model$state_dict(), function(x) x$detach()$cpu()) - epoch[[epochI]] <- currentEpoch - } - } - ParallelLogger::logInfo("Average time per epoch was: ", round(mean(as.numeric(times)), 3), " ", units(delta)) - self$finishFit(valAUCs, modelStateDict, valLosses, epoch, learnRates) - invisible(self) - }, - - #' @description - #' fits estimator for one epoch (one round through the data) - #' @param dataset torch dataset to use for fitting - #' @param batchIndex indices of batches - fitEpoch = function(dataset, batchIndex) { - trainLosses <- torch::torch_empty(length(batchIndex)) - ix <- 1 - self$model$train() - progressBar <- utils::txtProgressBar(style = 3) - coro::loop(for (b in batchIndex) { - self$optimizer$zero_grad() - batch <- self$batchToDevice(dataset[b]) - out <- self$model(batch[[1]]) - loss <- self$criterion(out, batch[[2]]) - loss$backward() - - self$optimizer$step() - trainLosses[ix] <- loss$detach() - utils::setTxtProgressBar(progressBar, ix / length(batchIndex)) - ix <- ix + 1 - }) - close(progressBar) - trainLosses$mean()$item() - }, - - #' @description - #' calculates loss and auc after training for one epoch - #' @param dataset The torch dataset to use to evaluate loss and auc - #' @param batchIndex Indices of batches in the dataset - #' 
@return list with average loss and auc in the dataset - score = function(dataset, batchIndex) { - torch::with_no_grad({ - loss <- torch::torch_empty(c(length(batchIndex))) - predictions <- list() - targets <- list() - self$model$eval() - ix <- 1 - coro::loop(for (b in batchIndex) { - batch <- self$batchToDevice(dataset[b]) - pred <- self$model(batch[[1]]) - predictions <- c(predictions, pred) - targets <- c(targets, batch[[2]]) - loss[ix] <- self$criterion(pred, batch[[2]]) - ix <- ix + 1 - }) - mean_loss <- loss$mean()$item() - predictionsClass <- data.frame( - value = as.matrix(torch::torch_sigmoid(torch::torch_cat(predictions)$cpu())), - outcomeCount = as.matrix(torch::torch_cat(targets)$cpu()) - ) - attr(predictionsClass, "metaData")$modelType <- "binary" - auc <- PatientLevelPrediction::computeAuc(predictionsClass) - }) - return(list(loss = mean_loss, auc = auc)) - }, - - #' @description - #' operations that run when fitting is finished - #' @param valAUCs validation AUC values - #' @param modelStateDict fitted model parameters - #' @param valLosses validation losses - #' @param epoch list of epochs fit - #' @param learnRates learning rate sequence used so far - finishFit = function(valAUCs, modelStateDict, valLosses, epoch, learnRates) { - bestEpochInd <- which.max(valAUCs) # change this if a different metric is used - - bestModelStateDict <- lapply(modelStateDict[[bestEpochInd]], function(x) x$to(device = self$device)) - self$model$load_state_dict(bestModelStateDict) - - bestEpoch <- epoch[[bestEpochInd]] - self$bestEpoch <- bestEpoch - self$bestScore <- list( - loss = valLosses[bestEpochInd], - auc = valAUCs[bestEpochInd] - ) - self$learnRateSchedule <- learnRates[1:bestEpochInd] - - ParallelLogger::logInfo("Loaded best model (based on AUC) from epoch ", bestEpoch) - ParallelLogger::logInfo("ValLoss: ", self$bestScore$loss) - ParallelLogger::logInfo("valAUC: ", self$bestScore$auc) - }, - - #' @description - #' Fits whole training set on a specific number of epochs - #' TODO What happens when learning rate changes per epochs? - #' Ideally I would copy the learning rate strategy from before - #' and adjust for different sizes ie more iterations/updates??? 
- #' @param dataset torch dataset - #' @param learnRates learnRateSchedule from CV - fitWholeTrainingSet = function(dataset, learnRates = NULL) { - if (is.null(self$bestEpoch)) { - self$bestEpoch <- self$epochs - } - - batchIndex <- torch::torch_randperm(length(dataset)) + 1L - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - for (epoch in 1:self$bestEpoch) { - self$optimizer$param_groups[[1]]$lr <- learnRates[[epoch]] - self$fitEpoch(dataset, batchIndex) - } - }, - - #' @description - #' save model and those parameters needed to reconstruct it - #' @param path where to save the model - #' @param name name of file - #' @return the path to saved model - save = function(path, name) { - savePath <- file.path(path, name) - torch::torch_save( - list( - modelStateDict = self$model$state_dict(), - modelParameters = self$modelParameters, - fitParameters = self$fitParameters, - epoch = self$epochs - ), - savePath - ) - return(savePath) - }, - - - #' @description - #' predicts and outputs the probabilities - #' @param dataset Torch dataset to create predictions for - #' @return predictions as probabilities - predictProba = function(dataset) { - batchIndex <- 1:length(dataset) - batchIndex <- split(batchIndex, ceiling(seq_along(batchIndex) / self$batchSize)) - torch::with_no_grad({ - predictions <- c() - self$model$eval() - coro::loop(for (b in batchIndex) { - batch <- self$batchToDevice(dataset[b]) - target <- batch$target - pred <- self$model(batch$batch) - predictions <- c(predictions, as.array(torch::torch_sigmoid(pred$cpu()))) - }) - }) - return(predictions) - }, - - #' @description - #' predicts and outputs the class - #' @param dataset A torch dataset to create predictions for - #' @param threshold Which threshold to use for predictions - #' @return The predicted class for the data in the dataset - predict = function(dataset, threshold = NULL) { - predictions <- self$predictProba(dataset) - - if (is.null(threshold)) { - # use outcome rate - threshold <- dataset$target$sum()$item() / length(dataset) - } - predicted_class <- as.integer(predictions > threshold) - return(predicted_class) - }, - - #' @description - #' sends a batch of data to device - #' assumes batch includes lists of tensors to arbitrary nested depths - #' @param batch the batch to send, usually a list of torch tensors - #' @return the batch on the required device - batchToDevice = function(batch) { - if (class(batch)[1] == "torch_tensor") { - batch <- batch$to(device = self$device) - } else { - ix <- 1 - for (b in batch) { - if (class(b)[1] == "torch_tensor") { - b <- b$to(device = self$device) - } else { - b <- self$batchToDevice(b) - } - if (!is.null(b)) { - batch[[ix]] <- b - } - ix <- ix + 1 - } - } - return(batch) - }, - - #' @description - #' select item from list, and if it's null sets a default - #' @param list A list with items - #' @param item Which list item to retrieve - #' @param default The value to return if list doesn't have item - #' @return the list item or default - itemOrDefaults = function(list, item, default = NULL) { - value <- list[[item]] - if (is.null(value)) default else value - } - ) -) - -#' Earlystopping class -#' @description -#' Stops training if a loss or metric has stopped improving -EarlyStopping <- R6::R6Class( - classname = "EarlyStopping", - lock_objects = FALSE, - public = list( - #' @description - #' Creates a new earlystopping object - #' @param patience Stop after this number of epochs if loss doesn't improve - #' @param delta How much does the loss need 
to improve to count as improvement - #' @param verbose If information should be printed out - #' @return a new earlystopping object - initialize = function(patience = 3, delta = 0, verbose = TRUE) { - self$patience <- patience - self$counter <- 0 - self$verbose <- verbose - self$bestScore <- NULL - self$earlyStop <- FALSE - self$improved <- FALSE - self$delta <- delta - self$previousScore <- 0 - }, - #' @description - #' call the earlystopping object and increment a counter if loss is not - #' improving - #' @param metric the current metric value - call = function(metric) { - score <- metric - if (is.null(self$bestScore)) { - self$bestScore <- score - self$improved <- TRUE - } else if (score < self$bestScore + self$delta) { - self$counter <- self$counter + 1 - self$improved <- FALSE - if (self$verbose) { - ParallelLogger::logInfo( - "EarlyStopping counter: ", self$counter, - " out of ", self$patience - ) - } - if (self$counter >= self$patience) { - self$earlyStop <- TRUE - } - } else { - self$bestScore <- score - self$counter <- 0 - self$improved <- TRUE - } - self$previousScore <- score - } - ) -) +} \ No newline at end of file diff --git a/R/MLP.R b/R/MLP.R index dc5f56a..7df166a 100644 --- a/R/MLP.R +++ b/R/MLP.R @@ -66,7 +66,12 @@ setMultiLayerPerceptron <- function(numLayers = c(1:8), ) param <- PatientLevelPrediction::listCartesian(paramGrid) - + if (randomSample>length(param)) { + stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", + "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), + "\n Please lower the amount of randomSamples")) + } + if (hyperParamSearch == "random") { param <- param[sample(length(param), randomSample)] } diff --git a/R/ResNet.R b/R/ResNet.R index e8ce322..d0364a4 100644 --- a/R/ResNet.R +++ b/R/ResNet.R @@ -16,6 +16,44 @@ # See the License for the specific language governing permissions and # limitations under the License. +#' setDefaultResNet +#' +#' @description +#' Creates settings for a default ResNet model +#' +#' @details +#' Model architecture from https://arxiv.org/abs/2106.11959 . +#' Hyperparameters chosen based on experience with a few prediction problems.
+#' +#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' +#' @param batchSize Size of batch, default: 1024 +#' @param epochs Number of epochs to run, default: 10 +#' @param seed Random seed to use + +#' @export +setDefaultResNet <- function(device='cpu', + batchSize=1024, + epochs=10, + seed=NULL) { + + resnetSettings <- setResNet(numLayers = 6, + sizeHidden = 512, + hiddenFactor = 2, + residualDropout = 0.1, + hiddenDropout = 0.4, + sizeEmbedding = 256, + weightDecay = 1e-6, + learningRate = 0.01, + hyperParamSearch = 'random', + randomSample = 1, + device = device, + batchSize = batchSize, + seed = seed) + attr(resnetSettings, 'settings')$name <- 'defaultResnet' + return(resnetSettings) +} + + #' setResNet #' #' @description @@ -42,7 +80,7 @@ #' #' @export setResNet <- function(numLayers = c(1:8), - sizeHidden = c(2^(6:9)), + sizeHidden = c(2^(6:10)), hiddenFactor = c(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), @@ -73,6 +111,12 @@ setResNet <- function(numLayers = c(1:8), param <- PatientLevelPrediction::listCartesian(paramGrid) + if (randomSample>length(param)) { + stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", + "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), + "\n Please lower the amount of randomSamples")) + } + if (hyperParamSearch == "random") { param <- param[sample(length(param), randomSample)] } @@ -106,14 +150,17 @@ ResNet <- torch::nn_module( initialize = function(catFeatures, numFeatures = 0, sizeEmbedding, sizeHidden, numLayers, hiddenFactor, activation = torch::nn_relu, normalization = torch::nn_batch_norm1d, hiddenDropout = NULL, - residualDropout = NULL, d_out = 1) { + residualDropout = NULL, d_out = 1, concatNum=TRUE) { self$embedding <- torch::nn_embedding_bag( num_embeddings = catFeatures + 1, embedding_dim = sizeEmbedding, padding_idx = 1 ) - if (numFeatures != 0) { + if (numFeatures != 0 & concatNum != TRUE) { self$numEmbedding <- numericalEmbedding(numFeatures, sizeEmbedding) + } else { + self$numEmbedding <- NULL + sizeEmbedding <- sizeEmbedding + numFeatures } self$first_layer <- torch::nn_linear(sizeEmbedding, sizeHidden) @@ -140,9 +187,10 @@ ResNet <- torch::nn_module( x_cat <- x$cat x_num <- x$num x_cat <- self$embedding(x_cat + 1L) # padding_idx is 1 - if (!is.null(x_num)) { + if (!is.null(x_num) & (!is.null(self$numEmbedding))) { x <- (x_cat + self$numEmbedding(x_num)$mean(dim = 2)) / 2 - # x <- torch::torch_cat(list(x_cat, x_num), dim = 2L) + } else if (!is.null(x_num) & is.null(self$numEmbedding)) { + x <- torch::torch_cat(list(x_cat, x_num), dim = 2L) } else { x <- x_cat } diff --git a/R/Transformer.R b/R/Transformer.R index 0ab7227..ea353ca 100644 --- a/R/Transformer.R +++ b/R/Transformer.R @@ -1,3 +1,56 @@ +# @file Transformer.R +# +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of DeepPatientLevelPrediction +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#' Create default settings for a non-temporal transformer +#' +#' @description A transformer model with default hyperparameters +#' @details from https://arxiv.org/abs/2106.11959 +#' Default hyperparameters from paper +#' @param device Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu' +#' @param batchSize Size of batch, default: 512 +#' @param epochs Number of epochs to run, default: 10 +#' @param seed random seed to use +#' +#' @export +setDefaultTransformer <- function(device='cpu', + batchSize=512, + epochs=10, + seed=NULL) { + transformerSettings <- setTransformer(numBlocks = 3, + dimToken = 192, + dimOut = 1, + numHeads = 8, + attDropout = 0.2, + ffnDropout = 0.1, + resDropout = 0.0, + dimHidden = 256, + weightDecay = 1e-5, + learningRate = 1e-4, + batchSize = batchSize, + epochs = epochs, + device = device, + hyperParamSearch = 'random', + randomSample = 1, + seed = seed) + attr(transformerSettings, 'settings')$name <- 'defaultTransformer' + return(transformerSettings) +} + #' create settings for training a non-temporal transformer #' #' @description A transformer model @@ -17,7 +70,7 @@ #' @param epochs How many epochs to run the model for #' @param device Which device to use, cpu or cuda #' @param hyperParamSearch what kind of hyperparameter search to do, default 'random' -#' @param randomSamples How many samples to use in hyperparameter search if random +#' @param randomSample How many samples to use in hyperparameter search if random #' @param seed Random seed to use #' #' @export setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, @@ -26,7 +79,7 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, resDropout = 0, dimHidden = 512, weightDecay = 1e-6, learningRate = 3e-4, batchSize = 1024, epochs = 10, device = "cpu", hyperParamSearch = "random", - randomSamples = 100, seed = NULL) { + randomSample = 1, seed = NULL) { if (is.null(seed)) { seed <- as.integer(sample(1e5, 1)) } @@ -54,8 +107,14 @@ setTransformer <- function(numBlocks = 3, dimToken = 96, dimOut = 1, param <- PatientLevelPrediction::listCartesian(paramGrid) + if (randomSample>length(param)) { + stop(paste("\n Chosen amount of randomSamples is higher than the amount of possible hyperparameter combinations.", + "\n randomSample:", randomSample,"\n Possible hyperparameter combinations:", length(param), + "\n Please lower the amount of randomSample")) + } + if (hyperParamSearch == "random") { - param <- param[sample(length(param), randomSamples)] + param <- param[sample(length(param), randomSample)] } attr(param, "settings") <- list( diff --git a/man/EarlyStopping.Rd b/man/EarlyStopping.Rd index 1087705..0baf50a 100644 --- a/man/EarlyStopping.Rd +++ b/man/EarlyStopping.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Estimator.R +% Please edit documentation in R/Estimator-class.R \name{EarlyStopping} \alias{EarlyStopping} \title{Earlystopping class} diff --git a/man/Estimator.Rd b/man/Estimator.Rd index a6fc4bc..e825cf9 100644 --- a/man/Estimator.Rd +++ b/man/Estimator.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Estimator.R +% Please edit documentation in R/Estimator-class.R \name{Estimator} \alias{Estimator} \title{Estimator} diff --git a/man/setDefaultResNet.Rd b/man/setDefaultResNet.Rd new file mode 100644 index 0000000..28c29df --- /dev/null +++ b/man/setDefaultResNet.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ResNet.R +\name{setDefaultResNet} +\alias{setDefaultResNet} +\title{setDefaultResNet} +\usage{ +setDefaultResNet(device = "cpu", batchSize = 1024, epochs = 10, seed = NULL) +} +\arguments{ +\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} + +\item{batchSize}{Size of batch, default: 1024} + +\item{epochs}{Number of epochs to run, default: 10} + +\item{seed}{Random seed to use} +} +\description{ +Creates settings for a default ResNet model +} +\details{ +Model architecture from https://arxiv.org/abs/2106.11959 . +Hyperparameters chosen based on experience with a few prediction problems. +} diff --git a/man/setDefaultTransformer.Rd b/man/setDefaultTransformer.Rd new file mode 100644 index 0000000..d3b54fb --- /dev/null +++ b/man/setDefaultTransformer.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Transformer.R +\name{setDefaultTransformer} +\alias{setDefaultTransformer} +\title{Create default settings for a non-temporal transformer} +\usage{ +setDefaultTransformer( + device = "cpu", + batchSize = 512, + epochs = 10, + seed = NULL +) +} +\arguments{ +\item{device}{Which device to run analysis on, either 'cpu' or 'cuda', default: 'cpu'} +\item{batchSize}{Size of batch, default: 512} + +\item{epochs}{Number of epochs to run, default: 10} + +\item{seed}{random seed to use} +} +\description{ +A transformer model with default hyperparameters +} +\details{ +from https://arxiv.org/abs/2106.11959 +Default hyperparameters from paper +} diff --git a/man/setEstimator.Rd b/man/setEstimator.Rd index d3001e3..50919af 100644 --- a/man/setEstimator.Rd +++ b/man/setEstimator.Rd @@ -14,8 +14,6 @@ \item{criterion}{loss function to use} -\item{posWeight}{If more weight should be added to positive labels during training - will result in miscalibrated models} - \item{earlyStopping}{If earlyStopping should be used which stops the training of your metric is not improving} \item{earlyStoppingMetric}{Which parameter to use for early stopping} diff --git a/man/setResNet.Rd b/man/setResNet.Rd index d0ced0d..5f66d45 100644 --- a/man/setResNet.Rd +++ b/man/setResNet.Rd @@ -6,7 +6,7 @@ \usage{ setResNet( numLayers = c(1:8), - sizeHidden = c(2^(6:9)), + sizeHidden = c(2^(6:10)), hiddenFactor = c(1:4), residualDropout = c(seq(0, 0.5, 0.05)), hiddenDropout = c(seq(0, 0.5, 0.05)), diff --git a/man/setTransformer.Rd b/man/setTransformer.Rd index 54927b0..f46a66a 100644 --- a/man/setTransformer.Rd +++ b/man/setTransformer.Rd @@ -19,7 +19,7 @@ setTransformer( epochs = 10, device = "cpu", hyperParamSearch = "random", - randomSamples = 100, + randomSample = 1, seed = NULL ) } @@ -52,7 +52,7 @@ setTransformer( \item{hyperParamSearch}{what kind of hyperparameter search to do, default 'random'} -\item{randomSamples}{How many samples to use in hyperparameter search if random} +\item{randomSample}{How many samples to use in hyperparameter search if random} \item{seed}{Random seed to use} } diff --git a/tests/testthat/test-MLP.R b/tests/testthat/test-MLP.R index 3bc0d8f..97d7103 100644 --- a/tests/testthat/test-MLP.R +++ b/tests/testthat/test-MLP.R @@ -108,3 +108,20 @@ test_that("MLP nn-module works ", { # model works without numeric variables expect_equal(output$shape, 10) }) + + +test_that("Errors are produced by settings function", { + randomSample <- 2 + + expect_error(setMultiLayerPerceptron( + numLayers = 1, + sizeHidden = 128, + dropout = 0.0, + sizeEmbedding = 128, + weightDecay = 1e-6, + learningRate = 0.01,
+ seed = 42, + hyperParamSearch = 'random', + randomSample = randomSample)) + +}) \ No newline at end of file diff --git a/tests/testthat/test-ResNet.R b/tests/testthat/test-ResNet.R index cb9a397..8f5a12f 100644 --- a/tests/testthat/test-ResNet.R +++ b/tests/testthat/test-ResNet.R @@ -87,7 +87,7 @@ test_that("ResNet nn-module works ", { pars <- sum(sapply(model$parameters, function(x) prod(x$shape))) # expected number of parameters - expect_equal(pars, 1289) + expect_equal(pars, 1295) input <- list() input$cat <- torch::torch_randint(0, 5, c(10, 5), dtype = torch::torch_long()) @@ -111,3 +111,33 @@ test_that("ResNet nn-module works ", { # model works without numeric variables expect_equal(output$shape, 10) }) + +test_that("Default Resnet works", { + defaultResNet <- setDefaultResNet() + params <- defaultResNet$param[[1]] + + expect_equal(params$numLayers, 6) + expect_equal(params$sizeHidden, 512) + expect_equal(params$hiddenFactor, 2) + expect_equal(params$residualDropout, 0.1) + expect_equal(params$hiddenDropout, 0.4) + expect_equal(params$sizeEmbedding, 256) + +}) + +test_that("Errors are produced by settings function", { + randomSample <- 2 + + expect_error(setResNet( + numLayers = 1, + sizeHidden = 128, + hiddenFactor = 1, + residualDropout = 0.0, + hiddenDropout = 0.0, + sizeEmbedding = 128, + weightDecay = 1e-6, + learningRate = 0.01, + seed = 42, + hyperParamSearch = 'random', + randomSample = randomSample)) +}) diff --git a/tests/testthat/test-Transformer.R b/tests/testthat/test-Transformer.R index 913e687..228e881 100644 --- a/tests/testthat/test-Transformer.R +++ b/tests/testthat/test-Transformer.R @@ -2,7 +2,7 @@ settings <- setTransformer( numBlocks = 1, dimToken = 8, dimOut = 1, numHeads = 2, attDropout = 0.0, ffnDropout = 0.2, resDropout = 0.0, dimHidden = 32, batchSize = 64, - epochs = 1, randomSamples = 1 + epochs = 1, randomSample = 1 ) test_that("Transformer settings work", { @@ -59,3 +59,24 @@ test_that("transformer nn-module works", { output <- model(input) expect_equal(output$shape, 10) }) + +test_that("Default Transformer works", { + defaultTransformer <- setDefaultTransformer() + params <- defaultTransformer$param[[1]] + + expect_equal(params$numBlocks, 3) + expect_equal(params$dimToken, 192) + expect_equal(params$numHeads, 8) + expect_equal(params$resDropout, 0.0) + expect_equal(params$attDropout, 0.2) + + settings <- attr(defaultTransformer, 'settings') + + expect_equal(settings$name, 'defaultTransformer') +}) + +test_that("Errors are produced by settings function", { + randomSample <- 2 + + expect_error(setTransformer(randomSample = randomSample)) +}) diff --git a/vignettes/BuildingDeepModels.Rmd b/vignettes/BuildingDeepModels.Rmd index 8ac9925..41ee944 100644 --- a/vignettes/BuildingDeepModels.Rmd +++ b/vignettes/BuildingDeepModels.Rmd @@ -454,7 +454,7 @@ modelSettings <- setTransformer(numBlocks = 3, batchSize = 128, epochs = 10, device = 'cpu', # or 'cuda' for GPU - randomSamples = 1) + randomSample = 1) TransformerResult <- PatientLevelPrediction::runPlp(
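As a usage sketch (not part of the diff): the two new exports, setDefaultResNet() and setDefaultTransformer(), slot into the same runPlp() workflow the vignette above uses. plpData and outcomeId are placeholders for objects created with PatientLevelPrediction (e.g. via getPlpData()); the remaining runPlp() arguments are omitted for brevity.

# Illustrative only: default model settings with a single fixed hyperparameter combination
library(DeepPatientLevelPrediction)

resnetSettings      <- setDefaultResNet(device = "cpu", batchSize = 1024, seed = 42)
transformerSettings <- setDefaultTransformer(device = "cpu", batchSize = 512, seed = 42)

# plpData and outcomeId are assumed to exist already (created with PatientLevelPrediction)
resnetResult <- PatientLevelPrediction::runPlp(
  plpData       = plpData,
  outcomeId     = outcomeId,
  modelSettings = resnetSettings
)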
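The randomSample guard added to setMultiLayerPerceptron(), setResNet(), and setTransformer() fails fast when more random draws are requested than the hyperparameter grid contains, which is exactly what the new tests exercise. A minimal sketch of that behaviour, using setTransformer(), whose defaults in this diff define a single-combination grid:

# One combination in the default grid, so two random samples triggers the new error
tryCatch(
  setTransformer(randomSample = 2),
  error = function(e) message("Expected error: ", conditionMessage(e))
)

# Requesting one sample (the new default) works
settings <- setTransformer(randomSample = 1)
length(settings$param)  # 1 hyperparameter combination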
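The EarlyStopping class in the new R/Estimator-class.R backs the Estimator's stopping rule: each call() with a metric that does not beat bestScore + delta increments a counter, and once the counter reaches patience, earlyStop flips to TRUE. A standalone sketch of that counter logic; the class is not exported (hence the ::: access) and the AUC values are invented for illustration.

es <- DeepPatientLevelPrediction:::EarlyStopping$new(patience = 3, delta = 0, verbose = FALSE)
valAUCs <- c(0.61, 0.65, 0.64, 0.64, 0.63)  # hypothetical validation AUCs per epoch
for (auc in valAUCs) {
  es$call(auc)
  if (es$earlyStop) {
    message("Stopped: no improvement for ", es$patience, " epochs")
    break
  }
}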