From 91d64405321ec5a7b9036b4b599e254a02fbe8ba Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 17 Dec 2024 09:13:29 +0100 Subject: [PATCH] move mgcv to suggests, guard mgcv use behing check_installed, lint smoothCalibration --- DESCRIPTION | 2 +- R/Plotting.R | 66 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a45cfa47..7d2f0bed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,6 @@ Imports: gridExtra, Matrix, memuse, - mgcv, ParallelLogger (>= 2.0.0), polspline, pROC, @@ -54,6 +53,7 @@ Suggests: knitr, markdown, Metrics, + mgcv, parallel, plyr, pool, diff --git a/R/Plotting.R b/R/Plotting.R index d2ed49d4..df6e7a1d 100644 --- a/R/Plotting.R +++ b/R/Plotting.R @@ -972,14 +972,23 @@ plotSmoothCalibration <- function(plpResult, scatter = FALSE, bins = 20, sample = TRUE, - typeColumn = 'evaluation', + typeColumn = "evaluation", saveLocation = NULL, fileName = "smoothCalibration.pdf") { - if (!smooth %in% c("loess", "rcs")) stop(ParallelLogger::logError("Smooth type must be either 'loess' or 'rcs")) - if (nKnots < 3) stop(ParallelLogger::logError("Number of knots must be larger than 3")) + if (!smooth %in% c("loess", "rcs")) { + stop(ParallelLogger::logError("Smooth type must be either 'loess' or 'rcs")) + } + if (nKnots < 3) { + stop(ParallelLogger::logError("Number of knots must be larger than 3")) + } + if (smooth == "rcs") { + rlang::check_installed("mgcv", + reason = "mgcv is required for restricted cubic spline smoothing") + } - evalTypes <- unique(plpResult$performanceEvaluation$calibrationSummary[,typeColumn]) + evalTypes <- + unique(plpResult$performanceEvaluation$calibrationSummary[, typeColumn]) plots <- list() length(plots) <- length(evalTypes) @@ -989,7 +998,7 @@ plotSmoothCalibration <- function(plpResult, evalType <- evalTypes[i] ParallelLogger::logInfo(paste("Smooth calibration plot for "), evalType) - if('prediction'%in%names(plpResult)) { + if ("prediction" %in% names(plpResult)) { x <- plpResult$performanceEvaluation$calibrationSummary %>% dplyr::filter(.data[[typeColumn]] == evalType) %>% dplyr::select("averagePredictedProbability", "observedIncidence") @@ -1015,7 +1024,7 @@ plotSmoothCalibration <- function(plpResult, y <- y[nma] p <- p[nma] - logit <- log(p/(1 - p)) # delete cases with 0 and 1 probs + logit <- log(p / (1 - p)) # delete cases with 0 and 1 probs nonInf <- !is.infinite(logit) sumNonInf <- sum(!nonInf) if (sumNonInf > 0) @@ -1027,13 +1036,13 @@ plotSmoothCalibration <- function(plpResult, p <- p[order(p)] if (smooth == "loess") { - if(sample){ - if(length(p)>40000){ - inds <- unique(c(0,seq(0, length(p), by = floor(length(p)/20000)), - length(p))) + if (sample) { + if (length(p) > 40000) { + inds <- unique(c(0, seq(0, length(p), + by = floor(length(p) / 20000)), length(p))) p <- p[inds] y <- y[inds] - } else if(length(p)>20000){ + } else if (length(p) > 20000) { inds <- sample(length(p), 20000) p <- p[inds] y <- y[inds] @@ -1041,7 +1050,6 @@ plotSmoothCalibration <- function(plpResult, } # loess smoothData <- data.frame(y, p) - # xlim <- ylim <- c(0, 1) smoothPlot <- plotSmoothCalibrationLoess(data = smoothData, span = span) + ggplot2::coord_cartesian( xlim = c(0, maxes), @@ -1130,15 +1138,19 @@ plotSmoothCalibration <- function(plpResult, # Histogram object detailing the distibution of event/noevent for each probability interval - popData1 <- sparsePred[, c('averagePredictedProbability', 'PersonCountWithOutcome')] + popData1 <- sparsePred[, c("averagePredictedProbability", "PersonCountWithOutcome")] popData1$Label <- "Outcome" - colnames(popData1) <- c('averagePredictedProbability', 'PersonCount', "Label") - popData2 <- sparsePred[,c('averagePredictedProbability', 'PersonCountAtRisk')] + colnames(popData1) <- c("averagePredictedProbability", "PersonCount", + "Label") + popData2 <- sparsePred[, c("averagePredictedProbability", + "PersonCountAtRisk")] popData2$Label <- "No Outcome" popData2$PersonCountAtRisk <- -1 * (popData2$PersonCountAtRisk - popData1$PersonCount) - colnames(popData2) <- c('averagePredictedProbability', 'PersonCount', "Label") + colnames(popData2) <- c("averagePredictedProbability", "PersonCount", + "Label") popData <- rbind(popData1, popData2) - popData$averagePredictedProbability <- factor(popData$averagePredictedProbability) + popData$averagePredictedProbability <- + factor(popData$averagePredictedProbability) histPlot <- ggplot2::ggplot( data = popData, ggplot2::aes( @@ -1148,20 +1160,20 @@ plotSmoothCalibration <- function(plpResult, ) ) + ggplot2::geom_bar( - data = popData[popData$Label == "Outcome",], + data = popData[popData$Label == "Outcome", ], stat = "identity" ) + ggplot2::geom_bar( - data = popData[popData$Label == "No Outcome",], + data = popData[popData$Label == "No Outcome", ], stat = "identity" ) + ggplot2::geom_bar(stat = "identity") + ggplot2::scale_x_continuous(labels = abs) + - ggplot2::coord_flip( ) + + ggplot2::coord_flip() + ggplot2::theme( - axis.title.x=ggplot2::element_blank(), - axis.text.x=ggplot2::element_blank(), - axis.ticks.x=ggplot2::element_blank() + axis.title.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank() ) } @@ -1174,8 +1186,8 @@ plotSmoothCalibration <- function(plpResult, names(plots) <- tolower(evalTypes) if (!is.null(saveLocation)) { - if(!dir.exists(saveLocation)) { - dir.create(saveLocation, recursive = T) + if (!dir.exists(saveLocation)) { + dir.create(saveLocation, recursive = TRUE) } for (i in seq_along(evalTypes)) { if (!failedEvalType[i]) { @@ -1184,7 +1196,7 @@ plotSmoothCalibration <- function(plpResult, plots[[i]]$histPlot, ncol = 1, nrow = 2, - heights=c(2,1) + heights = c(2, 1) ) fileNameComponents <- unlist(strsplit(fileName, split = "\\.")) n <- length(fileNameComponents) @@ -1194,7 +1206,7 @@ plotSmoothCalibration <- function(plpResult, collapse = "." ) } else { - actualFileName = fileNameComponents[1] + actualFileName <- fileNameComponents[1] } saveFileName <- paste0( actualFileName,