From e18ff08d4aa0ea863b7c68269e62ae9dc3d8f1ef Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 22 Oct 2024 11:45:06 +0200 Subject: [PATCH 1/3] Add net benefit plot and tests --- man/PatientLevelPrediction.Rd | 4 ++-- man/getNetBenefit.Rd | 20 +++++++++++++++++++ man/plotNetBenefit.Rd | 37 +++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 man/getNetBenefit.Rd create mode 100644 man/plotNetBenefit.Rd diff --git a/man/PatientLevelPrediction.Rd b/man/PatientLevelPrediction.Rd index 8bc15fc71..4946949d9 100644 --- a/man/PatientLevelPrediction.Rd +++ b/man/PatientLevelPrediction.Rd @@ -18,15 +18,15 @@ Useful links: } \author{ -\strong{Maintainer}: Jenna Reps \email{jreps@its.jnj.com} +\strong{Maintainer}: Egill Fridgeirsson \email{e.fridgeirsson@erasmusmc.nl} Authors: \itemize{ + \item Jenna Reps \email{jreps@its.jnj.com} \item Martijn Schuemie \item Marc Suchard \item Patrick Ryan \item Peter Rijnbeek - \item Egill Fridgeirsson } } diff --git a/man/getNetBenefit.Rd b/man/getNetBenefit.Rd new file mode 100644 index 000000000..af758fa79 --- /dev/null +++ b/man/getNetBenefit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Plotting.R +\name{getNetBenefit} +\alias{getNetBenefit} +\title{Calculate net benefit in an efficient way} +\usage{ +getNetBenefit(plpResult, evalType) +} +\arguments{ +\item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} + +\item{evalType}{The evaluation type column} +} +\value{ +A data frame with the net benefit, treat all and treat none values +} +\description{ +Calculate net benefit in an efficient way +} +\keyword{internal} diff --git a/man/plotNetBenefit.Rd b/man/plotNetBenefit.Rd new file mode 100644 index 000000000..35722350b --- /dev/null +++ b/man/plotNetBenefit.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Plotting.R +\name{plotNetBenefit} +\alias{plotNetBenefit} +\title{Plot the net benefit} +\usage{ +plotNetBenefit( + plpResult, + typeColumn = "evaluation", + saveLocation = NULL, + fileName = "netBenefit.png", + evalType = NULL, + ylim = NULL, + xlim = NULL +) +} +\arguments{ +\item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} + +\item{typeColumn}{The name of the column specifying the evaluation type} + +\item{saveLocation}{Directory to save plot (if NULL plot is not saved)} + +\item{fileName}{Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.} + +\item{evalType}{Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted} + +\item{ylim}{The y limits for the plot, if NULL the limits are calculated from the data} + +\item{xlim}{The x limits for the plot, if NULL the limits are calculated from the data} +} +\value{ +A list of ggplot objects +} +\description{ +Plot the net benefit +} From 759d7df51d983f8eaf9dfdaa1350163ce8247400 Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 22 Oct 2024 11:53:52 +0200 Subject: [PATCH 2/3] plotNetBenefit with tests --- NAMESPACE | 1 + R/Plotting.R | 119 +++++++++++++++++++++++++ tests/testthat/test-plotting.R | 154 +++++++++++++++++++++------------ 3 files changed, 217 insertions(+), 57 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7119f6dd7..5ce71cbba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ export(plotDemographicSummary) export(plotF1Measure) export(plotGeneralizability) export(plotLearningCurve) +export(plotNetBenefit) export(plotPlp) export(plotPrecisionRecall) export(plotPredictedPDF) diff --git a/R/Plotting.R b/R/Plotting.R index f2fe6bf0d..f76ebb6c4 100644 --- a/R/Plotting.R +++ b/R/Plotting.R @@ -1216,6 +1216,125 @@ plotSmoothCalibration <- function(plpResult, return(plots) } +#' Plot the net benefit +#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function. +#' @param typeColumn The name of the column specifying the evaluation type +#' @param saveLocation Directory to save plot (if NULL plot is not saved) +#' @param fileName Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats. +#' @param evalType Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted +#' @param ylim The y limits for the plot, if NULL the limits are calculated from the data +#' @param xlim The x limits for the plot, if NULL the limits are calculated from the data +#' @return A list of ggplot objects +#' @export +plotNetBenefit <- function(plpResult, + typeColumn = "evaluation", + saveLocation = NULL, + fileName = "netBenefit.png", + evalType = NULL, + ylim = NULL, + xlim = NULL +) { + if (is.null(evalType)) { + evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[, typeColumn]) + } else { + evalTypes <- evalType + } + + plots <- list() + length(plots) <- length(evalTypes) + for (i in 1:length(evalTypes)) { + evalType <- evalTypes[i] + # calculate net benefit straight from predictions instead of thresholdSummary. + nbData <- getNetBenefit(plpResult, evalType) + if (is.null(ylim)) { + ylim <- c(min(nbData$netBenefit), + max(nbData$netBenefit)) + } + if (is.null(xlim)) { + # match limit in Smooth Calibration by default + xlim <- c(min(nbData$threshold), + max(max(plpResult$performanceEvaluation$calibrationSummary$averagePredictedProbability), + max(plpResult$performanceEvaluation$calibrationSummary$observedIncidence))) + + } + + plots[[i]] <- ggplot2::ggplot(data = nbData, ggplot2::aes(x = .data$threshold)) + + ggplot2::geom_line(ggplot2::aes(y = treatAll, color = "Treat All", linetype = "Treat All")) + + ggplot2::geom_line(ggplot2::aes(y = treatNone, color = "Treat None", linetype = "Treat None")) + + ggplot2::geom_line(ggplot2::aes(y = netBenefit, color = "Net Benefit", linetype = "Net Benefit")) + + ggplot2::scale_color_manual( + name = "Strategies", + values = c( + "Net Benefit" = "blue", + "Treat All" = "red", + "Treat None" = "brown" + ) + ) + + ggplot2::scale_linetype_manual( + name = "Strategies", + values = c( + "Net Benefit" = "solid", + "Treat All" = "dashed", + "Treat None" = "dashed" + ) + ) + + ggplot2::labs( + x = "Predicted Probability", + y = "Net Benefit" + ) + + ggplot2::ggtitle(evalType) + + ggplot2::coord_cartesian(xlim = xlim, ylim = ylim) + } + + plot <- gridExtra::marrangeGrob(plots, nrow = length(plots), ncol = 1) + + if (!is.null(saveLocation)) { + if (!dir.exists(saveLocation)) { + dir.create(saveLocation, recursive = T) + } + ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400) + } + return(plot) +} + +#' Calculate net benefit in an efficient way +#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function. +#' @param evalType The evaluation type column +#' @return A data frame with the net benefit, treat all and treat none values +#' @keywords internal +getNetBenefit <- function(plpResult, evalType) { + prediction <- plpResult$prediction %>% dplyr::filter(.data$evaluationType == evalType) + if (nrow(prediction) == 0) { + stop("No prediction data found for evaluation type ", evalType) + } + prediction <- prediction %>% dplyr::arrange(dplyr::desc(.data$value)) %>% + dplyr::mutate( + cumsumTrue = cumsum(.data$outcomeCount), + cumsumFalse = cumsum(1 - .data$outcomeCount) + ) + trueCount <- sum(prediction$outcomeCount) + n <- nrow(prediction) + falseCount <- n - trueCount + outcomeRate <- trueCount / n + + nbData <- prediction %>% + dplyr::group_by(.data$value) %>% + dplyr::summarise( + threshold = unique(.data$value), + TP = max(.data$cumsumTrue), + FP = max(.data$cumsumFalse), + ) %>% + dplyr::ungroup() + + nbData <- nbData %>% + dplyr::mutate( + netBenefit = (.data$TP / n) - (.data$FP / n) * (.data$threshold / (1 - .data$threshold)), + treatAll = outcomeRate - (1 - outcomeRate) * .data$threshold / (1 - .data$threshold), + treatNone = 0 + ) + return(nbData) +} + plotSmoothCalibrationLoess <- function(data, span = 0.75) { fit <- stats::loess(y ~ p, data = data, degree = 2, span = span) predictedFit <- stats::predict(fit, se = TRUE) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 17039166e..4f9574b9b 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -17,53 +17,49 @@ library("testthat") context("Plotting") -#TODO: add input checks and test these... -#options(fftempdir = getwd()) +# TODO: add input checks and test these... test_that("plots", { - # test all the outputs are ggplots - test <- plotSparseRoc(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - - test <- plotPredictedPDF(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - - test <- plotPreferencePDF(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - - test <- plotPrecisionRecall(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - - test <- plotF1Measure(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - + test <- plotSparseRoc(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + + test <- plotPredictedPDF(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + + test <- plotPreferencePDF(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + + test <- plotPrecisionRecall(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + + test <- plotF1Measure(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + if (!is.null(plpResult$performanceEvaluation$demographicSummary)) { - test <- plotDemographicSummary(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') + test <- plotDemographicSummary(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") } - - test <- plotSparseCalibration(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - - test <- plotPredictionDistribution(plpResult, typeColumn = 'evaluation') - testthat::expect_s3_class(test, 'arrangelist') - + + test <- plotSparseCalibration(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + + test <- plotPredictionDistribution(plpResult, typeColumn = "evaluation") + testthat::expect_s3_class(test, "arrangelist") + test <- plotVariableScatterplot(plpResult$covariateSummary) - testthat::expect_s3_class(test, 'ggplot') - - test <- plotGeneralizability(plpResult$covariateSummary, fileName = NULL) - testthat::expect_s3_class(test, 'grob') + testthat::expect_s3_class(test, "ggplot") + test <- plotGeneralizability(plpResult$covariateSummary, fileName = NULL) + testthat::expect_s3_class(test, "grob") }) test_that("outcomeSurvivalPlot", { - # test the plot works test <- outcomeSurvivalPlot(plpData = plpData, outcomeId = outcomeId) - testthat::expect_s3_class(test, 'ggsurvplot') - + testthat::expect_s3_class(test, "ggsurvplot") + testthat::expect_error(outcomeSurvivalPlot()) testthat::expect_error(outcomeSurvivalPlot(plpData = NULL)) testthat::expect_error(outcomeSurvivalPlot(outcomeId = 094954)) @@ -71,30 +67,25 @@ test_that("outcomeSurvivalPlot", { test_that("plotPlp", { - # test the plot works test <- plotPlp( - plpResult = plpResult, - saveLocation = file.path(saveLoc, 'plots'), - typeColumn = 'evaluation' + plpResult = plpResult, + saveLocation = file.path(saveLoc, "plots"), + typeColumn = "evaluation" ) - testthat::expect_equal(test, T) - testthat::expect_equal(dir.exists(file.path(saveLoc,'plots')), T) - + testthat::expect_equal(test, TRUE) + testthat::expect_equal(dir.exists(file.path(saveLoc, "plots")), TRUE) + # expect plots to be there - expect_true(length(dir(file.path(saveLoc,'plots'))) > 0) - + expect_true(length(dir(file.path(saveLoc, "plots"))) > 0) }) test_that("plotSmoothCalibration", { - # test the plot works test <- plotSmoothCalibration( - plpResult = plpResult, + plpResult = plpResult, smooth = "loess", - span = 1, - nKnots = 5, - scatter = T, + scatter = TRUE, bins = 20, saveLocation = file.path(saveLoc, "plots") ) @@ -105,27 +96,29 @@ test_that("plotSmoothCalibration", { file.path(saveLoc, "plots", "smoothCalibrationTest.pdf") ) ) - + pred <- plpResult$prediction plpResult$prediction <- NULL test2 <- plotSmoothCalibration(plpResult, smooth = "loess", span = 1, nKnots = 5, - scatter = T, + scatter = TRUE, bins = 20, - sample = T, - saveLocation = NULL) + sample = TRUE, + saveLocation = NULL + ) testthat::expect_s3_class(test2$test$smoothPlot, c("gg", "ggplot")) plpResult$prediction <- pred - + test3 <- plotSmoothCalibration(plpResult, smooth = "rcs", span = 1, nKnots = 5, - scatter = F, + scatter = FALSE, bins = 20, - fileName = NULL) + fileName = NULL + ) testthat::expect_s3_class(test3$test$smoothPlot, c("gg", "ggplot")) testthat::expect_s3_class(test3$test$histPlot, c("gg", "ggplot")) testthat::expect_true( # is this tested needed again? @@ -133,5 +126,52 @@ test_that("plotSmoothCalibration", { file.path(saveLoc, "plots", "smoothCalibrationTest.pdf") ) ) - -}) \ No newline at end of file +}) + +nbData <- getNetBenefit(plpResult, evalType = "Test") +test_that("getNetBenefit returns the correct dataframe", { + + expect_s3_class(nbData, "data.frame") + expectedColumns <- c("threshold", "TP", "FP", "netBenefit", "treatAll", "treatNone") + expect_true(all(expectedColumns %in% colnames(nbData))) +}) + +test_that("getNetBenefit computes the net benefit correctly", { + threshold <- nbData$threshold[[1]] + truePositives <- nbData$TP[[1]] + falsePositives <- nbData$FP[[1]] + n <- nrow(plpResult$prediction %>% dplyr::filter(.data$evaluationType == "Test")) + netBenefitCalculated <- (truePositives / n) - (falsePositives / n) * (threshold / (1 - threshold)) + expect_equal(nbData$netBenefit[[1]], netBenefitCalculated) +}) + +test_that("getNetBenefit handles invalid evalType gracefully", { + expect_error(getNetBenefit(plpResult, evalType = "InvalidType"), + "No prediction data found for evaluation type InvalidType") +}) + +test_that("plotNetBenefit returns a grob object", { + plot <- plotNetBenefit(plpResult, evalType = "Test") + expect_true(inherits(plot, "arrangelist")) +}) + +test_that("plotNetBenefit saves plot when saveLocation is specified", { + tempDir <- tempdir() + plotNetBenefit(plpResult, saveLocation = tempDir, fileName = "netBenefit.png", evalType = "Test") + expect_true(file.exists(file.path(tempDir, "netBenefit.png"))) + #Clean up + file.remove(file.path(tempDir, "netBenefit.png")) +}) + +test_that("plotNetBenefit handles NULL evalType", { + plot <- plotNetBenefit(plpResult, evalType = NULL) + expect_true(inherits(plot, "arrangelist")) +}) + + +test_that("plotNetBenefit creates correct number of plots when evalType is NULL", { + plot <- plotNetBenefit(plpResult, evalType = NULL) + # Since evalType is NULL, it should plot for all unique evaluation types + evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary$evaluation) + expect_equal(length(plot[[1]]$grobs) - 1 , length(evalTypes)) # -1 for text grob +}) From 2368097bc4d1c77e146d088d19e2f79a48a9e868 Mon Sep 17 00:00:00 2001 From: egillax Date: Tue, 22 Oct 2024 12:52:35 +0200 Subject: [PATCH 3/3] fix missing coverage --- tests/testthat/test-plotting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 4f9574b9b..a250376e4 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -156,7 +156,7 @@ test_that("plotNetBenefit returns a grob object", { }) test_that("plotNetBenefit saves plot when saveLocation is specified", { - tempDir <- tempdir() + tempDir <- tempfile() plotNetBenefit(plpResult, saveLocation = tempDir, fileName = "netBenefit.png", evalType = "Test") expect_true(file.exists(file.path(tempDir, "netBenefit.png"))) #Clean up