From e18ff08d4aa0ea863b7c68269e62ae9dc3d8f1ef Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Tue, 22 Oct 2024 11:45:06 +0200
Subject: [PATCH 1/3] Add net benefit plot and tests

---
 man/PatientLevelPrediction.Rd |  4 ++--
 man/getNetBenefit.Rd          | 20 +++++++++++++++++++
 man/plotNetBenefit.Rd         | 37 +++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 man/getNetBenefit.Rd
 create mode 100644 man/plotNetBenefit.Rd

diff --git a/man/PatientLevelPrediction.Rd b/man/PatientLevelPrediction.Rd
index 8bc15fc71..4946949d9 100644
--- a/man/PatientLevelPrediction.Rd
+++ b/man/PatientLevelPrediction.Rd
@@ -18,15 +18,15 @@ Useful links:
 
 }
 \author{
-\strong{Maintainer}: Jenna Reps \email{jreps@its.jnj.com}
+\strong{Maintainer}: Egill Fridgeirsson \email{e.fridgeirsson@erasmusmc.nl}
 
 Authors:
 \itemize{
+  \item Jenna Reps \email{jreps@its.jnj.com}
   \item Martijn Schuemie
   \item Marc Suchard
   \item Patrick Ryan
   \item Peter Rijnbeek
-  \item Egill Fridgeirsson
 }
 
 }
diff --git a/man/getNetBenefit.Rd b/man/getNetBenefit.Rd
new file mode 100644
index 000000000..af758fa79
--- /dev/null
+++ b/man/getNetBenefit.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Plotting.R
+\name{getNetBenefit}
+\alias{getNetBenefit}
+\title{Calculate net benefit in an efficient way}
+\usage{
+getNetBenefit(plpResult, evalType)
+}
+\arguments{
+\item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.}
+
+\item{evalType}{The evaluation type column}
+}
+\value{
+A data frame with the net benefit, treat all and treat none values
+}
+\description{
+Calculate net benefit in an efficient way
+}
+\keyword{internal}
diff --git a/man/plotNetBenefit.Rd b/man/plotNetBenefit.Rd
new file mode 100644
index 000000000..35722350b
--- /dev/null
+++ b/man/plotNetBenefit.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Plotting.R
+\name{plotNetBenefit}
+\alias{plotNetBenefit}
+\title{Plot the net benefit}
+\usage{
+plotNetBenefit(
+  plpResult,
+  typeColumn = "evaluation",
+  saveLocation = NULL,
+  fileName = "netBenefit.png",
+  evalType = NULL,
+  ylim = NULL,
+  xlim = NULL
+)
+}
+\arguments{
+\item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.}
+
+\item{typeColumn}{The name of the column specifying the evaluation type}
+
+\item{saveLocation}{Directory to save plot (if NULL plot is not saved)}
+
+\item{fileName}{Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.}
+
+\item{evalType}{Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted}
+
+\item{ylim}{The y limits for the plot, if NULL the limits are calculated from the data}
+
+\item{xlim}{The x limits for the plot, if NULL the limits are calculated from the data}
+}
+\value{
+A list of ggplot objects
+}
+\description{
+Plot the net benefit
+}

From 759d7df51d983f8eaf9dfdaa1350163ce8247400 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Tue, 22 Oct 2024 11:53:52 +0200
Subject: [PATCH 2/3] plotNetBenefit with tests

---
 NAMESPACE                      |   1 +
 R/Plotting.R                   | 119 +++++++++++++++++++++++++
 tests/testthat/test-plotting.R | 154 +++++++++++++++++++++------------
 3 files changed, 217 insertions(+), 57 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 7119f6dd7..5ce71cbba 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -72,6 +72,7 @@ export(plotDemographicSummary)
 export(plotF1Measure)
 export(plotGeneralizability)
 export(plotLearningCurve)
+export(plotNetBenefit)
 export(plotPlp)
 export(plotPrecisionRecall)
 export(plotPredictedPDF)
diff --git a/R/Plotting.R b/R/Plotting.R
index f2fe6bf0d..f76ebb6c4 100644
--- a/R/Plotting.R
+++ b/R/Plotting.R
@@ -1216,6 +1216,125 @@ plotSmoothCalibration <- function(plpResult,
   return(plots)
 }
 
+#' Plot the net benefit 
+#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
+#' @param typeColumn The name of the column specifying the evaluation type
+#' @param saveLocation Directory to save plot (if NULL plot is not saved)
+#' @param fileName Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.
+#' @param evalType Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted
+#' @param ylim The y limits for the plot, if NULL the limits are calculated from the data
+#' @param xlim The x limits for the plot, if NULL the limits are calculated from the data
+#' @return A list of ggplot objects
+#' @export
+plotNetBenefit <- function(plpResult,
+                           typeColumn = "evaluation",
+                           saveLocation = NULL,
+                           fileName = "netBenefit.png",
+                           evalType = NULL,
+                           ylim = NULL,
+                           xlim = NULL
+) {
+  if (is.null(evalType)) {
+    evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[, typeColumn])
+  } else {
+    evalTypes <- evalType
+    }
+  
+  plots <- list()
+  length(plots) <- length(evalTypes)
+  for (i in 1:length(evalTypes)) {
+    evalType <- evalTypes[i]
+    # calculate net benefit straight from predictions instead of thresholdSummary.
+    nbData <- getNetBenefit(plpResult, evalType)
+    if (is.null(ylim)) {
+      ylim <- c(min(nbData$netBenefit),
+                max(nbData$netBenefit))
+    }
+    if (is.null(xlim)) {
+      # match limit in Smooth Calibration by default
+      xlim <- c(min(nbData$threshold),
+                max(max(plpResult$performanceEvaluation$calibrationSummary$averagePredictedProbability),
+                    max(plpResult$performanceEvaluation$calibrationSummary$observedIncidence)))
+      
+    }
+    
+    plots[[i]] <- ggplot2::ggplot(data = nbData, ggplot2::aes(x = .data$threshold)) +
+      ggplot2::geom_line(ggplot2::aes(y = treatAll, color = "Treat All", linetype = "Treat All")) +
+      ggplot2::geom_line(ggplot2::aes(y = treatNone, color = "Treat None", linetype = "Treat None")) +
+      ggplot2::geom_line(ggplot2::aes(y = netBenefit, color = "Net Benefit", linetype = "Net Benefit")) +
+      ggplot2::scale_color_manual(
+        name = "Strategies",
+        values = c(
+          "Net Benefit" = "blue",
+          "Treat All" = "red",
+          "Treat None" = "brown"
+        )
+      ) +
+      ggplot2::scale_linetype_manual(
+        name = "Strategies",
+        values = c(
+          "Net Benefit" = "solid",
+          "Treat All" = "dashed",
+          "Treat None" = "dashed"
+        )
+      ) +
+      ggplot2::labs(
+        x = "Predicted Probability",
+        y = "Net Benefit"
+      ) +
+      ggplot2::ggtitle(evalType) +
+      ggplot2::coord_cartesian(xlim = xlim, ylim = ylim)
+  }
+  
+  plot <- gridExtra::marrangeGrob(plots, nrow = length(plots), ncol = 1)
+  
+  if (!is.null(saveLocation)) {
+    if (!dir.exists(saveLocation)) {
+      dir.create(saveLocation, recursive = T)
+    }
+    ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
+  }
+  return(plot)
+}
+
+#' Calculate net benefit in an efficient way
+#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
+#' @param evalType The evaluation type column
+#' @return A data frame with the net benefit, treat all and treat none values
+#' @keywords internal
+getNetBenefit <- function(plpResult, evalType) {
+  prediction <- plpResult$prediction %>% dplyr::filter(.data$evaluationType == evalType)
+  if (nrow(prediction) == 0) {
+    stop("No prediction data found for evaluation type ", evalType)
+  }
+  prediction <- prediction %>% dplyr::arrange(dplyr::desc(.data$value)) %>%
+    dplyr::mutate(
+      cumsumTrue = cumsum(.data$outcomeCount),
+      cumsumFalse = cumsum(1 - .data$outcomeCount)
+    )
+  trueCount <- sum(prediction$outcomeCount)
+  n <- nrow(prediction)
+  falseCount <- n - trueCount
+  outcomeRate <- trueCount / n
+  
+  nbData <- prediction %>% 
+    dplyr::group_by(.data$value) %>% 
+    dplyr::summarise(
+      threshold = unique(.data$value),
+      TP = max(.data$cumsumTrue),
+      FP = max(.data$cumsumFalse),
+    ) %>% 
+    dplyr::ungroup()
+  
+  nbData <- nbData %>%
+    dplyr::mutate(
+      netBenefit = (.data$TP / n) - (.data$FP / n) * (.data$threshold / (1 - .data$threshold)),
+      treatAll = outcomeRate - (1 - outcomeRate) * .data$threshold / (1 - .data$threshold),
+      treatNone = 0
+    )
+  return(nbData)
+}
+
 plotSmoothCalibrationLoess <- function(data, span = 0.75) {
   fit <- stats::loess(y ~ p, data = data, degree = 2, span = span)
   predictedFit <- stats::predict(fit, se = TRUE)
diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R
index 17039166e..4f9574b9b 100644
--- a/tests/testthat/test-plotting.R
+++ b/tests/testthat/test-plotting.R
@@ -17,53 +17,49 @@
 library("testthat")
 
 context("Plotting")
-#TODO: add input checks and test these...
-#options(fftempdir = getwd())
+# TODO: add input checks and test these...
 
 test_that("plots", {
-
   # test all the outputs are ggplots
-  test <- plotSparseRoc(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
-  test <- plotPredictedPDF(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
-  test <- plotPreferencePDF(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
-  test <- plotPrecisionRecall(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
-  test <- plotF1Measure(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
+  test <- plotSparseRoc(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
+  test <- plotPredictedPDF(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
+  test <- plotPreferencePDF(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
+  test <- plotPrecisionRecall(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
+  test <- plotF1Measure(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
   if (!is.null(plpResult$performanceEvaluation$demographicSummary)) {
-    test <- plotDemographicSummary(plpResult, typeColumn = 'evaluation')
-    testthat::expect_s3_class(test, 'arrangelist')
+    test <- plotDemographicSummary(plpResult, typeColumn = "evaluation")
+    testthat::expect_s3_class(test, "arrangelist")
   }
-  
-  test <- plotSparseCalibration(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
-  test <- plotPredictionDistribution(plpResult, typeColumn = 'evaluation')
-  testthat::expect_s3_class(test, 'arrangelist')
-  
+
+  test <- plotSparseCalibration(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
+  test <- plotPredictionDistribution(plpResult, typeColumn = "evaluation")
+  testthat::expect_s3_class(test, "arrangelist")
+
   test <- plotVariableScatterplot(plpResult$covariateSummary)
-  testthat::expect_s3_class(test, 'ggplot')
-  
-  test <- plotGeneralizability(plpResult$covariateSummary, fileName = NULL)
-  testthat::expect_s3_class(test, 'grob')
+  testthat::expect_s3_class(test, "ggplot")
 
+  test <- plotGeneralizability(plpResult$covariateSummary, fileName = NULL)
+  testthat::expect_s3_class(test, "grob")
 })
 
 
 test_that("outcomeSurvivalPlot", {
-  
   # test the plot works
   test <- outcomeSurvivalPlot(plpData = plpData, outcomeId = outcomeId)
-  testthat::expect_s3_class(test, 'ggsurvplot')
-  
+  testthat::expect_s3_class(test, "ggsurvplot")
+
   testthat::expect_error(outcomeSurvivalPlot())
   testthat::expect_error(outcomeSurvivalPlot(plpData = NULL))
   testthat::expect_error(outcomeSurvivalPlot(outcomeId = 094954))
@@ -71,30 +67,25 @@ test_that("outcomeSurvivalPlot", {
 
 
 test_that("plotPlp", {
-  
   # test the plot works
   test <- plotPlp(
-    plpResult  = plpResult, 
-    saveLocation = file.path(saveLoc, 'plots'),
-    typeColumn = 'evaluation'
+    plpResult = plpResult,
+    saveLocation = file.path(saveLoc, "plots"),
+    typeColumn = "evaluation"
   )
-  testthat::expect_equal(test, T)
-  testthat::expect_equal(dir.exists(file.path(saveLoc,'plots')), T)
-  
+  testthat::expect_equal(test, TRUE)
+  testthat::expect_equal(dir.exists(file.path(saveLoc, "plots")), TRUE)
+
   # expect plots to be there
-  expect_true(length(dir(file.path(saveLoc,'plots'))) > 0)
-  
+  expect_true(length(dir(file.path(saveLoc, "plots"))) > 0)
 })
 
 test_that("plotSmoothCalibration", {
-  
   # test the plot works
   test <- plotSmoothCalibration(
-    plpResult = plpResult, 
+    plpResult = plpResult,
     smooth = "loess",
-    span = 1,
-    nKnots = 5,
-    scatter = T,
+    scatter = TRUE,
     bins = 20,
     saveLocation = file.path(saveLoc, "plots")
   )
@@ -105,27 +96,29 @@ test_that("plotSmoothCalibration", {
       file.path(saveLoc, "plots", "smoothCalibrationTest.pdf")
     )
   )
-  
+
   pred <- plpResult$prediction
   plpResult$prediction <- NULL
   test2 <- plotSmoothCalibration(plpResult,
     smooth = "loess",
     span = 1,
     nKnots = 5,
-    scatter = T,
+    scatter = TRUE,
     bins = 20,
-    sample = T,
-    saveLocation = NULL) 
+    sample = TRUE,
+    saveLocation = NULL
+  )
   testthat::expect_s3_class(test2$test$smoothPlot, c("gg", "ggplot"))
   plpResult$prediction <- pred
-  
+
   test3 <- plotSmoothCalibration(plpResult,
     smooth = "rcs",
     span = 1,
     nKnots = 5,
-    scatter = F,
+    scatter = FALSE,
     bins = 20,
-    fileName = NULL)
+    fileName = NULL
+  )
   testthat::expect_s3_class(test3$test$smoothPlot, c("gg", "ggplot"))
   testthat::expect_s3_class(test3$test$histPlot, c("gg", "ggplot"))
   testthat::expect_true( # is this tested needed again?
@@ -133,5 +126,52 @@ test_that("plotSmoothCalibration", {
       file.path(saveLoc, "plots", "smoothCalibrationTest.pdf")
     )
   )
-  
-})
\ No newline at end of file
+})
+
+nbData <- getNetBenefit(plpResult, evalType = "Test")
+test_that("getNetBenefit returns the correct dataframe", {
+
+  expect_s3_class(nbData, "data.frame")
+  expectedColumns <- c("threshold", "TP", "FP", "netBenefit", "treatAll", "treatNone")
+  expect_true(all(expectedColumns %in% colnames(nbData)))
+})
+
+test_that("getNetBenefit computes the net benefit correctly", {
+  threshold <- nbData$threshold[[1]]
+  truePositives <- nbData$TP[[1]]
+  falsePositives <- nbData$FP[[1]]
+  n <- nrow(plpResult$prediction %>% dplyr::filter(.data$evaluationType == "Test"))
+  netBenefitCalculated <- (truePositives / n) - (falsePositives / n) * (threshold / (1 - threshold))
+  expect_equal(nbData$netBenefit[[1]], netBenefitCalculated)
+})
+
+test_that("getNetBenefit handles invalid evalType gracefully", {
+  expect_error(getNetBenefit(plpResult, evalType = "InvalidType"),
+               "No prediction data found for evaluation type InvalidType")
+})
+
+test_that("plotNetBenefit returns a grob object", {
+  plot <- plotNetBenefit(plpResult, evalType = "Test")
+  expect_true(inherits(plot, "arrangelist"))
+})
+
+test_that("plotNetBenefit saves plot when saveLocation is specified", {
+  tempDir <- tempdir()
+  plotNetBenefit(plpResult, saveLocation = tempDir, fileName = "netBenefit.png", evalType = "Test")
+  expect_true(file.exists(file.path(tempDir, "netBenefit.png")))
+  #Clean up
+  file.remove(file.path(tempDir, "netBenefit.png"))
+})
+
+test_that("plotNetBenefit handles NULL evalType", {
+  plot <- plotNetBenefit(plpResult, evalType = NULL)
+  expect_true(inherits(plot, "arrangelist"))
+})
+
+
+test_that("plotNetBenefit creates correct number of plots when evalType is NULL", {
+  plot <- plotNetBenefit(plpResult, evalType = NULL)
+  # Since evalType is NULL, it should plot for all unique evaluation types
+  evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary$evaluation)
+  expect_equal(length(plot[[1]]$grobs) - 1 , length(evalTypes)) # -1 for text grob
+})

From 2368097bc4d1c77e146d088d19e2f79a48a9e868 Mon Sep 17 00:00:00 2001
From: egillax <egillax@gmail.com>
Date: Tue, 22 Oct 2024 12:52:35 +0200
Subject: [PATCH 3/3] fix missing coverage

---
 tests/testthat/test-plotting.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R
index 4f9574b9b..a250376e4 100644
--- a/tests/testthat/test-plotting.R
+++ b/tests/testthat/test-plotting.R
@@ -156,7 +156,7 @@ test_that("plotNetBenefit returns a grob object", {
 })
 
 test_that("plotNetBenefit saves plot when saveLocation is specified", {
-  tempDir <- tempdir()
+  tempDir <- tempfile()
   plotNetBenefit(plpResult, saveLocation = tempDir, fileName = "netBenefit.png", evalType = "Test")
   expect_true(file.exists(file.path(tempDir, "netBenefit.png")))
   #Clean up