Skip to content

Commit

Permalink
Merge pull request #495 from OHDSI/net_benefit_plot
Browse files Browse the repository at this point in the history
Add Net benefit plot and tests
  • Loading branch information
egillax authored Oct 22, 2024
2 parents de8ddcb + 2368097 commit bbf8e0e
Show file tree
Hide file tree
Showing 6 changed files with 276 additions and 59 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ export(plotDemographicSummary)
export(plotF1Measure)
export(plotGeneralizability)
export(plotLearningCurve)
export(plotNetBenefit)
export(plotPlp)
export(plotPrecisionRecall)
export(plotPredictedPDF)
Expand Down
119 changes: 119 additions & 0 deletions R/Plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,125 @@ plotSmoothCalibration <- function(plpResult,
return(plots)
}

#' Plot the net benefit
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param typeColumn The name of the column specifying the evaluation type
#' @param saveLocation Directory to save plot (if NULL plot is not saved)
#' @param fileName Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.
#' @param evalType Which evaluation type to plot for. For example `Test`, `Train`. If NULL everything is plotted
#' @param ylim The y limits for the plot, if NULL the limits are calculated from the data
#' @param xlim The x limits for the plot, if NULL the limits are calculated from the data
#' @return A list of ggplot objects
#' @export
plotNetBenefit <- function(plpResult,
typeColumn = "evaluation",
saveLocation = NULL,
fileName = "netBenefit.png",
evalType = NULL,
ylim = NULL,
xlim = NULL
) {
if (is.null(evalType)) {
evalTypes <- unique(plpResult$performanceEvaluation$thresholdSummary[, typeColumn])
} else {
evalTypes <- evalType
}

plots <- list()
length(plots) <- length(evalTypes)
for (i in 1:length(evalTypes)) {
evalType <- evalTypes[i]
# calculate net benefit straight from predictions instead of thresholdSummary.
nbData <- getNetBenefit(plpResult, evalType)
if (is.null(ylim)) {
ylim <- c(min(nbData$netBenefit),
max(nbData$netBenefit))
}
if (is.null(xlim)) {
# match limit in Smooth Calibration by default
xlim <- c(min(nbData$threshold),
max(max(plpResult$performanceEvaluation$calibrationSummary$averagePredictedProbability),
max(plpResult$performanceEvaluation$calibrationSummary$observedIncidence)))

}

plots[[i]] <- ggplot2::ggplot(data = nbData, ggplot2::aes(x = .data$threshold)) +
ggplot2::geom_line(ggplot2::aes(y = treatAll, color = "Treat All", linetype = "Treat All")) +
ggplot2::geom_line(ggplot2::aes(y = treatNone, color = "Treat None", linetype = "Treat None")) +
ggplot2::geom_line(ggplot2::aes(y = netBenefit, color = "Net Benefit", linetype = "Net Benefit")) +
ggplot2::scale_color_manual(
name = "Strategies",
values = c(
"Net Benefit" = "blue",
"Treat All" = "red",
"Treat None" = "brown"
)
) +
ggplot2::scale_linetype_manual(
name = "Strategies",
values = c(
"Net Benefit" = "solid",
"Treat All" = "dashed",
"Treat None" = "dashed"
)
) +
ggplot2::labs(
x = "Predicted Probability",
y = "Net Benefit"
) +
ggplot2::ggtitle(evalType) +
ggplot2::coord_cartesian(xlim = xlim, ylim = ylim)
}

plot <- gridExtra::marrangeGrob(plots, nrow = length(plots), ncol = 1)

if (!is.null(saveLocation)) {
if (!dir.exists(saveLocation)) {
dir.create(saveLocation, recursive = T)
}
ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400)
}
return(plot)
}

#' Calculate net benefit in an efficient way
#' @param plpResult A plp result object as generated using the \code{\link{runPlp}} function.
#' @param evalType The evaluation type column
#' @return A data frame with the net benefit, treat all and treat none values
#' @keywords internal
getNetBenefit <- function(plpResult, evalType) {
prediction <- plpResult$prediction %>% dplyr::filter(.data$evaluationType == evalType)
if (nrow(prediction) == 0) {
stop("No prediction data found for evaluation type ", evalType)
}
prediction <- prediction %>% dplyr::arrange(dplyr::desc(.data$value)) %>%
dplyr::mutate(
cumsumTrue = cumsum(.data$outcomeCount),
cumsumFalse = cumsum(1 - .data$outcomeCount)
)
trueCount <- sum(prediction$outcomeCount)
n <- nrow(prediction)
falseCount <- n - trueCount
outcomeRate <- trueCount / n

nbData <- prediction %>%
dplyr::group_by(.data$value) %>%
dplyr::summarise(
threshold = unique(.data$value),
TP = max(.data$cumsumTrue),
FP = max(.data$cumsumFalse),
) %>%
dplyr::ungroup()

nbData <- nbData %>%
dplyr::mutate(
netBenefit = (.data$TP / n) - (.data$FP / n) * (.data$threshold / (1 - .data$threshold)),
treatAll = outcomeRate - (1 - outcomeRate) * .data$threshold / (1 - .data$threshold),
treatNone = 0
)
return(nbData)
}

plotSmoothCalibrationLoess <- function(data, span = 0.75) {
fit <- stats::loess(y ~ p, data = data, degree = 2, span = span)
predictedFit <- stats::predict(fit, se = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions man/PatientLevelPrediction.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/getNetBenefit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions man/plotNetBenefit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bbf8e0e

Please sign in to comment.