diff --git a/DESCRIPTION b/DESCRIPTION
index 172cfb8..2e87e7f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -4,7 +4,8 @@ Version: 0.0.1
 Authors@R: c(
     person("Damir", "Pulatov", , "damirpolat@protonmail.com", role = c("cre", "aut")),
     person("Marc", "Becker", , "marcbecker@posteo.de", role = "aut",
-      comment = c(ORCID = "0000-0002-8115-0400"))
+      comment = c(ORCID = "0000-0002-8115-0400")),
+    person("Baisu", "Zhou", , "baisu.zhou@outlook.com", role = "aut")
   )
 Description: Flexible AutoML system for the 'mlr3' ecosystem.
 License: LGPL-3
@@ -18,8 +19,12 @@ Depends:
     R (>= 3.1.0),
     rush
 Imports:
+    bbotk,
+    bslib,
     checkmate,
     data.table,
+    ggplot2,
+    iml,
     lhs,
     mlr3mbo,
     mlr3misc (>= 0.15.1),
@@ -27,6 +32,8 @@ Imports:
     mlr3tuningspaces,
     paradox (>= 1.0.1),
     R6,
+    shiny,
+    stats,
     utils
 Suggests:
     catboost,
@@ -41,7 +48,8 @@ Suggests:
     ranger,
     rpart,
     testthat (>= 3.0.0),
-    xgboost
+    xgboost,
+    vdiffr
 Remotes:
     catboost/catboost/catboost/R-package,
     mlr-org/mlr3,
@@ -76,6 +84,9 @@ Collate:
     'build_graph.R'
     'estimate_memory.R'
     'helper.R'
+    'helpers_app.R'
     'internal_measure.R'
     'train_auto.R'
+    'visualization.R'
+    'visualization_app.R'
     'zzz.R'
diff --git a/NAMESPACE b/NAMESPACE
index a0dd2d3..df912ff 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -27,10 +27,16 @@ export(LearnerRegrAutoNnet)
 export(LearnerRegrAutoRanger)
 export(LearnerRegrAutoSVM)
 export(LearnerRegrAutoXgboost)
+export(cost_over_time)
 export(estimate_memory)
 export(internal_measure_catboost)
 export(internal_measure_lightgbm)
 export(internal_measure_xgboost)
+export(marginal_plot)
+export(parallel_coordinates)
+export(pareto_front)
+export(partial_dependence_plot)
+export(visualize)
 import(R6)
 import(checkmate)
 import(data.table)
diff --git a/R/helpers_app.R b/R/helpers_app.R
new file mode 100644
index 0000000..f537f5c
--- /dev/null
+++ b/R/helpers_app.R
@@ -0,0 +1,42 @@
+#' @title Custom conditionalPanel for hyperparameter selection
+#'
+#' @description
+#' Used for Marginal Plots and Partial Dependence Plots.
+#'
+#' @param condition (`character(1)`)\cr
+#' Passed to the `condition` argument of `[shiny::conditionalPanel]`.
+#' @param prefix (`character(1)`)\cr
+#' Prefix of input slot names.
+#' @param learner_ids (`character()`)\cr
+#' Vector of all possible learner/branch IDs.
+#' @param param_ids (`character()`)\cr
+#' Vector of all possible param IDs.
+#' @param ... (anything)
+#' Additional arguments passed to `[shiny::conditionalPanel]`.
+#'
+param_panel = function(condition, prefix, learner_ids, param_ids, ...) {
+  assert_string(condition)
+  assert_string(prefix)
+  assert_character(learner_ids)
+  assert_character(param_ids)
+
+  shiny::conditionalPanel(
+    condition,
+    shiny::selectInput(paste0(prefix, "_branch"),
+      label = "Select branch:",
+      choices = learner_ids
+    ),
+    # choices and selected are just placeholders for initialization
+    shiny::selectInput(paste0(prefix, "_x"),
+      label = "Select x-axis:",
+      choices = param_ids,
+      selected = param_ids[[1]]
+    ),
+    shiny::selectInput(paste0(prefix, "_y"),
+      label = "Select y-axis:",
+      choices = param_ids,
+      selected = param_ids[[2]]
+    ),
+    ...
+  )
+}
diff --git a/R/visualization.R b/R/visualization.R
new file mode 100644
index 0000000..3d453b0
--- /dev/null
+++ b/R/visualization.R
@@ -0,0 +1,358 @@
+#' @title Cost-Over-Time Plot
+#'
+#' @description Plots the cost (objective) over time, where the time variable can be set by the user.
+#'
+#' @template param_instance
+#' @param time (`character(1)`)\cr
+#' Column in the archive to be interpreted as the time variable, e.g. "timestamp_xs", "timestamp_ys".
+#' If `NULL` (default), the configuration ID will be used.
+#' @template param_theme
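+#'
+#' @examples
+#' # Usage sketch: assumes a running Redis server and a trained Auto learner,
+#' # mirroring the setup in tests/testthat/test_visualization.R.
+#' \dontrun{
+#' rush_plan(n_workers = 2)
+#' learner = lrn("classif.auto_ranger",
+#'   small_data_size = 1,
+#'   resampling = rsmp("holdout"),
+#'   measure = msr("classif.ce"),
+#'   terminator = trm("evals", n_evals = 6)
+#' )
+#' learner$train(tsk("penguins"))
+#'
+#' cost_over_time(learner$instance)
+#' cost_over_time(learner$instance, time = "timestamp_ys")
+#' }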
"timestamp_xs", "timestamp_ys". +#' If `NULL` (default), the configuration ID will be used. +#' @template param_theme +#' +#' @export +cost_over_time = function(instance, time = NULL, theme = ggplot2::theme_minimal()) { + archive = instance$archive + # there should only be one objective, e.g. `classif.ce` + objective = archive$cols_y + + .data = NULL + if (is.null(time)) { + x = seq_row(archive$data) + g = ggplot2::ggplot(data = as.data.table(archive), ggplot2::aes( + x = x, + y = .data[[objective]] + )) + + ggplot2::labs(x = "configuration ID") + } else { + assert_choice(time, names(as.data.table(archive))) + g = ggplot2::ggplot(data = as.data.table(archive), ggplot2::aes( + x = .data[[time]], + y = .data[[objective]] + )) + } + + g + ggplot2::geom_point() + + ggplot2::geom_line() + + theme +} + +#' @title Marginal Plot +#' +#' @description Creates 2D marginal plots for evaluated configurations. +#' +#' @template param_instance +#' @param x (`character(1)`) +#' Name of the parameter to be mapped to the x-axis. +#' @param y (`character(1)`) +#' Name of the parameter to be mapped to the y-axis. +#' If `NULL` (default), the measure (e.g. `classif.ce`) is mapped to the y-axis. +#' @template param_theme +#' +#' @export +marginal_plot = function(instance, x, y = NULL, theme = ggplot2::theme_minimal()) { + archive = instance$archive + param_ids = archive$cols_x + assert_choice(x, param_ids) + assert_choice(y, param_ids, null.ok = TRUE) + + # use transformed values if trafo is set + x_trafo = paste0("x_domain_", x) + y_trafo = if (!is.null(y)) paste0("x_domain_", y) else NULL + + # there should only be one objective, e.g. `classif.ce` + measure = archive$cols_y + + data = na.omit(as.data.table(archive), cols = c(x_trafo, y_trafo)) + + .data = NULL + + # no param provided for y + if (is.null(y)) { + g = ggplot2:: ggplot(data = data, ggplot2::aes( + x = .data[[x_trafo]], + y = .data[[measure]] + )) + + ggplot2::geom_point(alpha = 0.6) + + theme + + if (archive$search_space$is_logscale[[x]]) { + g = g + ggplot2::scale_x_log10() + } + + return(g) + } + + # param provided for y + g = ggplot2::ggplot(data = data, ggplot2::aes( + x = .data[[x_trafo]], + y = .data[[y_trafo]], + col = .data[[measure]] + )) + + ggplot2::geom_point(alpha = 0.6) + + ggplot2::scale_color_viridis_c() + + ggplot2::labs(x = x, y = y) + + theme + + if (archive$search_space$is_logscale[[x]]) { + g = g + ggplot2::scale_x_log10() + } + if (archive$search_space$is_logscale[[y]]) { + g = g + ggplot2::scale_y_log10() + } + + return(g) +} + + +#' @title Parallel Coordinates Plot +#' +#' @description Adapted from [mlr3viz::autoplot()] with `type == "parallel"`. Since the hyperparameters of each individual learner are conditioned on `branch.selection`, missing values are expected in the archive data. When standardizing the hyperparameter values (referred to as "x values" in the following to be consistent with `mlr3viz` documentation), `na.omit == TRUE` is used to compute `mean()` and `stats::sd()`. +#' +#' @template param_instance +#' @param cols_x (`character()`) +#' Column names of x values. +#' By default, all untransformed x values from the search space are plotted. +#' @param trafo (`character(1)`) +#' If `FALSE` (default), the untransformed x values are plotted. +#' If `TRUE`, the transformed x values are plotted. 
+#'
+#' @export
+parallel_coordinates = function(
+  instance, cols_x = NULL, trafo = FALSE,
+  theme = ggplot2::theme_minimal()
+) {
+  archive = instance$archive
+  assert_subset(cols_x, c(archive$cols_x, paste0("x_domain_", archive$cols_x)))
+  assert_flag(trafo)
+
+  if (is.null(cols_x)) {
+    cols_x = archive$cols_x
+  }
+  if (trafo) {
+    cols_x = paste0("x_domain_", cols_x)
+  }
+  cols_y = archive$cols_y
+
+  data = as.data.table(archive)
+  data = data[, c(cols_x, cols_y), with = FALSE]
+  x_axis = data.table(x = seq(names(data)), variable = names(data))
+
+  # split data
+  data_l = data[, .SD, .SDcols = which(sapply(data, function(x) is.character(x) || is.logical(x)))]
+  data_n = data[, .SD, .SDcols = which(sapply(data, is.numeric))]
+  data_y = data[, cols_y, with = FALSE]
+
+  # factor columns to numeric
+  data_c = data_l[, lapply(.SD, function(x) as.numeric(as.factor(x)))]
+
+  # rescale
+  data_n = data_n[, lapply(.SD, function(x) {
+    if (stats::sd(x, na.rm = TRUE) %in% c(0, NA)) {
+      rep(0, length(x))
+    } else {
+      (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
+    }
+  })]
+  data_c = data_c[, lapply(.SD, function(x) {
+    if (stats::sd(x, na.rm = TRUE) %in% c(0, NA)) {
+      rep(0, length(x))
+    } else {
+      (x - mean(unique(x), na.rm = TRUE)) / stats::sd(unique(x), na.rm = TRUE)
+    }
+  })]
+
+  # to long format
+  set(data_n, j = "id", value = seq_row(data_n))
+  set(data_y, j = "id", value = seq_row(data_y))
+  data_n = melt(data_n, measure.var = setdiff(names(data_n), "id"))
+
+  if (nrow(data_c)) {
+    # Skip if no factor column is present
+    set(data_c, j = "id", value = seq_row(data_c))
+    data_c = melt(data_c, measure.var = setdiff(names(data_c), "id"))
+    data_l = data_l[, lapply(.SD, as.character)] # Logical to character
+    data_l = melt(data_l, measure.var = names(data_l), value.name = "label")[, "label"]
+    set(data_c, j = "label", value = data_l)
+  }
+
+  # merge
+  data = rbindlist(list(data_c, data_n), fill = TRUE)
+  data = merge(data, x_axis, by = "variable")
+  data = merge(data, data_y, by = "id")
+  setorderv(data, "x")
+
+  .data = NULL
+  ggplot2::ggplot(data,
+    mapping = ggplot2::aes(
+      x = .data[["x"]],
+      y = .data[["value"]])) +
+    ggplot2::geom_line(
+      mapping = ggplot2::aes(
+        group = .data$id,
+        color = .data[[cols_y]]),
+      linewidth = 1) +
+    ggplot2::geom_vline(ggplot2::aes(xintercept = x)) +
+    {
+      if (nrow(data_c)) ggplot2::geom_label(
+        mapping = ggplot2::aes(label = .data$label),
+        data = data[!is.na(data$label), ])
+    } +
+    ggplot2::scale_x_continuous(breaks = x_axis$x, labels = x_axis$variable) +
+    ggplot2::scale_color_viridis_c() +
+    ggplot2::guides(color = ggplot2::guide_colorbar(barwidth = 0.5, barheight = 10)) +
+    theme +
+    ggplot2::theme(axis.title.x = ggplot2::element_blank())
+}
+
+
+#' @title Partial Dependence Plot
+#'
+#' @description Creates a partial dependence plot (PDP) via the `[iml]` package.
+#'
+#' @template param_instance
+#' @param x (`character(1)`)
+#' Name of the parameter to be mapped to the x-axis.
+#' @param y (`character(1)`)
+#' Name of the parameter to be mapped to the y-axis.
+#' @param type (`character(1)`)
+#' Type of the two-parameter partial dependence plot. Possible options are listed below.
+#' \itemize{
+#' \item `"default"`: Use the default setting in `iml`.
+#' \item `"contour"`: Create a contour plot using `[ggplot2::geom_contour_filled]`. Only supported if both parameters are numerical.
+#' }
+#' Ignored if only one parameter is provided.
+#' @template param_theme
+#' @param ... (anything)
+#' Arguments passed to `[iml::FeatureEffect]`.
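+#'
+#' @examples
+#' # Usage sketch: assumes a trained Auto learner with a glmnet branch
+#' # (e.g. lrn("classif.auto_glmnet")); both parameters must share one branch.
+#' # Training the surrogate model can take a while.
+#' \dontrun{
+#' partial_dependence_plot(learner$instance, x = "glmnet.alpha", y = "glmnet.s")
+#' partial_dependence_plot(learner$instance,
+#'   x = "glmnet.alpha", y = "glmnet.s", type = "contour"
+#' )
+#' }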
+#'
+#' @export
+partial_dependence_plot = function(
+  instance, x, y, type = "default",
+  theme = ggplot2::theme_minimal(),
+  ...
+) {
+  archive = instance$archive
+  assert_choice(x, archive$cols_x)
+  assert_choice(y, archive$cols_x, null.ok = TRUE)
+
+  branch = tstrsplit(c(x, y), "\\.")[[1]]
+  branch = unique(branch)
+  if (length(branch) > 1) {
+    stop("Parameters from different branches cannot be plotted in the same PDP.")
+  }
+
+  if (!is.null(y)) {
+    assert_choice(type, c("contour", "default"))
+  }
+
+  non_numeric = some(c(x, y), function(param_id) {
+    !is.numeric(archive$data[[param_id]])
+  })
+  if (non_numeric && type == "contour") {
+    stop("Contour plot not supported for non-numeric parameters")
+  }
+
+  # prepare data for surrogate model
+  archive_data = as.data.table(archive)[, c(archive$cols_x, archive$cols_y), with = FALSE]
+  archive_data = archive_data[!is.na(archive_data[[archive$cols_y]]), ]
+  archive_data[, archive$cols_x := lapply(.SD, function(col) {
+    # iml does not accept lgcl features
+    if (is.logical(col)) return(factor(col, levels = c(FALSE, TRUE)))
+    # also convert integer to double to avoid imputeoor error
+    if (is.integer(col)) return(as.numeric(col))
+    return(col)
+  }), .SDcols = archive$cols_x]
+  task = as_task_regr(archive_data, target = archive$cols_y)
+
+  # train surrogate model
+  surrogate = po("imputeoor",
+    multiplier = 3,
+    affect_columns = selector_type(c("numeric", "character", "factor", "ordered"))
+  ) %>>% default_rf()
+  surrogate = GraphLearner$new(surrogate)
+  surrogate$train(task)
+
+  # # store the data.table format for later use in predict.function
+  # prototype = archive_data[0, archive$cols_x, with = FALSE]
+
+  # new data to compute PDP
+  # https://github.com/automl/DeepCAVE/blob/58d6801508468841eda038803b12fa2bbf7a0cb8/deepcave/plugins/hyperparameter/pdp.py#L334
+  samples_per_param = 10
+  num_samples = samples_per_param * nrow(archive_data)
+  max_samples = 10000
+  if (num_samples > max_samples) {
+    num_samples = max_samples
+  }
+
+  pdp_data = generate_design_random(archive$search_space, n = num_samples)$data
+  # same type conversion as above
+  pdp_data[, archive$cols_x := lapply(.SD, function(col) {
+    if (is.logical(col)) return(factor(col, levels = c(FALSE, TRUE)))
+    if (is.integer(col)) return(as.numeric(col))
+    return(col)
+  }), .SDcols = archive$cols_x]
+  pdp_data_types = pdp_data[, lapply(.SD, storage.mode)]
+
+  predictor = iml::Predictor$new(
+    model = surrogate,
+    data = pdp_data[, archive$cols_x, with = FALSE],
+    predict.function = function(model, newdata) {
+      model$predict_newdata(newdata)$response
+    }
+  )
+
+  eff = iml::FeatureEffect$new(
+    predictor,
+    c(x, y),
+    method = "pdp",
+    ...
+  )
+
+  .data = NULL
+
+  g = switch(type,
+
+    contour = ggplot2::ggplot(eff$results, ggplot2::aes(
+      x = .data[[x]], y = .data[[y]], z = .data$.value
+    )) +
+      ggplot2::geom_contour_filled() +
+      ggplot2::scale_fill_viridis_d(),
+
+    # FIXME: rug = TRUE causes error when, e.g., x = "svm.cost", y = "svm.degree"
+    # related to the problem that degree is missing for some instances?
+    default = eff$plot(rug = FALSE) +
+      ggplot2::scale_fill_viridis_c(name = archive$cols_y)
+  )
+
+  # TBD: remove existing scales, use viridis instead
+  g + theme
+}
+
+
+#' @title Pareto Front
+#'
+#' @description Plots the Pareto front with x-axis representing the tuning objective (e.g. `"classif.ce"`) and y-axis representing time (the `runtime_learners` column in the archive).
+#'
+#' @template param_instance
+#' @template param_theme
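+#'
+#' @examples
+#' # Usage sketch: assumes a trained Auto learner as in the cost_over_time() example.
+#' \dontrun{
+#' pareto_front(learner$instance)
+#' }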
+#'
+#' @export
+pareto_front = function(instance, theme = ggplot2::theme_minimal()) {
+  # adopted from `Archive$best()` for multi-crit
+  archive = instance$archive
+  tab = archive$finished_data
+  ymat = t(as.matrix(tab[, c(archive$cols_y, "runtime_learners"), with = FALSE]))
+  ymat = archive$codomain$maximization_to_minimization * ymat
+  best = tab[!bbotk::is_dominated(ymat)]
+
+  .data = NULL
+  ggplot2::ggplot() +
+    ggplot2::geom_point(data = archive$data,
+      ggplot2::aes(x = .data[[archive$cols_y]], y = .data$runtime_learners),
+      alpha = 0.2
+    ) +
+    ggplot2::geom_step(data = best,
+      ggplot2::aes(x = .data[[archive$cols_y]], y = .data$runtime_learners)
+    ) +
+    theme
+}
diff --git a/R/visualization_app.R b/R/visualization_app.R
new file mode 100644
index 0000000..cc7e0c8
--- /dev/null
+++ b/R/visualization_app.R
@@ -0,0 +1,175 @@
+#' @title Shiny App for Visualizing AutoML Results
+#'
+#' @param instance (`[mlr3tuning::TuningInstanceAsyncSingleCrit]`)
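+#'
+#' @examples
+#' # Usage sketch: assumes a running Redis server and a trained Auto learner
+#' # (see the cost_over_time() example); opens the interactive dashboard.
+#' \dontrun{
+#' visualize(learner$instance)
+#' }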
+#' @export
+visualize = function(instance) {
+  archive = instance$archive
+  param_ids = archive$cols_x
+  learner_ids = unique(archive$data$branch.selection)
+
+  ui = bslib::page_navbar(
+    id = "nav",
+    title = "Visualization for mlr3automl",
+
+    sidebar = bslib::sidebar(
+      shiny::conditionalPanel(
+        "input.nav === 'Cost Over Time'",
+        shiny::radioButtons("cot_x",
+          label = "Select x-axis:",
+          choices = c("configuration ID", "timestamp_xs", "timestamp_ys")
+        )
+      ),
+      param_panel(
+        "input.nav === 'Marginal Plots'",
+        "mp",
+        learner_ids,
+        param_ids
+      ),
+      shiny::conditionalPanel(
+        "input.nav === 'Parallel Coordinates'",
+        shiny::selectInput("pc_branch",
+          label = "Select branch:",
+          choices = learner_ids
+        ),
+        shiny::checkboxGroupInput("pc_cols_x",
+          label = "Select hyperparameters to plot:",
+          choices = param_ids,
+          # select all by default
+          selected = param_ids
+        ),
+        shiny::actionButton("pc_unselect_all",
+          label = "Unselect all"
+        ),
+        shiny::actionButton("pc_select_all",
+          label = "Select all"
+        ),
+        shiny::radioButtons("pc_trafo",
+          label = "Apply transformation?",
+          choices = list("No", "Yes"),
+          selected = "No",
+          inline = TRUE
+        )
+      ),
+      param_panel(
+        "input.nav === 'Partial Dependence Plots'",
+        "pdp",
+        learner_ids,
+        param_ids,
+        shiny::actionButton("pdp_process",
+          label = "Process"
+        )
+      )
+    ),
+
+    bslib::nav_panel(
+      "Cost Over Time",
+      bslib::card(shiny::plotOutput("cost_over_time"))
+    ),
+    bslib::nav_panel(
+      "Marginal Plots",
+      bslib::card(shiny::plotOutput("marginal_plot"))
+    ),
+    bslib::nav_panel(
+      "Parallel Coordinates",
+      bslib::card(shiny::plotOutput("parallel_coordinates"))
+    ),
+    bslib::nav_panel(
+      "Partial Dependence Plots",
+      bslib::card(shiny::plotOutput("pdp"))
+    ),
+    bslib::nav_panel(
+      "Pareto Front",
+      bslib::card(shiny::plotOutput("pf"))
+    )
+  )
+
+  server = function(input, output, session) {
+    session$onSessionEnded(shiny::stopApp)
+
+
+    # Cost over time
+    output$cost_over_time = shiny::renderPlot({
+      if (input$cot_x == "configuration ID") {
+        cost_over_time(instance)
+      } else {
+        cost_over_time(instance, time = input$cot_x)
+      }
+    })
+
+
+    # Marginal plots
+    shiny::observeEvent(input$mp_branch, {
+      selectable_ids = param_ids[startsWith(param_ids, input$mp_branch)]
+      shiny::updateSelectInput(session, "mp_x", choices = selectable_ids, selected = selectable_ids[[1]])
+      shiny::updateSelectInput(session, "mp_y", choices = selectable_ids, selected = selectable_ids[[2]])
+    })
+
+    output$marginal_plot = shiny::renderPlot({
+      if (input$mp_y == "NULL") {
+        marginal_plot(instance, x = input$mp_x)
+      } else {
+        marginal_plot(instance, x = input$mp_x, y = input$mp_y)
+      }
+    })
+
+
+    # Parallel Coordinates
+    shiny::observeEvent(input$pc_branch, {
+      selectable_ids = param_ids[startsWith(param_ids, input$pc_branch)]
+      shiny::updateCheckboxGroupInput(session,
+        "pc_cols_x",
+        choices = selectable_ids,
+        # select all by default
+        selected = selectable_ids
+      )
+    })
+
+    output$parallel_coordinates = shiny::renderPlot({
+      if (is.null(input$pc_cols_x)) return() # nothing selected
+      trafo = input$pc_trafo == "Yes"
+      parallel_coordinates(instance, cols_x = input$pc_cols_x, trafo = trafo)
+    })
+
+    shiny::observeEvent(input$pc_unselect_all, {
+      selectable_ids = param_ids[startsWith(param_ids, input$pc_branch)]
+      shiny::updateCheckboxGroupInput(session, "pc_cols_x", choices = selectable_ids, selected = NULL)
+    })
+
+    shiny::observeEvent(input$pc_select_all, {
+      selectable_ids = param_ids[startsWith(param_ids, input$pc_branch)]
+      shiny::updateCheckboxGroupInput(session, "pc_cols_x", choices = selectable_ids, selected = selectable_ids)
+    })
+
+
+    # Partial Dependence Plots
+    shiny::observeEvent(input$pdp_branch, {
+      selectable_ids = param_ids[startsWith(param_ids, input$pdp_branch)]
+      shiny::updateSelectInput(session, "pdp_x", choices = selectable_ids, selected = selectable_ids[[1]])
+      shiny::updateSelectInput(session, "pdp_y", choices = selectable_ids, selected = selectable_ids[[2]])
+    })
+
+    # generate plot only after pressing the "Process" button
+    # because it takes quite a while...
+    output$pdp = shiny::bindEvent(
+      shiny::renderPlot({
+        if (is.null(input$pdp_x)) return()
+        progress <- shiny::Progress$new()
+        on.exit(progress$close())
+        progress$set(message = "Making plot. Please wait.")
+        partial_dependence_plot(
+          instance, x = input$pdp_x, y = input$pdp_y,
+          type = "default"
+        )
+      }),
+      input$pdp_process
+    )
+
+
+    # Pareto Front
+    output$pf = shiny::renderPlot({
+      pareto_front(instance)
+    })
+  }
+
+  shiny::shinyApp(ui = ui, server = server)
+}
diff --git a/man-roxygen/param_instance.R b/man-roxygen/param_instance.R
new file mode 100644
index 0000000..64750c5
--- /dev/null
+++ b/man-roxygen/param_instance.R
@@ -0,0 +1,3 @@
+#' @param instance (`[TuningInstanceAsyncSingleCrit]`)\cr
+#' Single-criterion tuning instance with Rush.
+#' For [mlr3automl] learners, the tuning instance is stored in the field `$instance`.
diff --git a/man-roxygen/param_theme.R b/man-roxygen/param_theme.R
new file mode 100644
index 0000000..f976c95
--- /dev/null
+++ b/man-roxygen/param_theme.R
@@ -0,0 +1,2 @@
+#' @param theme ([ggplot2::theme()])\cr
+#' The [ggplot2::theme_minimal()] is applied by default to all plots.
diff --git a/man/cost_over_time.Rd b/man/cost_over_time.Rd
new file mode 100644
index 0000000..14d52a9
--- /dev/null
+++ b/man/cost_over_time.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{cost_over_time}
+\alias{cost_over_time}
+\title{Cost-Over-Time Plot}
+\usage{
+cost_over_time(instance, time = NULL, theme = ggplot2::theme_minimal())
+}
+\arguments{
+\item{instance}{(\verb{[TuningInstanceAsyncSingleCrit]})\cr
+Single-criterion tuning instance with Rush.
+For \link{mlr3automl} learners, the tuning instance is stored in the field \verb{$instance}.}
+
+\item{time}{(\code{character(1)})\cr
+Column in the archive to be interpreted as the time variable, e.g. "timestamp_xs", "timestamp_ys".
+If \code{NULL} (default), the configuration ID will be used.}
+
+\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr
+The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.}
+}
+\description{
+Plots the cost (objective) over time, where the time variable can be set by the user.
+}
diff --git a/man/marginal_plot.Rd b/man/marginal_plot.Rd
new file mode 100644
index 0000000..e5ea290
--- /dev/null
+++ b/man/marginal_plot.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{marginal_plot}
+\alias{marginal_plot}
+\title{Marginal Plot}
+\usage{
+marginal_plot(instance, x, y = NULL, theme = ggplot2::theme_minimal())
+}
+\arguments{
+\item{instance}{(\verb{[TuningInstanceAsyncSingleCrit]})\cr
+Single-criterion tuning instance with Rush.
+For \link{mlr3automl} learners, the tuning instance is stored in the field \verb{$instance}.}
+
+\item{x}{(\code{character(1)})
+Name of the parameter to be mapped to the x-axis.}
+
+\item{y}{(\code{character(1)})
+Name of the parameter to be mapped to the y-axis.
+If \code{NULL} (default), the measure (e.g. \code{classif.ce}) is mapped to the y-axis.}
+
+\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr
+The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.}
+}
+\description{
+Creates 2D marginal plots for evaluated configurations.
+}
diff --git a/man/mlr3automl-package.Rd b/man/mlr3automl-package.Rd
index 6fd4cfd..b6d272c 100644
--- a/man/mlr3automl-package.Rd
+++ b/man/mlr3automl-package.Rd
@@ -24,6 +24,7 @@ Useful links:
 Authors:
 \itemize{
   \item Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID})
+  \item Baisu Zhou \email{baisu.zhou@outlook.com}
 }
 
 }
diff --git a/man/parallel_coordinates.Rd b/man/parallel_coordinates.Rd
new file mode 100644
index 0000000..699ce67
--- /dev/null
+++ b/man/parallel_coordinates.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{parallel_coordinates}
+\alias{parallel_coordinates}
+\title{Parallel Coordinates Plot}
+\usage{
+parallel_coordinates(
+  instance,
+  cols_x = NULL,
+  trafo = FALSE,
+  theme = ggplot2::theme_minimal()
+)
+}
+\arguments{
+\item{instance}{(\verb{[TuningInstanceAsyncSingleCrit]})\cr
+Single-criterion tuning instance with Rush.
+For \link{mlr3automl} learners, the tuning instance is stored in the field \verb{$instance}.}
+
+\item{cols_x}{(\code{character()})
+Column names of x values.
+By default, all untransformed x values from the search space are plotted.}
+
+\item{trafo}{(\code{logical(1)})
+If \code{FALSE} (default), the untransformed x values are plotted.
+If \code{TRUE}, the transformed x values are plotted.}
+
+\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr
+The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.}
+}
+\description{
+Adapted from \code{\link[mlr3viz:reexports]{mlr3viz::autoplot()}} with \code{type == "parallel"}. Since the hyperparameters of each individual learner are conditioned on \code{branch.selection}, missing values are expected in the archive data. When standardizing the hyperparameter values (referred to as "x values" in the following to be consistent with \code{mlr3viz} documentation), \code{na.rm = TRUE} is used to compute \code{mean()} and \code{stats::sd()}.
+}
diff --git a/man/param_panel.Rd b/man/param_panel.Rd
new file mode 100644
index 0000000..63aae15
--- /dev/null
+++ b/man/param_panel.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helpers_app.R
+\name{param_panel}
+\alias{param_panel}
+\title{Custom conditionalPanel for hyperparameter selection}
+\usage{
+param_panel(condition, prefix, learner_ids, param_ids, ...)
+}
+\arguments{
+\item{condition}{(\code{character(1)})\cr
+Passed to the \code{condition} argument of \verb{[shiny::conditionalPanel]}.}
+
+\item{prefix}{(\code{character(1)})\cr
+Prefix of input slot names.}
+
+\item{learner_ids}{(\code{character()})\cr
+Vector of all possible learner/branch IDs.}
+
+\item{param_ids}{(\code{character()})\cr
+Vector of all possible param IDs.}
+
+\item{...}{(anything)
+Additional arguments passed to \verb{[shiny::conditionalPanel]}.}
+}
+\description{
+Used for Marginal Plots and Partial Dependence Plots.
+}
diff --git a/man/pareto_front.Rd b/man/pareto_front.Rd
new file mode 100644
index 0000000..c58f17c
--- /dev/null
+++ b/man/pareto_front.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{pareto_front}
+\alias{pareto_front}
+\title{Pareto Front}
+\usage{
+pareto_front(instance, theme = ggplot2::theme_minimal())
+}
+\arguments{
+\item{instance}{(\verb{[TuningInstanceAsyncSingleCrit]})\cr
+Single-criterion tuning instance with Rush.
+For \link{mlr3automl} learners, the tuning instance is stored in the field \verb{$instance}.}
+
+\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr
+The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.}
+}
+\description{
+Plots the Pareto front with x-axis representing the tuning objective (e.g. \verb{"classif.ce"}) and y-axis representing time (the \code{runtime_learners} column in the archive).
+}
diff --git a/man/partial_dependence_plot.Rd b/man/partial_dependence_plot.Rd
new file mode 100644
index 0000000..f4c76ea
--- /dev/null
+++ b/man/partial_dependence_plot.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{partial_dependence_plot}
+\alias{partial_dependence_plot}
+\title{Partial Dependence Plot}
+\usage{
+partial_dependence_plot(
+  instance,
+  x,
+  y,
+  type = "default",
+  theme = ggplot2::theme_minimal(),
+  ...
+)
+}
+\arguments{
+\item{instance}{(\verb{[TuningInstanceAsyncSingleCrit]})\cr
+Single-criterion tuning instance with Rush.
+For \link{mlr3automl} learners, the tuning instance is stored in the field \verb{$instance}.}
+
+\item{x}{(\code{character(1)})
+Name of the parameter to be mapped to the x-axis.}
+
+\item{y}{(\code{character(1)})
+Name of the parameter to be mapped to the y-axis.}
+
+\item{type}{(\code{character(1)})
+Type of the two-parameter partial dependence plot. Possible options are listed below.
+\itemize{
+\item \code{"default"}: Use the default setting in \code{iml}.
+\item \code{"contour"}: Create a contour plot using \verb{[ggplot2::geom_contour_filled]}. Only supported if both parameters are numerical.
+}
+Ignored if only one parameter is provided.}
+
+\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr
+The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.}
+
+\item{...}{(anything)
+Arguments passed to \verb{[iml::FeatureEffect]}.}
+}
+\description{
+Creates a partial dependence plot (PDP) via the \verb{[iml]} package.
+}
diff --git a/man/visualize.Rd b/man/visualize.Rd
new file mode 100644
index 0000000..3e07cef
--- /dev/null
+++ b/man/visualize.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization_app.R
+\name{visualize}
+\alias{visualize}
+\title{Shiny App for Visualizing AutoML Results}
+\usage{
+visualize(instance)
+}
+\arguments{
+\item{instance}{(\verb{[mlr3tuning::TuningInstanceAsyncSingleCrit]})}
+}
+\description{
+Shiny App for Visualizing AutoML Results
+}
diff --git a/tests/testthat/test_visualization.R b/tests/testthat/test_visualization.R
new file mode 100644
index 0000000..05c7344
--- /dev/null
+++ b/tests/testthat/test_visualization.R
@@ -0,0 +1,144 @@
+skip_on_cran()
+skip_if_not_installed("rush")
+skip_if_not_installed("glmnet")
+skip_if_not_installed("kknn")
+skip_if_not_installed("ranger")
+skip_if_not_installed("e1071")
+
+# cost over time
+test_that("cost over time works", {
+  task = tsk("penguins")
+
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner = lrn("classif.auto_ranger",
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner$train(task)
+
+  vdiffr::expect_doppelganger("cot-config-id", cost_over_time(learner$instance))
+  vdiffr::expect_doppelganger("cot-timestamp-xs", cost_over_time(learner$instance, time = "timestamp_xs"))
+  vdiffr::expect_doppelganger("cot-timestamp-ys", cost_over_time(learner$instance, time = "timestamp_ys"))
+})
+
+
+# marginal plots
+test_that("marginal plot works", {
+  task = tsk("penguins")
+
+  # numeric vs numeric
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner_glmnet = lrn("classif.auto_glmnet",
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner_glmnet$train(task)
+  vdiffr::expect_doppelganger(
+    "mp-numeric-numeric",
+    marginal_plot(learner_glmnet$instance, x = "glmnet.alpha", y = "glmnet.s")
+  )
+  vdiffr::expect_doppelganger(
+    "mp-numeric-numeric2",
+    marginal_plot(learner_glmnet$instance, x = "glmnet.s", y = "glmnet.alpha")
+  )
+
+  # numeric vs factor
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner_kknn = lrn("classif.auto_kknn",
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner_kknn$train(task)
+  vdiffr::expect_doppelganger(
+    "mp-numeric-factor",
+    marginal_plot(learner_kknn$instance, x = "kknn.distance", y = "kknn.kernel")
+  )
+  vdiffr::expect_doppelganger(
+    "mp-factor-numeric",
+    marginal_plot(learner_kknn$instance, x = "kknn.kernel", y = "kknn.distance")
+  )
+
+  # numeric vs logical
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner_ranger = lrn("classif.auto_ranger",
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner_ranger$train(task)
+  vdiffr::expect_doppelganger(
+    "mp-numeric-logical",
+    marginal_plot(learner_ranger$instance, x = "ranger.num.trees", y = "ranger.replace")
+  )
+  vdiffr::expect_doppelganger(
+    "mp-logical-numeric",
+    marginal_plot(learner_ranger$instance, x = "ranger.replace", y = "ranger.num.trees")
+  )
+})
+
+test_that("marginal plot accepts params on different branches", {
+  task = tsk("penguins")
+
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner = lrn("classif.auto",
+    learner_ids = c("kknn", "svm"),
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner$train(task)
+  vdiffr::expect_doppelganger(
+    "mp-different-branches",
+    marginal_plot(learner$instance, x = "kknn.distance", y = "svm.cost")
+  )
+})
+
+test_that("marginal plot handles dependence", {
+  task = tsk("penguins")
+
+  set.seed(1453)
+  flush_redis()
+  rush_plan(n_workers = 2)
+
+  learner_svm = lrn("classif.auto_svm",
+    small_data_size = 1,
+    resampling = rsmp("holdout"),
+    measure = msr("classif.ce"),
+    terminator = trm("evals", n_evals = 6)
+  )
+  learner_svm$train(task)
+  vdiffr::expect_doppelganger(
+    "mp-dependence",
+    marginal_plot(learner_svm$instance, x = "svm.kernel", y = "svm.degree")
+  )
+})
+
+
+# parallel coordinates
+
+# pdp
+
+# pareto front
+
+# footprint