diff --git a/DESCRIPTION b/DESCRIPTION index 3fc895881..52c168942 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -88,6 +88,10 @@ Suggests: vtreat, future, htmlwidgets, + uwot (>= 0.2.1), + RcppAnnoy, + RcppHNSW, + rnndescent, ranger ByteCompile: true Encoding: UTF-8 @@ -166,6 +170,7 @@ Collate: 'PipeOpThreshold.R' 'PipeOpTrafo.R' 'PipeOpTuneThreshold.R' + 'PipeOpUMAP.R' 'PipeOpUnbranch.R' 'PipeOpVtreat.R' 'PipeOpYeoJohnson.R' diff --git a/NAMESPACE b/NAMESPACE index 6d8c22381..a5538175d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -109,6 +109,7 @@ export(PipeOpTaskPreprocSimple) export(PipeOpTextVectorizer) export(PipeOpThreshold) export(PipeOpTuneThreshold) +export(PipeOpUMAP) export(PipeOpUnbranch) export(PipeOpVtreat) export(PipeOpYeoJohnson) diff --git a/NEWS.md b/NEWS.md index fc42cae16..4f583d8c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mlr3pipelines 0.6.1 * New PipeOp `PipeOpRowApply` / `po("rowapply")` +* New PipeOp `PipeOpUMAP` / `po("umap")` # mlr3pipelines 0.6.0 diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R new file mode 100644 index 000000000..49798bbb2 --- /dev/null +++ b/R/PipeOpUMAP.R @@ -0,0 +1,438 @@ +#' @title Uniform Manifold Approximation and Projection (UMAP) +#' +#' @usage NULL +#' @name mlr_pipeops_umap +#' @format [`R6Class`] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP). +#' See [uwot::umap2()] for details. +#' +#' @section Construction: +#' ``` +#' PipeOpUMAP$new(id = "umap", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"umap"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. 
+#' +#' The output is the input [`Task`][mlr3::Task] with all affected numeric features replaced by their UMAP embedding coordinates. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the list +#' returned from [uwot::umap2]. +#' These are in particular: +#' * `embedding` :: `matrix`\cr +#' Matrix of embedded coordinates. +#' * `scale_info` :: named `list()`\cr +#' If `scale` is `TRUE`, this gives the scaling attributes (`center`, `scale`, `nzvcols`) of the scaled data. +#' * `search_k` :: `numeric(1)`\cr +#' Number of nodes searched during the neighbor retrieval. Only used if the `nn_method` is `"annoy"`. +#' For details, see [uwot::umap2()]. +#' * `local_connectivity` :: `numeric(1)`\cr +#' Used local connectivity – i.e. the number of nearest neighbors that should be +#' assumed to be connected at a local level. For details, see [uwot::umap2()]. +#' * `n_epochs` :: `numeric(1)`\cr +#' Number of epochs used during the optimization of the embedded coordinates. For details, see [uwot::umap2()]. +#' * `alpha` :: `numeric(1)`\cr +#' Initial learning rate. For details, see [uwot::umap2()]. +#' * `negative_sample_rate` :: `numeric(1)`\cr +#' The number of negative edge/1-simplex samples used per positive edge/1-simplex sample +#' in optimizing the low dimensional embedding. For details, see [uwot::umap2()]. +#' * `method` :: `character(1)`\cr +#' General method used for dimensionality reduction, is always `"umap"` for this PipeOp. +#' * `a` :: named `numeric(1)`\cr +#' More specific parameters controlling the embedding. For details, see [uwot::umap2()]. +#' * `b` :: named `numeric(1)`\cr +#' More specific parameters controlling the embedding. For details, see [uwot::umap2()]. +#' * `gamma` :: `numeric(1)`\cr +#' Repulsion strength. Weighting applied to negative samples in low dimensional embedding optimization. +#' For details, see [uwot::umap2()]. 
+#' * `approx_pow` :: `logical(1)`\cr +#' If `TRUE`, use an approximation to the power function in the UMAP gradient. For details, see [uwot::umap2()]. +#' * `metric` :: named `list()`\cr +#' Type of distance metric used to find nearest neighbors. For details, see [uwot::umap2()]. +#' * `norig_col` :: `integer(1)`\cr +#' Number of original columns. +#' * `pcg_rand` :: `logical(1)`\cr +#' `TRUE`, if the PCG random number generator (O'Neill, 2014) was used during optimization. +#' Otherwise, Tausworthe "taus88" generator was used. For details, see [uwot::umap2()]. +#' * `batch` :: `logical(1)`\cr +#' `TRUE`, if embedding coordinates were updated at the end of each epoch rather +#' than during the epoch. For details, see [uwot::umap2()]. +#' * `opt_args` :: named `list()`\cr +#' Optimizer parameters, used when `batch = TRUE`. For details, see [uwot::umap2()]. +#' * `num_precomputed_nns` :: `numeric(1)`\cr +#' Number of precomputed nearest neighbors, via `nn_method`. +#' * `min_dist` :: `numeric(1)`\cr +#' The effective minimum distance between embedded points. For details, see [uwot::umap2()]. +#' * `spread` :: `numeric(1)`\cr +#' The effective scale of embedded points. For details, see [uwot::umap2()]. +#' * `binary_edge_weights` :: `logical(1)`\cr +#' If `TRUE` then edge weights in the input graph were treated as binary (0/1) rather than real valued. +#' For details, see [uwot::umap2()]. +#' * `seed` :: `integer(1)`\cr +#' Integer seed to use to initialize the random number generator state. For details, see [uwot::umap2()]. +#' * `nn_method` :: `any`\cr +#' Method for finding nearest neighbors. For details, see [uwot::umap2()]. +#' * `nn_args` :: `list()`\cr +#' A list containing additional arguments to pass to the nearest neighbor method. For details, see [uwot::umap2()]. +#' * `n_neighbors` :: `numeric(1)`\cr +#' The size of the neighborhood used for manifold approximation. For details, see [uwot::umap2()]. 
+#' * `nn_index` :: named `list()`\cr +#' Nearest neighbor index that can be used for transformation of new data points. +#' * `pca_models` :: `list()`\cr +#' Used PCA models for initialization, if `pca` is specified. For details, see [uwot::umap2()]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `n_neighbors` :: `integer(1)`\cr +#' The size of the neighborhood used for manifold approximation. Default is `15`. +#' For details, see [uwot::umap2()]. +#' * `n_components` :: `integer(1)`\cr +#' The dimension of the space to embed into. Default is `2`. For details, see [uwot::umap2()]. +#' * `metric` :: `character(1)`\cr +#' Type of distance metric to use to find nearest neighbors. Default is `"euclidean"`. +#' For details, see [uwot::umap2()]. +#' * `n_epochs` :: `integer(1)`\cr +#' Number of epochs to use during the optimization of the embedded coordinates. Default is `NULL`. +#' For details, see [uwot::umap2()]. +#' * `learning_rate` :: `numeric(1)`\cr +#' Initial learning rate used in optimization of the coordinates. Default is `1`. +#' For details, see [uwot::umap2()]. +#' * `scale` :: `logical(1)` / `character(1)`\cr +#' Scaling to apply to the data. If `TRUE`, data is standardized. Default is `FALSE`. For details, see [uwot::umap2()]. +#' * `init` :: `character(1)`\cr +#' Type of initialization for the coordinates. May be set to `"custom"`, in which case the `matrix` of initial +#' coordinates passed to `init_custom` is used. Default is `"spectral"`. For details, see [uwot::umap2()]. +#' * `init_custom` :: `matrix`\cr +#' Matrix of initial coordinates. Only used if `init` is `"custom"`. +#' * `init_sdev` :: `character(1)` | `numeric(1)`\cr +#' Scales each dimension of the initialized coordinates to this standard deviation. +#' Default is `"range"`. For details, see [uwot::umap2()]. +#' * `spread` :: `numeric(1)`\cr +#' The effective scale of embedded points. Default is `1`. 
For details, see [uwot::umap2()]. +#' * `min_dist` :: `numeric(1)`\cr +#' The effective minimum distance between embedded points. Default is `0.01`. +#' For details, see [uwot::umap2()]. +#' * `set_op_mix_ratio` :: `numeric(1)`\cr +#' Interpolate between (fuzzy) union and intersection as the set operation used to +#' combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is `1`. +#' For details, see [uwot::umap2()]. +#' * `local_connectivity` :: `numeric(1)`\cr +#' The local connectivity required – i.e. the number of nearest neighbors that should be +#' assumed to be connected at a local level. Default is `1`. For details, see [uwot::umap2()]. +#' * `bandwidth` :: `numeric(1)`\cr +#' The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps. +#' Default is `1`. For details, see [uwot::umap2()]. +#' * `repulsion_strength` :: `numeric(1)`\cr +#' Weighting applied to negative samples in low dimensional embedding optimization. +#' Default is `1`. For details, see [uwot::umap2()]. +#' * `negative_sample_rate` :: `numeric(1)`\cr +#' The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample +#' in optimizing the low dimensional embedding. Default is `5`. For details, see [uwot::umap2()]. +#' * `a` :: `numeric(1)`\cr +#' More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()]. +#' * `b` :: `numeric(1)`\cr +#' More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()]. +#' * `nn_method` :: `character(1)`\cr +#' Method for finding nearest neighbors. Note that only values compatible with [uwot::umap_transform()] are allowed. +#' Default is `NULL`. For details, see [uwot::umap2()]. +#' * `n_trees` :: `integer(1)`\cr +#' Number of trees to build when constructing the nearest neighbor index. Default is `50`. +#' For details, see [uwot::umap2()]. 
+#' * `search_k` :: `integer(1)`\cr +#' Number of nodes to search during the neighbor retrieval. Only used if the `nn_method` is `"annoy"`. +#' For details, see [uwot::umap2()]. +#' * `approx_pow` :: `logical(1)`\cr +#' If `TRUE`, use an approximation to the power function in the UMAP gradient. Default is `FALSE`. +#' For details, see [uwot::umap2()]. +#' * `use_supervised` :: `logical(1)`\cr +#' If `TRUE`, perform supervised dimension reduction. This is done by passing the task's target to [uwot::umap2()]'s `y` argument. +#' For details, see there. Initialized to `FALSE`. +#' * `target_n_neighbors` :: `integer(1)`\cr +#' Number of nearest neighbors to use to construct the target simplicial set. Only used when performing supervised dimension reduction. +#' Default is `n_neighbors`. For details, see [uwot::umap2()]. +#' * `target_metric` :: `character(1)`\cr +#' The metric used to measure distance for the task's target when performing supervised dimension reduction. +#' Default is `"euclidean"`. For details, see [uwot::umap2()]. +#' * `target_weight` :: `numeric(1)`\cr +#' Weighting factor between data topology and target topology. Only used when performing supervised dimension reduction. +#' Default is `0.5`. For details, see [uwot::umap2()]. +#' * `pca` :: `integer(1)`\cr +#' Reduce data to this number of columns using PCA. Default is `NULL`. +#' For details, see [uwot::umap2()]. +#' * `pca_center` :: `logical(1)`\cr +#' If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`. +#' For details, see [uwot::umap2()]. +#' * `pcg_rand` :: `logical(1)`\cr +#' If `TRUE`, use the PCG random number generator (O'Neill, 2014) during optimization. +#' Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator. +#' Default is `TRUE`. For details, see [uwot::umap2()]. 
+#' * `fast_sgd` :: `logical(1)`\cr +#' If `TRUE`, then the following combination of parameters is set: +#' * `pcg_rand = TRUE` +#' * `n_sgd_threads = "auto"` +#' * `approx_pow = TRUE` +#' Default is `FALSE`. For details, see [uwot::umap2()]. +#' * `n_threads` :: `integer(1)`\cr +#' Number of threads to use. Default is `NULL`. For details, see [uwot::umap2()]. +#' * `n_sgd_threads` :: `integer(1)`\cr +#' Number of threads to use during stochastic gradient descent. Default is `0`. +#' For details, see [uwot::umap2()]. +#' * `grain_size` :: `integer(1)`\cr +#' The minimum amount of work to do on each thread. Default is `1`. +#' For details, see [uwot::umap2()]. +#' * `verbose` :: `logical(1)`\cr +#' Should details be printed? Initialized to `FALSE`. For details, see [uwot::umap2()]. +#' * `batch` :: `logical(1)`\cr +#' If `TRUE`, then embedding coordinates are updated at the end of each epoch rather +#' than during the epoch. Default is `TRUE`. For details, see [uwot::umap2()]. +#' * `opt_args` :: named `list()`\cr +#' A list of optimizer parameters, used when `batch = TRUE`. Default is `NULL`. +#' For details, see [uwot::umap2()]. +#' * `epoch_callback` :: `function`\cr +#' A function which will be invoked at the end of every epoch. Default is `NULL`. +#' For details, see [uwot::umap2()]. +#' * `pca_method` :: `character(1)`\cr +#' Method to carry out any PCA dimensionality reduction when the `pca` is specified. +#' Default is `NULL`. For details, see [uwot::umap2()]. +#' * `binary_edge_weights` :: `logical(1)`\cr +#' If `TRUE` then edge weights in the input graph are treated as binary (0/1) rather than real valued. +#' Default is `FALSE`. For details, see [uwot::umap2()]. +#' * `dens_scale` :: `numeric(1)`\cr +#' A scaling factor to apply to the density of the input data. Default is `NULL`. +#' For details, see [uwot::umap2()]. +#' * `seed` :: `integer(1)`\cr +#' Integer seed to use to initialize the random number generator state. +#' Default is `NULL`. 
For details, see [uwot::umap2()]. +#' * `nn_args` :: named `list()`\cr +#' A list containing additional arguments to pass to the nearest neighbor method. +#' Default is `NULL`. For details, see [uwot::umap2()]. +#' +#' Additionally, there are several parameters that may be used to overwrite parameter values for prediction: +#' * `search_k_transform` :: `integer(1)`\cr +#' Number of nodes to search during the neighbor retrieval when predicting. +#' Only used if `nn_method` is `"annoy"`. If `NULL`, `search_k` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()]. +#' * `n_epochs_transform` :: `integer(1)`\cr +#' Number of epochs used during the optimization of the embedded coordinates when predicting. +#' If `NULL`, `n_epochs` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()]. +#' * `init_transform` :: `character(1)`\cr +#' Type of initialization for the coordinates when predicting. May be set to `"custom"`, in which case the `matrix` of initial +#' coordinates passed to `init_transform_custom` is used. Default is `"weighted"`. For details, see [uwot::umap_transform()]. +#' * `init_transform_custom` :: `matrix`\cr +#' Matrix of initial coordinates when predicting. Only used if `init_transform` is `"custom"`. +#' * `batch_transform` :: `logical(1)`\cr +#' If `TRUE`, embedding coordinates are updated at the end of each epoch rather than during the epoch when predicting. +#' If `NULL`, `batch` is used instead. Default is `FALSE`. For details, see [uwot::umap_transform()]. +#' * `learning_rate_transform` :: `numeric(1)`\cr +#' Initial learning rate used in optimization of the coordinates when predicting. +#' If `NULL`, `learning_rate` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()]. +#' * `epoch_callback_transform` :: `function`\cr +#' A function which will be invoked at the end of every epoch when predicting. +#' Default is `NULL`. For details, see [uwot::umap_transform()]. 
#'
#' @section Internals:
#' Uses the [umap2()][uwot::umap2] function.
#'
#' @section Methods:
#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
#'
#' @references
#' `r format_bib("mcinnes_2018")`
#'
#' @examples
#' \dontshow{ if (requireNamespace("uwot")) \{ }
#' library("mlr3")
#'
#' task = tsk("iris")
#' pop = po("umap")
#'
#' task$data()
#' pop$train(list(task))[[1]]$data()
#'
#' pop$state
#' \dontshow{ \} }
#' @family PipeOps
#' @template seealso_pipeopslist
#' @include PipeOpTaskPreproc.R
#' @export
PipeOpUMAP = R6Class("PipeOpUMAP",
  inherit = PipeOpTaskPreproc,
  public = list(
    # Build the parameter set and register the PipeOp.
    #
    # Tags drive how parameters are routed:
    #   * "umap" + "train"        -> forwarded to uwot::umap2() during training
    #   * "umap" + "predict"      -> forwarded to uwot::umap_transform() during prediction
    #   * "overwrite" + "predict" -> `<name>_transform` parameters; the `_transform` suffix is
    #                                stripped before they are forwarded to uwot::umap_transform()
    # Parameters tagged only "train" / "predict" (`use_supervised`, `init_custom`,
    # `init_transform_custom`) are handled explicitly in the private methods.
    initialize = function(id = "umap", param_vals = list()) {
      # Distance metrics shared by `metric` and `target_metric`.
      metric_levels = c(
        "euclidean", "cosine", "manhattan", "hamming", "correlation",
        "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
        "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
        "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
      )

      param_set = ps(
        n_neighbors = p_int(lower = 1L, default = 15L, tags = c("train", "umap")),
        n_components = p_int(lower = 1L, default = 2L, tags = c("train", "umap")),
        metric = p_fct(levels = metric_levels, default = "euclidean", tags = c("train", "umap")),
        n_epochs = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        learning_rate = p_dbl(lower = 0, default = 1, tags = c("train", "umap")),
        scale = p_fct(
          levels = c("none", "scale", "maxabs", "range", "colrange"),
          special_vals = list(FALSE, NULL, "Z", TRUE),
          default = FALSE,
          tags = c("train", "umap")
        ),
        init = p_fct(
          levels = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral"),
          special_vals = list("custom"),
          default = "spectral",
          tags = c("train", "umap")
        ),
        # Not tagged "umap": the matrix is substituted for `init` in .train_dt()
        init_custom = p_uty(custom_check = check_matrix, tags = "train", depends = quote(init == "custom")),
        init_sdev = p_dbl(default = "range", special_vals = list("range"), tags = c("train", "umap")),
        spread = p_dbl(default = 1, tags = c("train", "umap")),
        min_dist = p_dbl(default = 0.01, tags = c("train", "umap")),
        set_op_mix_ratio = p_dbl(lower = 0, upper = 1, default = 1, tags = c("train", "umap")),
        local_connectivity = p_dbl(lower = 1, default = 1, tags = c("train", "umap")),
        bandwidth = p_dbl(default = 1, tags = c("train", "umap")),
        repulsion_strength = p_dbl(default = 1, tags = c("train", "umap")),
        negative_sample_rate = p_dbl(default = 5, tags = c("train", "umap")),
        a = p_dbl(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        b = p_dbl(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        nn_method = p_fct(levels = c("annoy", "hnsw", "nndescent"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        n_trees = p_int(lower = 1L, default = 50L, tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
        search_k = p_int(tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
        # approx_pow is only used if dens_scale is non-NULL
        approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
        # Not tagged "umap": controls whether the target is passed as `y` in .train_dt()
        use_supervised = p_lgl(default = FALSE, tags = c("train")),
        target_n_neighbors = p_int(tags = c("train", "umap"), depends = quote(use_supervised == TRUE)),
        target_metric = p_fct(
          levels = metric_levels,
          default = "euclidean",
          tags = c("train", "umap"),
          depends = quote(use_supervised == TRUE)
        ),
        target_weight = p_dbl(lower = 0, upper = 1, default = 0.5, tags = c("train", "umap"), depends = quote(use_supervised == TRUE)),
        # pca is ignored if metric is "hamming"; the level list is spelled out literally
        # because the condition is quoted and not evaluated here
        pca = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap"),
          depends = quote(metric %in% c(
            "euclidean", "cosine", "manhattan", "correlation",
            "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
            "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
            "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
          ))),
        # pca_center is only used if pca is specified
        pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
        pcg_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
        fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
        n_threads = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "predict", "umap")),
        n_sgd_threads = p_int(lower = 0L, default = 0L, special_vals = list("auto"), tags = c("train", "predict", "umap")),
        grain_size = p_int(lower = 1L, default = 1L, tags = c("train", "predict", "umap")),
        verbose = p_lgl(default = TRUE, tags = c("train", "predict", "umap")),
        batch = p_lgl(default = TRUE, tags = c("train", "umap")),
        opt_args = p_uty(
          default = NULL,
          tags = c("train", "umap"),
          custom_check = crate(function(x) check_list(x, types = c("numeric", "character"), min.len = 1, max.len = 5,
            names = "unique", null.ok = TRUE)),
          depends = quote(batch == TRUE)
        ),
        epoch_callback = p_uty(
          default = NULL,
          tags = c("train", "umap"),
          custom_check = crate(function(x) check_function(x, args = c("epochs", "n_epochs", "coords"), null.ok = TRUE))
        ),
        # pca_method is only used if pca is specified
        pca_method = p_fct(c("irlba", "rsvd", "bigstatsr", "svd", "auto"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        binary_edge_weights = p_lgl(default = FALSE, tags = c("train", "umap")),
        dens_scale = p_dbl(lower = 0, upper = 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
        nn_args = p_uty(
          default = NULL,
          tags = c("train", "umap"),
          custom_check = crate(function(x) check_list(x, types = c("integer", "numeric", "character"),
            min.len = 1, max.len = 8, names = "unique", null.ok = TRUE))
        ),
        # Parameters that are passed to umap_transform to overwrite parameters from training for prediction
        search_k_transform = p_int(default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite"), depends = quote(nn_method == "annoy")),
        n_epochs_transform = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite")),
        init_transform = p_fct(levels = c("weighted", "average"), special_vals = list("custom"), default = "weighted", tags = c("predict", "overwrite")),
        # Not tagged "overwrite": the matrix is substituted for `init` in .predict_dt()
        init_transform_custom = p_uty(custom_check = check_matrix, tags = "predict", depends = quote(init_transform == "custom")),
        batch_transform = p_lgl(default = FALSE, special_vals = list(NULL), tags = c("predict", "overwrite")),
        learning_rate_transform = p_dbl(default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite")),
        epoch_callback_transform = p_uty(
          default = NULL,
          tags = c("predict", "overwrite"),
          custom_check = crate(function(x) check_function(x, args = c("epochs", "n_epochs", "coords", "fixed_coords"), null.ok = TRUE))
        )
      )
      # Initial values that differ from the declared defaults
      param_set$values = list(verbose = FALSE, use_supervised = FALSE)

      super$initialize(id, param_set = param_set, param_vals = param_vals, packages = "uwot", feature_types = c("numeric", "integer"))
    }
  ),
  private = list(
    # Fit UMAP on `dt`, store the full uwot model as `$state` and return the embedding matrix.
    # `target` is only forwarded to umap2() (as `y`) when `use_supervised` is TRUE.
    .train_dt = function(dt, levels, target) {
      pv = self$param_set$values
      # Indicate that umap2() should return the full model which we need for prediction
      umap_args = insert_named(
        self$param_set$get_values(tags = c("umap", "train")),
        list(ret_model = TRUE)
      )
      # Use target for supervised dimension reduction when specified
      if (isTRUE(pv$use_supervised)) {
        umap_args = insert_named(umap_args, list(y = target))
      }
      # Use matrix passed to init_custom for initialization when specified
      if (identical(pv$init, "custom")) {
        # Fail early instead of forwarding `init = NULL` to uwot
        if (is.null(pv$init_custom)) {
          stop("Parameter 'init' is \"custom\" but no matrix was supplied via 'init_custom'.", call. = FALSE)
        }
        umap_args = insert_named(umap_args, list(init = pv$init_custom))
      }
      model = invoke(uwot::umap2, dt, .args = umap_args)
      self$state = model
      model$embedding
    },

    # Embed new data with the model stored in `$state`.
    # `<name>_transform` parameters take the place of their training counterparts.
    .predict_dt = function(dt, levels) {
      pv = self$param_set$values
      transform_args = self$param_set$get_values(tags = c("umap", "predict"))
      # Get overwriting params and rename them to the correct argument names for uwot::umap_transform()
      overwrite_args = self$param_set$get_values(tags = c("overwrite", "predict"))
      names(overwrite_args) = sub("_transform$", "", names(overwrite_args))
      transform_args = insert_named(transform_args, overwrite_args)
      # Use matrix passed to init_transform_custom for initialization when specified
      if (identical(pv$init_transform, "custom")) {
        # Fail early instead of forwarding `init = NULL` to uwot
        if (is.null(pv$init_transform_custom)) {
          stop("Parameter 'init_transform' is \"custom\" but no matrix was supplied via 'init_transform_custom'.", call. = FALSE)
        }
        transform_args = insert_named(transform_args, list(init = pv$init_transform_custom))
      }
      invoke(uwot::umap_transform, dt, self$state, .args = transform_args)
    },

    # We need to overload deep_clone since state$nn_index$ann is a RefClass if nn_method is "annoy" or "hnsw"
    deep_clone = function(name, value) {
      # Everything except a trained state is cloned the default way
      if (name != "state" || inherits(value, "NO_OP")) {
        return(super$deep_clone(name, value))
      }
      ann = value$nn_index$ann
      # No reference-class index present: default cloning is sufficient
      if (is.null(ann) || !methods::is(ann, "envRefClass")) {
        return(super$deep_clone(name, value))
      }
      # Copy the reference object explicitly so the clone does not share the index
      value$nn_index$ann = ann$copy()
      value
    }
  )
)

mlr_pipeops$add("umap", PipeOpUMAP)
diff --git a/R/bibentries.R b/R/bibentries.R index de55741d5..09c994452 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -52,5 +52,18 @@ bibentries = c( author = "Yujun Wu and Dennis D Boos and Leonard A Stefanski", title = "Controlling Variable Selection by the Addition of Pseudovariables", journal = "Journal of the American Statistical Association" + ), + + mcinnes_2018 = bibentry("article", + doi = "10.21105/joss.00861", + year = "2018", + month = "9", + publisher = "The Open Journal", + volume = "3", + number = "29", + author = 
"Leland McInnes and John Healy and James Melville and Lukas Grossberger", + title = "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction", + journal = "Journal of Open Source Software" ) ) + diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 3458719fd..3d90fb2b5 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -334,6 +334,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 46bc5918b..209921f51 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -166,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index caa76efbb..ec6ad9942 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -196,6 +196,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 8a534ec18..a45c2e5bf 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -207,6 +207,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git 
a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 69f92477c..fb6499f63 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -262,6 +262,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index d836e75a5..a5b51328d 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -199,6 +199,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index e2a7d1e1a..841bcc558 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index 064a069ca..a3c41ac0d 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index a83b502a1..c8a771ca4 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ 
-168,6 +168,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index 4b98bbd2a..c2a6a778d 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -147,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 19dcd067e..91172183d 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -188,6 +188,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index 160ba73ab..8717984bf 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -164,6 +164,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index a4666b7e4..2f292bd01 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -156,6 +156,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, 
\code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index bf8065f8e..9a0e23f54 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -177,6 +177,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 91798c99d..e2e90ebda 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -144,6 +144,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index a0e742faa..bcc0fde22 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index a4160342d..9f5825c51 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -166,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, 
+\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5028544b2..99f04c302 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -183,6 +183,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 71a13f26f..6ce2f4095 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -179,6 +179,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 7a435e1f2..842ac5bd9 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -161,6 +161,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index ad391725f..3fe1f3ad8 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -176,6 +176,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, 
\code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index a509b87eb..3a33c5181 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -181,6 +181,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a37d1328a..f7a4d85f4 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -212,6 +212,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 6a4ac569c..7d9a1d757 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index ce133cd8b..6e30e1df2 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -148,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git 
a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index d6e93d163..cbb0331e9 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -176,6 +176,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index a6ab5d027..bba90ac80 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index c5abb9d72..5f1f45479 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -141,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index 4819be20f..677eea62a 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -187,6 +187,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 
64dd29a38..0b117579a 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index 1f4286c64..d319c3445 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 9cbcdba06..f01679503 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -142,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index 221912cfb..137893da3 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -164,6 +164,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index d9f4d8f75..1cf208d0e 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ 
b/man/mlr_pipeops_imputesample.Rd @@ -137,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index a9bddd763..647231014 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -151,6 +151,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 43c259806..06f1eea6d 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -182,6 +182,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index b9f8d51da..923133353 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -165,6 +165,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index 1e1b00c2e..0360224a7 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -141,6 +141,7 @@ Other PipeOps: 
\code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index e4c67c232..9f4639e0e 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -147,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index c07f85bab..cc1f9ca6b 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -153,6 +153,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index 8da58522f..1f342afd9 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -158,6 +158,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 7c8c351df..ce74b22e5 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}. 
\section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and +Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and \code{\link[MASS:ginv]{ginv()}}. } @@ -193,6 +193,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index fd6fd2ea4..76995c57a 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -143,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 76c661fde..c18d45040 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -160,6 +160,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index f01cba41e..3593995ad 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -155,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, 
\code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index 18f5eb086..1278c7706 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index a5ef51112..5b25c5183 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -166,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 6e5a85a24..4bbb9348a 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -140,6 +140,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 2323caf66..81eb4b18a 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_randomresponse.Rd 
b/man/mlr_pipeops_randomresponse.Rd index c497d3ad2..e3ba830b9 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -169,6 +169,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index a97bde700..fe0702769 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -155,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index ef3d43d75..63b2f8d31 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -145,6 +145,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 20947f1be..cd58d675c 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -144,6 +144,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 
71949f16c..d9cad36fa 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -137,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd index 85e0ac30e..e57437129 100644 --- a/man/mlr_pipeops_rowapply.Rd +++ b/man/mlr_pipeops_rowapply.Rd @@ -46,14 +46,13 @@ Function to apply to each row in the affected columns of the task. The return value should be a vector of the same length for every input. Initialized as \code{\link[base:identity]{identity()}}. \item \code{col_prefix} :: \code{character(1)}\cr -If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Default is \code{""}. +If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Initialized as \code{""}. } } \section{Internals}{ -Calls \code{\link{apply}} on the data, using the value of \code{applicator} as \code{FUN} and \code{simplify = TRUE}, then coerces the output via -\code{\link[data.table:as.data.table]{as.data.table()}}. +Calls \code{\link{apply}} on the data, using the value of \code{applicator} as \code{FUN}. 
} \section{Fields}{ @@ -144,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 9c8a3a316..c6f3a9e9e 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -159,6 +159,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index 46c5c4c45..30b98bcf2 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 678e54b0d..4d47786aa 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index 353e280b0..19bfb3f40 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -155,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, 
\code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index ccbd9c6cd..ada38c14b 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -158,6 +158,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index 9fdb650d6..87aa9ab33 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index c89142226..b7dd8c777 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -149,6 +149,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index fe5073375..bf6bf06b9 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, 
+\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index dd7982fdf..00e39ea82 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -182,6 +182,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index e651099eb..2edbe3df8 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -148,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index 57ab20d9a..3749a96f7 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -248,6 +248,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index addaefa55..527d989d7 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -141,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_tunethreshold}}, 
+\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 30684bfb6..0ea642439 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -166,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd new file mode 100644 index 000000000..adf871539 --- /dev/null +++ b/man/mlr_pipeops_umap.Rd @@ -0,0 +1,366 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpUMAP.R +\name{mlr_pipeops_umap} +\alias{mlr_pipeops_umap} +\alias{PipeOpUMAP} +\title{Uniform Manifold Approximation and Projection (UMAP)} +\format{ +\code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP). +See \code{\link[uwot:umap2]{uwot::umap2()}} for details. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpUMAP$new(id = "umap", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"umap"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output is the input \code{\link[mlr3:Task]{Task}} with all affected numeric features replaced by their embedded coordinates in the low-dimensional UMAP space. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the list +returned from \link[uwot:umap2]{uwot::umap2}. +These are in particular: +\itemize{ +\item \code{embedding} :: \code{matrix}\cr +Matrix of embedded coordinates. +\item \code{scale_info} :: named \code{list()}\cr +If \code{scale} is \code{TRUE}, this gives the scaling attributes (\code{center}, \code{scale}, \code{nzvcols}) of the scaled data. +\item \code{search_k} :: \code{numeric(1)}\cr +Number of nodes searched during the neighbor retrieval. Only used if the \code{nn_method} is \code{"annoy"}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{local_connectivity} :: \code{numeric(1)}\cr +Used local connectivity – i.e. the number of nearest neighbors that should be +assumed to be connected at a local level. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{n_epochs} :: \code{numeric(1)}\cr +Number of epochs used during the optimization of the embedded coordinates. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{alpha} :: \code{numeric(1)}\cr +Initial learning rate. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. 
+\item \code{negative_sample_rate} :: \code{numeric(1)}\cr +The number of negative edge/1-simplex samples used per positive edge/1-simplex sample +in optimizing the low dimensional embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{method} :: \code{character(1)}\cr +General method used for dimensionality reduction, is always \code{"umap"} for this PipeOp. +\item \code{a} :: named \code{numeric(1)}\cr +More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{b} :: named \code{numeric(1)}\cr +More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{gamma} :: \code{numeric(1)}\cr +Repulsion strength. Weighting applied to negative samples in low dimensional embedding optimization. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{approx_pow} :: \code{logical(1)}\cr +If \code{TRUE}, use an approximation to the power function in the UMAP gradient. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{metric} :: named \code{list()}\cr +Type of distance metric used to find nearest neighbors. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{norig_col} :: \code{integer(1)}\cr +Number of original columns. +\item \code{pcg_rand} :: \code{logical(1)}\cr +\code{TRUE}, if the PCG random number generator (O'Neill, 2014) was used during optimization. +Otherwise, Tausworthe "taus88" generator was used. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{batch} :: \code{logical(1)}\cr +\code{TRUE}, if embedding coordinates were updated at the end of each epoch rather +than during the epoch. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{opt_args} :: named \code{list()}\cr +Optimizer parameters, used when \code{batch = TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. 
+\item \code{num_precomputed_nns} :: \code{numeric(1)}\cr +Number of precomputed nearest neighbors, via \code{nn_method}. +\item \code{min_dist} :: \code{numeric(1)}\cr +The effective minimum distance between embedded points. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{spread} :: \code{numeric(1)}\cr +The effective scale of embedded points. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{binary_edge_weights} :: \code{logical(1)}\cr +If \code{TRUE} then edge weights in the input graph were treated as binary (0/1) rather than real valued. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{seed} :: \code{integer(1)}\cr +Integer seed to use to initialize the random number generator state. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{nn_method} :: \code{any}\cr +Method for finding nearest neighbors. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{nn_args} :: \code{list()}\cr +A list containing additional arguments to pass to the nearest neighbor method. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{n_neighbors} :: \code{numeric(1)}\cr +The size of the neighborhood used for manifold approximation. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{nn_index} :: named \code{list()}\cr +Nearest neighbor index that can be used for transformation of new data points. +\item \code{pca_models} :: \code{list()}\cr +PCA models used for initialization, if \code{pca} is specified. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +} +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{n_neighbors} :: \code{integer(1)}\cr +The size of the neighborhood used for manifold approximation. Default is \code{15}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. 
+\item \code{n_components} :: \code{integer(1)}\cr +The dimension of the space to embed into. Default is \code{2}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{metric} :: \code{character(1)}\cr +Type of distance metric to use to find nearest neighbors. Default is \code{"euclidean"}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{n_epochs} :: \code{integer(1)}\cr +Number of epochs to use during the optimization of the embedded coordinates. Default is \code{NULL}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{learning_rate} :: \code{numeric(1)}\cr +Initial learning rate used in optimization of the coordinates. Default is \code{1}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{scale} :: \code{logical(1)} / \code{character(1)}\cr +Scaling to apply to the data. If \code{TRUE}, data is standardized. Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{init} :: \code{character(1)}\cr +Type of initialization for the coordinates. May be set to \code{"custom"}, in which case the \code{matrix} of initial +coordinates passed to \code{init_custom} is used. Default is \code{"spectral"}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{init_custom} :: \code{matrix}\cr +Matrix of initial coordinates. Only used, if \code{init} is \code{"custom"}. +\item \code{init_sdev} :: \code{character(1)} | \code{numeric(1)}\cr +Scales each dimension of the initialized coordinates to this standard deviation. +Default is \code{"range"}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{spread} :: \code{numeric(1)}\cr +The effective scale of embedded points. Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{min_dist} :: \code{numeric(1)}\cr +The effective minimum distance between embedded points. Default is \code{0.01}. 
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{set_op_mix_ratio} :: \code{numeric(1)}\cr +Interpolate between (fuzzy) union and intersection as the set operation used to +combine local fuzzy simplicial sets to obtain a global fuzzy simplicial set. Default is \code{1}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{local_connectivity} :: \code{numeric(1)}\cr +The local connectivity required – i.e. the number of nearest neighbors that should be +assumed to be connected at a local level. Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{bandwidth} :: \code{numeric(1)}\cr +The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps. +Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{repulsion_strength} :: \code{numeric(1)}\cr +Weighting applied to negative samples in low dimensional embedding optimization. +Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{negative_sample_rate} :: \code{numeric(1)}\cr +The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample +in optimizing the low dimensional embedding. Default is \code{5}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{a} :: \code{numeric(1)}\cr +More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{b} :: \code{numeric(1)}\cr +More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{nn_method} :: \code{character(1)}\cr +Method for finding nearest neighbors. Note that only values compatible with \code{\link[uwot:umap_transform]{uwot::umap_transform()}} are allowed. +Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{n_trees} :: \code{integer(1)}\cr +Number of trees to build when constructing the nearest neighbor index. Default is \code{50}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{search_k} :: \code{integer(1)}\cr +Number of nodes to search during the neighbor retrieval. Only used if the \code{nn_method} is \code{"annoy"}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{approx_pow} :: \code{logical(1)}\cr +If \code{TRUE}, use an approximation to the power function in the UMAP gradient. Default is \code{FALSE}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{use_supervised} :: \code{logical(1)}\cr +If \code{TRUE}, perform supervised dimension reduction. This is done by passing the task's target to \code{\link[uwot:umap2]{uwot::umap2()}}'s \code{y} argument. +For details, see there. Initialized to \code{FALSE}. +\item \code{target_n_neighbors} :: \code{integer(1)}\cr +Number of nearest neighbors to use to construct the target simplicial set. Only used when performing supervised dimension reduction. +Default is \code{n_neighbors}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{target_metric} :: \code{character(1)}\cr +The metric used to measure distance for the task's target when performing supervised dimension reduction. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{target_weight} :: \code{numeric(1)}\cr +Weighting factor between data topology and target topology. Only used when performing supervised dimension reduction. +Default is \code{0.5}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{pca} :: \code{integer(1)}\cr +Reduce data to this number of columns using PCA. Default is \code{NULL}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{pca_center} :: \code{logical(1)}\cr +If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{pcg_rand} :: \code{logical(1)}\cr +If \code{TRUE}, use the PCG random number generator (O'Neill, 2014) during optimization. +Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator. +Default is \code{TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{fast_sgd} :: \code{logical(1)}\cr +If \code{TRUE}, then the following combination of parameters is set: +\itemize{ +\item \code{pcg_rand = TRUE} +\item \code{n_sgd_threads = "auto"} +\item \code{approx_pow = TRUE} +} +Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{n_threads} :: \code{integer(1)}\cr +Number of threads to use. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{n_sgd_threads} :: \code{integer(1)}\cr +Number of threads to use during stochastic gradient descent. Default is \code{0}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{grain_size} :: \code{integer(1)}\cr +The minimum amount of work to do on each thread. Default is \code{1}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{verbose} :: \code{logical(1)}\cr +Should details be printed? Initialized to \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{batch} :: \code{logical(1)}\cr +If \code{TRUE}, then embedding coordinates are updated at the end of each epoch rather +than during the epoch. Default is \code{TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{opt_args} :: named \code{list()}\cr +A list of optimizer parameters, used when \code{batch = TRUE}. Default is \code{NULL}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{epoch_callback} :: \code{function}\cr +A function which will be invoked at the end of every epoch. Default is \code{NULL}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{pca_method} :: \code{character(1)}\cr +Method to carry out any PCA dimensionality reduction when the \code{pca} is specified. +Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{binary_edge_weights} :: \code{logical(1)}\cr +If \code{TRUE} then edge weights in the input graph are treated as binary (0/1) rather than real valued. +Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{dens_scale} :: \code{numeric(1)}\cr +A scaling factor to apply to the density of the input data. Default is \code{NULL}. +For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{seed} :: \code{integer(1)}\cr +Integer seed to use to initialize the random number generator state. +Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +\item \code{nn_args} :: named \code{list()}\cr +A list containing additional arguments to pass to the nearest neighbor method. +Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}. +} + +Additionally, there are several parameters that may be used to overwrite parameter values for prediction: +\itemize{ +\item \code{search_k_transform} :: \code{integer(1)}\cr +Number of nodes to search during the neighbor retrieval when predicting. +Only used if \code{nn_method} is \code{"annoy"}. If \code{NULL}, \code{search_k} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +\item \code{n_epochs_transform} :: \code{integer(1)}\cr +Number of epochs used during the optimization of the embedded coordinates when predicting. +If \code{NULL}, \code{n_epochs} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +\item \code{init_transform} :: \code{character(1)}\cr +Type of initialization for the coordinates when predicting. 
May be set to \code{"custom"}, in which case the \code{matrix} of initial +coordinates passed to \code{init_transform_custom} is used. Default is \code{"weighted"}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +\item \code{init_transform_custom} :: \code{matrix}\cr +Matrix of initial coordinates when predicting. Only used, if \code{init_transform} is \code{"custom"}. +\item \code{batch_transform} :: \code{logical(1)}\cr +If \code{TRUE}, embedding coordinates are updated at the end of each epoch rather than during the epoch when predicting. +If \code{NULL}, \code{batch} is used instead. Default is \code{FALSE}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +\item \code{learning_rate_transform} :: \code{numeric(1)}\cr +Initial learning rate used in optimization of the coordinates when predicting. +If \code{NULL}, \code{learning_rate} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +\item \code{epoch_callback_transform} :: \code{function}\cr +A function which will be invoked at the end of every epoch when predicting. +Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}. +} +} + +\section{Internals}{ + +Uses the \link[uwot:umap2]{umap2()} function. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\examples{ +\dontshow{ if (requireNamespace("uwot")) \{ } +library("mlr3") + +task = tsk("iris") +pop = po("umap") + +task$data() +pop$train(list(task))[[1]]$data() + +pop$state +\dontshow{ \} } +} +\references{ +McInnes L, Healy J, Melville J, Grossberger L (2018). +\dQuote{UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction.} +\emph{Journal of Open Source Software}, \bold{3}(29). +\doi{10.21105/joss.00861}.
+} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, 
+\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 6d17dfeb3..41e7b8df8 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -147,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, \code{\link{mlr_pipeops_yeojohnson}} diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 2774382f7..75b9414e5 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -162,6 +162,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_vtreat}}, \code{\link{mlr_pipeops_yeojohnson}} diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index 28d5f205a..08d5e7ccf 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -215,6 +215,7 @@ Other 
PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_yeojohnson}} diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 89123d332..80c56877a 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, \code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_umap}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}} diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R new file mode 100644 index 000000000..cbcde57d5 --- /dev/null +++ b/tests/testthat/test_pipeop_umap.R @@ -0,0 +1,196 @@ +context("PipeOpUMAP") + +test_that("PipeOpUMAP - basic properties", { + skip_if_not_installed("uwot") + skip_if_not_installed("RcppAnnoy") + skip_if_not_installed("RcppHNSW") + skip_if_not_installed("rnndescent") + + task = mlr_tasks$get("iris")$filter(1:30) + + # Test each nn_method separately, since they rely on different packages and deep clone is implemented differently + expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "annoy")), + deterministic_train = FALSE, deterministic_predict = FALSE, task = task) + expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "hnsw")), + deterministic_train = FALSE, deterministic_predict = FALSE, task = task) + expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "nndescent")), + deterministic_train = FALSE, deterministic_predict = FALSE, task = task) + +}) + +test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params, nn_method = annoy", {
+ skip_if_not_installed("uwot") + skip_if_not_installed("RcppAnnoy") + task = mlr_tasks$get("iris")$filter(1:30) + + op = PipeOpUMAP$new() + pv = list(seed = 1234L) + op$param_set$set_values(.values = pv) + + train_out = train_pipeop(op, list(task))[[1L]] + umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv) + + state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b", + "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread", + "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models") + expect_true(all(state_names %in% names(op$state))) + state_names_wo_pointers = setdiff(state_names, "nn_index") # since RefClass in state$nn_index$ann will not be equal + expect_identical(op$state[state_names_wo_pointers], umap_out[state_names_wo_pointers]) + expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]])) + + predict_out = predict_pipeop(op, list(task))[[1L]] + umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out) + expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out)) + +}) + + +test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = annoy", { + skip_if_not_installed("uwot") + skip_if_not_installed("RcppAnnoy") + task = mlr_tasks$get("iris")$filter(1:30) + + op = PipeOpUMAP$new() + + # Build list of params with same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform() + pv = list( + seed = 1234L, + nn_method = "annoy", + n_neighbors = 10L, + metric = "correlation", + n_epochs = 100L, + learning_rate = 0.5, + scale = FALSE, + init = "pca", + init_sdev = 1e-4, + set_op_mix_ratio = 0.5, + local_connectivity = 1.1, + bandwidth = 0.9, + repulsion_strength = 1.1, + negative_sample_rate = 6 + ) + # Handle parameters that are
differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform() + pv_po = insert_named(pv, list(use_supervised = TRUE, + batch_transform = TRUE, + init_transform = "average", + search_k_transform = 1000L)) + op$param_set$set_values(.values = pv_po) + args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1])) + args_umap_transform = list(init = "average", search_k = 1000L, batch = TRUE) + + train_out = train_pipeop(op, list(task))[[1L]] + umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2) + + state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b", + "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread", + "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models") + expect_true(all(state_names %in% names(op$state))) + state_names = setdiff(state_names, "nn_index") # since RefClass in state$nn_index$ann will not be equal + expect_identical(op$state[state_names], umap_out[state_names]) + expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]])) + + predict_out = predict_pipeop(op, list(task))[[1L]] + umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform) + + expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out)) + +}) + + +test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = hnsw", { + skip_if_not_installed("uwot") + skip_if_not_installed("RcppHNSW") + task = mlr_tasks$get("iris")$filter(1:30) + + op = PipeOpUMAP$new() + + # Build list of params with same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform() + pv = list( + seed = 1234L, + nn_method = "hnsw", + n_neighbors = 10L, + metric = "correlation", + n_epochs = 100L, + learning_rate = 0.5, + scale = FALSE, + init
= "pca", + init_sdev = 1e-4, + set_op_mix_ratio = 0.5, + local_connectivity = 1.1, + bandwidth = 0.9, + repulsion_strength = 1.1, + negative_sample_rate = 6, + nn_args = list(M = 10L, ef_construction = 100L, ef = 20L) + ) + # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform() + pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average")) + op$param_set$set_values(.values = pv_po) + args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1])) + args_umap_transform = list(init = "average") + + train_out = train_pipeop(op, list(task))[[1L]] + umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2) + + state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b", + "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread", + "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models") + expect_true(all(state_names %in% names(op$state))) + state_names = setdiff(state_names, "nn_index") # since RefClass in state$nn_index$ann will not be equal + expect_identical(op$state[state_names], umap_out[state_names]) + expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]])) + + predict_out = predict_pipeop(op, list(task))[[1L]] + umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform) + expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out)) + +}) + + +test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = rnndescent", { + skip_if_not_installed("uwot") + skip_if_not_installed("rnndescent") + task = mlr_tasks$get("iris")$filter(1:30) + + op = PipeOpUMAP$new() + + # Build list of params with same names for PipeOpUMAP and uwot::umap2() /
uwot::umap_transform() + pv = list( + seed = 1234L, + nn_method = "nndescent", + n_neighbors = 10L, + metric = "symmetrickl", + n_epochs = 100L, + learning_rate = 0.5, + scale = FALSE, + init = "pca", + init_sdev = 1e-4, + set_op_mix_ratio = 0.5, + local_connectivity = 1.1, + bandwidth = 0.9, + repulsion_strength = 1.1, + negative_sample_rate = 6, + nn_args = list(n_trees = 15L, max_candidates = 15L, pruning_degree_multiplier = 1.4, epsilon = 0.05) + ) + # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform() + pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average")) + op$param_set$set_values(.values = pv_po) + args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1])) + args_umap_transform = list(init = "average") + + train_out = train_pipeop(op, list(task))[[1L]] + umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2) + + state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b", + "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread", + "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models") + expect_true(all(state_names %in% names(op$state))) + + state_names = setdiff(state_names, "nn_index") # since RefClass in state$nn_index$ann will not be equal + expect_identical(op$state[state_names], umap_out[state_names]) + expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]])) + + predict_out = predict_pipeop(op, list(task))[[1L]] + umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform) + expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out)) + +})