From 003302d0951aaf6d1037a35b44e6c4799ffc6d94 Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Sat, 9 Mar 2024 16:50:03 +0100
Subject: [PATCH 1/3] rebase

---
 R-package/R/callbacks.R                       | 98 +++++++++----------
 R-package/R/utils.R                           | 12 +--
 R-package/R/xgb.create.features.R             |  1 -
 R-package/R/xgb.cv.R                          |  4 +-
 R-package/R/xgb.train.R                       | 56 +++++------
 R-package/R/xgboost.R                         |  4 +-
 R-package/demo/basic_walkthrough.R            | 20 ++--
 R-package/demo/boost_from_prediction.R        |  6 +-
 R-package/demo/custom_objective.R             |  6 +-
 R-package/demo/early_stopping.R               |  4 +-
 R-package/demo/generalized_linear_model.R     |  4 +-
 R-package/demo/gpu_accelerated.R              |  6 +-
 R-package/demo/predict_first_ntree.R          |  4 +-
 R-package/demo/predict_leaf_indices.R         |  1 -
 R-package/demo/tweedie_regression.R           |  2 +-
 R-package/man/xgb.Callback.Rd                 | 34 +++----
 R-package/man/xgb.cb.early.stop.Rd            |  6 +-
 R-package/man/xgb.cb.evaluation.log.Rd        |  2 +-
 R-package/man/xgb.create.features.Rd          |  1 -
 R-package/man/xgb.train.Rd                    | 24 ++---
 R-package/tests/testthat/test_basic.R         | 36 +++----
 R-package/tests/testthat/test_callbacks.R     | 34 +++----
 .../tests/testthat/test_custom_objective.R    |  8 +-
 R-package/tests/testthat/test_dmatrix.R       |  6 +-
 .../tests/testthat/test_feature_weights.R     |  2 +-
 R-package/tests/testthat/test_glm.R           | 14 +--
 R-package/tests/testthat/test_ranking.R       |  4 +-
 R-package/tests/testthat/test_update.R        | 24 ++---
 R-package/vignettes/xgboostPresentation.Rmd   | 16 +--
 29 files changed, 218 insertions(+), 221 deletions(-)

diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
index d768e1b9ee29..aaf1e2900eaf 100644
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -56,10 +56,10 @@
 #' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
 #' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
 #' \item `bst`: Rhe `xgb.Booster` object for the fold.
-#' \item `watchlist`: A list with two DMatrices, with names `train` and `test`
+#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
 #' (`test` is the held-out data for the fold).
 #' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
-#' from which the `test` entry in the watchlist was obtained.
+#' from which the `test` entry in `evals` was obtained.
 #' }
 #'
 #' This object should \bold{not} be in-place modified in ways that conflict with the
@@ -78,7 +78,7 @@
 #' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
 #' folds can be found in the `model` object.
 #'
-#' \item watchlist The evaluation watchlist, as passed under argument `watchlist` to
+#' \item evals The evaluation data, as passed under argument `evals` to
 #' \link{xgb.train}.
 #'
 #' For \link{xgb.cv}, this will always be `NULL`.
@@ -101,15 +101,15 @@
 #' \item iteration Index of the iteration number that is being executed (first iteration
 #' will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
 #'
-#' \item iter_feval Evaluation metrics for the `watchlist` that was supplied, either
+#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
 #' determined by the objective, or by parameter `feval`.
 #'
 #' For \link{xgb.train}, this will be a named vector with one entry per element in
-#' `watchlist`, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
-#' example, if `watchlist` contains an entry named "tr" and the metric is "rmse",
+#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
+#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
 #' this will be a one-element vector with name "tr-rmse".
 #'
-#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(watchlist), nfolds]`,
+#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
 #' where the row names will follow the same naming logic as the one-dimensional vector
 #' that is passed in \link{xgb.train}.
 #'
@@ -169,18 +169,18 @@
 #' }
 #' @examples
 #' # Example constructing a custom callback that calculates
-#' # squared error on the training data, without a watchlist,
+#' # squared error on the training data (no separate test set),
 #' # and outputs the per-iteration results.
 #' ssq_callback <- xgb.Callback(
 #'   cb_name = "ssq",
-#'   f_before_training = function(env, model, data, watchlist,
+#'   f_before_training = function(env, model, data, evals,
 #'                                begin_iteration, end_iteration) {
 #'     # A vector to keep track of a number at each iteration
 #'     env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
 #'   },
-#'   f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+#'   f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
 #'     # This calculates the sum of squared errors on the training data.
-#'     # Note that this can be better done by passing a 'watchlist' entry,
+#'     # Note that this can be better done by passing an 'evals' entry,
 #'     # but this demonstrates a way in which callbacks can be structured.
 #'     pred <- predict(model, data)
 #'     err <- pred - getinfo(data, "label")
@@ -196,7 +196,7 @@
 #'     # A return value of 'TRUE' here would signal to finalize the training
 #'     return(FALSE)
 #'   },
-#'   f_after_training = function(env, model, data, watchlist, iteration,
+#'   f_after_training = function(env, model, data, evals, iteration,
 #'                               final_feval, prev_cb_res) {
 #'     return(env$logs)
 #'   }
@@ -220,10 +220,10 @@
 xgb.Callback <- function(
   cb_name = "custom_callback",
   env = new.env(),
-  f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) NULL,
-  f_before_iter = function(env, model, data, watchlist, iteration) NULL,
-  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
-  f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) NULL
+  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,
+  f_before_iter = function(env, model, data, evals, iteration) NULL,
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL
 ) {
   stopifnot(is.null(f_before_training) || is.function(f_before_training))
   stopifnot(is.null(f_before_iter) || is.function(f_before_iter))
@@ -251,7 +251,7 @@ xgb.Callback <- function(
   callbacks,
   model,
   data,
-  watchlist,
+  evals,
   begin_iteration,
   end_iteration
 ) {
@@ -261,7 +261,7 @@ xgb.Callback <- function(
         callback$env,
         model,
         data,
-        watchlist,
+        evals,
         begin_iteration,
         end_iteration
       )
@@ -273,7 +273,7 @@ xgb.Callback <- function(
   callbacks,
   model,
   data,
-  watchlist,
+  evals,
   iteration
 ) {
   if (!length(callbacks)) {
@@ -304,7 +304,7 @@ xgb.Callback <- function(
   callbacks,
   model,
   data,
-  watchlist,
+  evals,
   iteration,
   iter_feval
 ) {
@@ -319,7 +319,7 @@ xgb.Callback <- function(
       cb$env,
       model,
       data,
-      watchlist,
+      evals,
       iteration,
       iter_feval
     )
@@ -337,7 +337,7 @@ xgb.Callback <- function(
   callbacks,
   model,
   data,
-  watchlist,
+  evals,
   iteration,
   final_feval,
   prev_cb_res
@@ -355,7 +355,7 @@ xgb.Callback <- function(
           cb$env,
           model,
           data,
-          watchlist,
+          evals,
           iteration,
           final_feval,
           getElement(old_cb_res, cb$cb_name)
@@ -428,7 +428,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
     env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),
     f_before_training = NULL,
     f_before_iter = NULL,
-    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
       if (is.null(iter_feval)) {
         return(FALSE)
       }
@@ -439,7 +439,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
       env$is_first_call <- FALSE
       return(FALSE)
     },
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       if (is.null(final_feval)) {
         return(NULL)
       }
@@ -453,7 +453,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 #' @title Callback for logging the evaluation history
 #' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
 #' @details This callback creates a table with per-iteration evaluation metrics (see parameters
-#' `watchlist` and `feval` in \link{xgb.train}).
+#' `evals` and `feval` in \link{xgb.train}).
 #' @details
 #' Note: in the column names of the final data.table, the dash '-' character is replaced with
 #' the underscore '_' in order to make the column names more like regular R identifiers.
@@ -462,18 +462,18 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
 xgb.cb.evaluation.log <- function() {
   xgb.Callback(
     cb_name = "evaluation_log",
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
       env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1)
       env$next_log <- 1
     },
     f_before_iter = NULL,
-    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
       tmp <- .summarize.feval(iter_feval, TRUE)
       env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)
       env$next_log <- env$next_log + 1
       return(FALSE)
     },
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       if (!NROW(env$evaluation_log)) {
         return(prev_cb_res)
       }
@@ -543,7 +543,7 @@ xgb.cb.reset.parameters <- function(new_params) {
   xgb.Callback(
     cb_name = "reset_parameters",
     env = as.environment(list(new_params = new_params)),
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
       env$end_iteration <- end_iteration
 
       pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)
@@ -560,7 +560,7 @@ xgb.cb.reset.parameters <- function(new_params) {
         }
       }
     },
-    f_before_iter = function(env, model, data, watchlist, iteration) {
+    f_before_iter = function(env, model, data, evals, iteration) {
       pars <- lapply(env$new_params, function(p) {
         if (is.function(p)) {
           return(p(iteration, env$end_iteration))
@@ -589,9 +589,9 @@ xgb.cb.reset.parameters <- function(new_params) {
 #' @param maximize Whether to maximize the evaluation metric.
 #' @param metric_name The name of an evaluation column to use as a criteria for early
 #'        stopping. If not set, the last column would be used.
-#'        Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+#'        Let's say the test data in \code{evals} was labelled as \code{dtest},
 #'        and one wants to use the AUC in test data for early stopping regardless of where
-#'        it is in the \code{watchlist}, then one of the following would need to be set:
+#'        it is in the \code{evals}, then one of the following would need to be set:
 #'        \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
 #'        All dash '-' characters in metric names are considered equivalent to '_'.
 #' @param verbose Whether to print the early stopping information.
@@ -615,7 +615,7 @@ xgb.cb.reset.parameters <- function(new_params) {
 #' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
 #' through \link{xgb.attr} or \link{xgb.attributes}.
 #'
-#' At least one data element is required in the evaluation watchlist for early stopping to work.
+#' At least one dataset is required in `evals` for early stopping to work.
 #' @export
 xgb.cb.early.stop <- function(
   stopping_rounds,
@@ -642,15 +642,15 @@ xgb.cb.early.stop <- function(
         stopped_by_max_rounds = FALSE
       )
     ),
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
-      if (inherits(model, "xgb.Booster") && !length(watchlist)) {
-        stop("For early stopping, watchlist must have at least one element")
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+      if (inherits(model, "xgb.Booster") && !length(evals)) {
+        stop("For early stopping, 'evals' must have at least one element")
       }
       env$begin_iteration <- begin_iteration
       return(NULL)
     },
-    f_before_iter = function(env, model, data, watchlist, iteration) NULL,
-    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    f_before_iter = function(env, model, data, evals, iteration) NULL,
+    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
       sds <- NULL
       if (NCOL(iter_feval) > 1) {
         tmp <- .summarize.feval(iter_feval, TRUE)
@@ -729,7 +729,7 @@ xgb.cb.early.stop <- function(
       }
       return(FALSE)
     },
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) {
         # Note: it loses the attributes after being sliced,
         # so they have to be re-assigned afterwards.
@@ -798,18 +798,18 @@ xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
   xgb.Callback(
     cb_name = "save_model",
     env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
       env$begin_iteration <- begin_iteration
     },
     f_before_iter = NULL,
-    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
       if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
         .save.model.w.formatted.name(model, env$save_name, iteration)
         env$last_save <- iteration
       }
       return(FALSE)
     },
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       if (env$save_period == 0 && iteration > env$last_save) {
         .save.model.w.formatted.name(model, env$save_name, iteration)
       }
@@ -840,19 +840,19 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
   xgb.Callback(
     cb_name = "cv_predict",
     env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
       if (inherits(model, "xgb.Booster")) {
         stop("'cv.predict' callback is only for 'xgb.cv'.")
       }
     },
     f_before_iter = NULL,
     f_after_iter = NULL,
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       pred <- NULL
       for (fd in model) {
         pr <- predict(
           fd$bst,
-          fd$watchlist[[2L]],
+          fd$evals[[2L]],
           outputmargin = env$outputmargin,
           reshape = TRUE
         )
@@ -1002,7 +1002,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
   xgb.Callback(
     cb_name = "gblinear_history",
     env = as.environment(list(sparse = sparse)),
-    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
       if (!inherits(model, "xgb.Booster")) {
         model <- model[[1L]]$bst
       }
@@ -1013,7 +1013,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
       env$next_idx <- 1
     },
     f_before_iter = NULL,
-    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
       if (inherits(model, "xgb.Booster")) {
         coef_this <- .extract.coef(model, env$sparse)
       } else {
@@ -1023,7 +1023,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
       env$next_idx <- env$next_idx + 1
       return(FALSE)
     },
-    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
       # in case of early stopping
       if (env$next_idx <= length(env$coef_hist)) {
         env$coef_hist <- head(env$coef_hist, env$next_idx - 1)
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 723310ee42e3..08afab889da9 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -193,20 +193,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
 # Evaluate one iteration.
 # Returns a named vector of evaluation metrics
 # with the names in a 'datasetname-metricname' format.
-xgb.iter.eval <- function(bst, watchlist, iter, feval) {
+xgb.iter.eval <- function(bst, evals, iter, feval) {
   handle <- xgb.get.handle(bst)
 
-  if (length(watchlist) == 0)
+  if (length(evals) == 0)
     return(NULL)
 
-  evnames <- names(watchlist)
+  evnames <- names(evals)
   if (is.null(feval)) {
-    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
+    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
     mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
     res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
   } else {
-    res <- sapply(seq_along(watchlist), function(j) {
-      w <- watchlist[[j]]
+    res <- sapply(seq_along(evals), function(j) {
+      w <- evals[[j]]
       ## predict using all trees
       preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
       eval_res <- feval(preds, w)
diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R
index baef3bb03e28..27f8a0975ae7 100644
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@@ -71,7 +71,6 @@
 #' new.dtest <- xgb.DMatrix(
 #'   data = new.features.test, label = agaricus.test$label, nthread = 2
 #' )
-#' watchlist <- list(train = new.dtrain)
 #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 #'
 #' # Model accuracy with new features
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 23ca0f2ded98..1cafd7be75c6 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -215,7 +215,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
       modelfile = NULL
     )
     bst <- bst$bst
-    list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
+    list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
   })
 
   # extract parameters that can affect the relationship b/w #trees and #iterations
@@ -254,7 +254,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
       )
       xgb.iter.eval(
         bst = fd$bst,
-        watchlist = fd$watchlist,
+        evals = fd$evals,
         iter = iteration - 1,
         feval = feval
       )
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 34c21d5520ed..4cea088e0e45 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -114,13 +114,13 @@
 #' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
 #'        \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
 #' @param nrounds max number of boosting iterations.
-#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
+#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
 #'        Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 #'        of these datasets during each boosting iteration, and stored in the end as a field named
 #'        \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
 #'        \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 #'        printed out during the training.
-#'        E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+#'        E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
 #'        the performance of each round's model on mat1 and mat2.
 #' @param obj customized objective function. Returns gradient and second order
 #'        gradient with given prediction and dtrain.
@@ -171,7 +171,7 @@
 #' @details
 #' These are the training functions for \code{xgboost}.
 #'
-#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+#' The \code{xgb.train} interface supports advanced features such as \code{evals},
 #' customized objective and evaluation metric functions, therefore it is more flexible
 #' than the \code{xgboost} interface.
 #'
@@ -209,7 +209,7 @@
 #' \itemize{
 #'   \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 #'         and the \code{print_every_n} parameter is passed to it.
-#'   \item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
+#'   \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
 #'   \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
 #'   \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 #' }
@@ -254,12 +254,12 @@
 #' dtest <- with(
 #'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 #' )
-#' watchlist <- list(train = dtrain, eval = dtest)
+#' evals <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' ## An xgb.train example where custom objective and evaluation metric are
 #' ## used:
@@ -280,15 +280,15 @@
 #' #  as 'objective' and 'eval_metric' parameters in the params list:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = logregobj, eval_metric = evalerror)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 #'
 #' #  or through the ... arguments:
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
 #'                  objective = logregobj, eval_metric = evalerror)
 #'
 #' #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
 #'                  obj = logregobj, feval = evalerror)
 #'
 #'
@@ -296,11 +296,11 @@
 #' param <- list(max_depth = 2, eta = 1, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
 #'                  callbacks = list(xgb.cb.reset.parameters(my_etas)))
 #'
 #' ## Early stopping:
-#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
 #'                  early_stopping_rounds = 3)
 #'
 #' ## An 'xgboost' interface example:
@@ -311,7 +311,7 @@
 #'
 #' @rdname xgb.train
 #' @export
-xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
+xgb.train <- function(params = list(), data, nrounds, evals = list(),
                       obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
                       early_stopping_rounds = NULL, maximize = NULL,
                       save_period = NULL, save_name = "xgboost.model",
@@ -324,17 +324,17 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   check.custom.obj()
   check.custom.eval()
 
-  # data & watchlist checks
+  # data & evals checks
   dtrain <- data
   if (!inherits(dtrain, "xgb.DMatrix"))
     stop("second argument dtrain must be xgb.DMatrix")
-  if (length(watchlist) > 0) {
-    if (typeof(watchlist) != "list" ||
-        !all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
-      stop("watchlist must be a list of xgb.DMatrix elements")
-    evnames <- names(watchlist)
+  if (length(evals) > 0) {
+    if (typeof(evals) != "list" ||
+        !all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
+      stop("'evals' must be a list of xgb.DMatrix elements")
+    evnames <- names(evals)
     if (is.null(evnames) || any(evnames == ""))
-      stop("each element of the watchlist must have a name tag")
+      stop("each element of 'evals' must have a name tag")
   }
   # Handle multiple evaluation metrics given as a list
   for (m in params$eval_metric) {
@@ -370,8 +370,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (verbose && !("print_evaluation" %in% cb_names)) {
     callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
   }
-  # evaluation log callback:  it is automatically enabled when watchlist is provided
-  if (length(watchlist) && !("evaluation_log" %in% cb_names)) {
+  # evaluation log callback:  it is automatically enabled when 'evals' is provided
+  if (length(evals) && !("evaluation_log" %in% cb_names)) {
     callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
   }
   # Model saving callback
@@ -385,7 +385,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   # Construct a booster (either a new one or load from xgb_model)
   bst <- xgb.Booster(
     params = params,
-    cachelist = append(watchlist, dtrain),
+    cachelist = append(evals, dtrain),
     modelfile = xgb_model
   )
   niter_init <- bst$niter
@@ -407,7 +407,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
     callbacks,
     bst,
     dtrain,
-    watchlist,
+    evals,
     begin_iteration,
     end_iteration
   )
@@ -419,7 +419,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
       callbacks,
       bst,
       dtrain,
-      watchlist,
+      evals,
       iteration
     )
 
@@ -431,10 +431,10 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
     )
 
     bst_evaluation <- NULL
-    if (length(watchlist) > 0) {
+    if (length(evals) > 0) {
       bst_evaluation <- xgb.iter.eval(
         bst = bst,
-        watchlist = watchlist,
+        evals = evals,
         iter = iteration - 1,
         feval = feval
       )
@@ -444,7 +444,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
       callbacks,
       bst,
       dtrain,
-      watchlist,
+      evals,
       iteration,
       bst_evaluation
     )
@@ -456,7 +456,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
     callbacks,
     bst,
     dtrain,
-    watchlist,
+    evals,
     iteration,
     bst_evaluation
   )
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index 7fecec39cec7..a1d37358162c 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
     nthread = merged$nthread
   )
 
-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)
 
-  bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
+  bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
                    early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                    save_period = save_period, save_name = save_name,
                    xgb_model = xgb_model, callbacks = callbacks, ...)
diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R
index 3dbbe0586f44..9403bac2064c 100644
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -74,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
 # to use advanced features, we need to put data in xgb.DMatrix
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
 dtest <- xgb.DMatrix(data = test$data, label = test$label)
-#---------------Using watchlist----------------
-# watchlist is a list of xgb.DMatrix, each of them is tagged with name
-watchlist <- list(train = dtrain, test = dtest)
-# to train with watchlist, use xgb.train, which contains more advanced features
-# watchlist allows us to monitor the evaluation result on all data in the list
-print("Train xgboost using xgb.train with watchlist")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+#---------------Using an evaluation set----------------
+# 'evals' is a list of xgb.DMatrix, each of them is tagged with name
+evals <- list(train = dtrain, test = dtest)
+# to train with an evaluation set, use xgb.train, which contains more advanced features
+# 'evals' argument allows us to monitor the evaluation result on all data in the list
+print("Train xgboost using xgb.train with evaluation data")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                  nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
-print("train xgboost using xgb.train with watchlist, watch logloss and error")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+print("train xgboost using xgb.train with evaluation data, watch logloss and error")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                  eval_metric = "error", eval_metric = "logloss",
                  nthread = 2, objective = "binary:logistic")
 
@@ -92,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
 xgb.DMatrix.save(dtrain, "dtrain.buffer")
 # to load it in, simply call xgb.DMatrix
 dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
                  nthread = 2, objective = "binary:logistic")
 # information can be extracted from xgb.DMatrix using getinfo
 label <- getinfo(dtest, "label")
diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R
index 1a3d55369d2f..75af70dba0d7 100644
--- a/R-package/demo/boost_from_prediction.R
+++ b/R-package/demo/boost_from_prediction.R
@@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 ###
 # advanced: start from a initial base prediction
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
 param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
-bst <- xgb.train(param, dtrain, 1, watchlist)
+bst <- xgb.train(param, dtrain, 1, evals)
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation
 ptrain <- predict(bst, dtrain, outputmargin = TRUE)
@@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
 setinfo(dtest, "base_margin", ptest)
 
 print('this is result of boost from initial prediction')
-bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
+bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)
diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R
index 35201332c5f6..03d7b346471b 100644
--- a/R-package/demo/custom_objective.R
+++ b/R-package/demo/custom_objective.R
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2
 
 # user define objective function, given prediction, return gradient and second order gradient
@@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread  =  2, verbosity = 0,
 print('start training with user customized objective')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
 
 #
 # there can be cases where you want additional information
@@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread  =  2, verbosity = 0,
 print('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R
index 04da1382f031..057440882567 100644
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
 param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
-watchlist <- list(eval = dtest)
+evals <- list(eval = dtest)
 num_round <- 20
 # user define objective function, given prediction, return gradient and second order gradient
 # this is log likelihood loss
@@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
 }
 print('start training with early Stopping setting')
 
-bst <- xgb.train(param, dtrain, num_round, watchlist,
+bst <- xgb.train(param, dtrain, num_round, evals,
                  objective = logregobj, eval_metric = evalerror, maximize = FALSE,
                  early_stopping_round = 3)
 bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
diff --git a/R-package/demo/generalized_linear_model.R b/R-package/demo/generalized_linear_model.R
index c24fe72cbcad..d29a6dc5be58 100644
--- a/R-package/demo/generalized_linear_model.R
+++ b/R-package/demo/generalized_linear_model.R
@@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
 ##
 # the rest of settings are the same
 ##
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 num_round <- 2
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
 ypred <- predict(bst, dtest)
 labels <- getinfo(dtest, 'label')
 cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')
diff --git a/R-package/demo/gpu_accelerated.R b/R-package/demo/gpu_accelerated.R
index 14ed9392b7d1..617a63e74542 100644
--- a/R-package/demo/gpu_accelerated.R
+++ b/R-package/demo/gpu_accelerated.R
@@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
 tr <- sample.int(N, N * 0.75)
 dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
 dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
-wl <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
 
 # An example of running 'gpu_hist' algorithm
 # which is
@@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
 param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
               max_bin = 64, tree_method = 'gpu_hist')
 pt <- proc.time()
-bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt
 
 # Compare to the 'hist' algorithm:
 param$tree_method <- 'hist'
 pt <- proc.time()
-bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
 proc.time() - pt
diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R
index 179c18c707f4..ba15ab39a74f 100644
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
 param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 nrounds <- 2
 
 # training the model for two rounds
-bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
+bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
 cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest, 'label')
 
diff --git a/R-package/demo/predict_leaf_indices.R b/R-package/demo/predict_leaf_indices.R
index 21b6fa71d0b7..a57baf668896 100644
--- a/R-package/demo/predict_leaf_indices.R
+++ b/R-package/demo/predict_leaf_indices.R
@@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
 # learning with new features
 new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
 new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy with new features
diff --git a/R-package/demo/tweedie_regression.R b/R-package/demo/tweedie_regression.R
index dfaf6a2ae2ce..b07858e761fa 100644
--- a/R-package/demo/tweedie_regression.R
+++ b/R-package/demo/tweedie_regression.R
@@ -39,7 +39,7 @@ bst <- xgb.train(
   data = d_train,
   params = params,
   maximize = FALSE,
-  watchlist = list(train = d_train),
+  evals = list(train = d_train),
   nrounds = 20)
 
 var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
diff --git a/R-package/man/xgb.Callback.Rd b/R-package/man/xgb.Callback.Rd
index ed1dd7bed733..b4edcd97842e 100644
--- a/R-package/man/xgb.Callback.Rd
+++ b/R-package/man/xgb.Callback.Rd
@@ -7,11 +7,11 @@
 xgb.Callback(
   cb_name = "custom_callback",
   env = new.env(),
-  f_before_training = function(env, model, data, watchlist, begin_iteration,
-    end_iteration) NULL,
-  f_before_iter = function(env, model, data, watchlist, iteration) NULL,
-  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
-  f_after_training = function(env, model, data, watchlist, iteration, final_feval,
+  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
+    NULL,
+  f_before_iter = function(env, model, data, evals, iteration) NULL,
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+  f_after_training = function(env, model, data, evals, iteration, final_feval,
     prev_cb_res) NULL
 )
 }
@@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
 For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
 \item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
 \item \code{bst}: Rhe \code{xgb.Booster} object for the fold.
-\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
+\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
 (\code{test} is the held-out data for the fold).
 \item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
-from which the \code{test} entry in the watchlist was obtained.
+from which the \code{test} entry in \code{evals} was obtained.
 }
 
 This object should \bold{not} be in-place modified in ways that conflict with the
@@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
 Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
 folds can be found in the \code{model} object.
 
-\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
+\item evals The evaluation data, as passed under argument \code{evals} to
 \link{xgb.train}.
 
 For \link{xgb.cv}, this will always be \code{NULL}.
@@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
 \item iteration Index of the iteration number that is being executed (first iteration
 will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
 
-\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
+\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
 determined by the objective, or by parameter \code{feval}.
 
 For \link{xgb.train}, this will be a named vector with one entry per element in
-\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
-example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
+\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
+example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
 this will be a one-element vector with name "tr-rmse".
 
-For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
+For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
 where the row names will follow the same naming logic as the one-dimensional vector
 that is passed in \link{xgb.train}.
 
@@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
 }
 \examples{
 # Example constructing a custom callback that calculates
-# squared error on the training data, without a watchlist,
+# squared error on the training data (no separate test set),
 # and outputs the per-iteration results.
 ssq_callback <- xgb.Callback(
   cb_name = "ssq",
-  f_before_training = function(env, model, data, watchlist,
+  f_before_training = function(env, model, data, evals,
                                begin_iteration, end_iteration) {
     # A vector to keep track of a number at each iteration
     env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
   },
-  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
     # This calculates the sum of squared errors on the training data.
-    # Note that this can be better done by passing a 'watchlist' entry,
+    # Note that this can be better done by passing an 'evals' entry,
     # but this demonstrates a way in which callbacks can be structured.
     pred <- predict(model, data)
     err <- pred - getinfo(data, "label")
@@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
     # A return value of 'TRUE' here would signal to finalize the training
     return(FALSE)
   },
-  f_after_training = function(env, model, data, watchlist, iteration,
+  f_after_training = function(env, model, data, evals, iteration,
                               final_feval, prev_cb_res) {
     return(env$logs)
   }
diff --git a/R-package/man/xgb.cb.early.stop.Rd b/R-package/man/xgb.cb.early.stop.Rd
index 26d2f1aa3354..2a70f4943d92 100644
--- a/R-package/man/xgb.cb.early.stop.Rd
+++ b/R-package/man/xgb.cb.early.stop.Rd
@@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}
 
 \item{metric_name}{The name of an evaluation column to use as a criteria for early
 stopping. If not set, the last column would be used.
-Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+Let's say the test data in \code{evals} was labelled as \code{dtest},
 and one wants to use the AUC in test data for early stopping regardless of where
-it is in the \code{watchlist}, then one of the following would need to be set:
+it is in the \code{evals}, then one of the following would need to be set:
 \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
 All dash '-' characters in metric names are considered equivalent to '_'.}
 
@@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
 base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
 through \link{xgb.attr} or \link{xgb.attributes}.
 
-At least one data element is required in the evaluation watchlist for early stopping to work.
+At least one dataset is required in \code{evals} for early stopping to work.
 }
diff --git a/R-package/man/xgb.cb.evaluation.log.Rd b/R-package/man/xgb.cb.evaluation.log.Rd
index 1dab64647757..4cc6ef636c66 100644
--- a/R-package/man/xgb.cb.evaluation.log.Rd
+++ b/R-package/man/xgb.cb.evaluation.log.Rd
@@ -14,7 +14,7 @@ Callback for logging the evaluation history
 }
 \details{
 This callback creates a table with per-iteration evaluation metrics (see parameters
-\code{watchlist} and \code{feval} in \link{xgb.train}).
+\code{evals} and \code{feval} in \link{xgb.train}).
 
 Note: in the column names of the final data.table, the dash '-' character is replaced with
 the underscore '_' in order to make the column names more like regular R identifiers.
diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd
index 68b5619970f9..995c27459a5e 100644
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
 new.dtest <- xgb.DMatrix(
   data = new.features.test, label = agaricus.test$label, nthread = 2
 )
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
 
 # Model accuracy with new features
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index 45c78ae130ff..21c8dbe16413 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -9,7 +9,7 @@ xgb.train(
   params = list(),
   data,
   nrounds,
-  watchlist = list(),
+  evals = list(),
   obj = NULL,
   feval = NULL,
   verbose = 1,
@@ -158,13 +158,13 @@ List is provided in detail section.}
 
 \item{nrounds}{max number of boosting iterations.}
 
-\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
+\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
 Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 of these datasets during each boosting iteration, and stored in the end as a field named
 \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
 \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 printed out during the training.
-E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
 the performance of each round's model on mat1 and mat2.}
 
 \item{obj}{customized objective function. Returns gradient and second order
@@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
 \details{
 These are the training functions for \code{xgboost}.
 
-The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+The \code{xgb.train} interface supports advanced features such as \code{evals},
 customized objective and evaluation metric functions, therefore it is more flexible
 than the \code{xgboost} interface.
 
@@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
 \itemize{
 \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 and the \code{print_every_n} parameter is passed to it.
-\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
+\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
 \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
 \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 }
@@ -307,12 +307,12 @@ dtrain <- with(
 dtest <- with(
   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 )
-watchlist <- list(train = dtrain, eval = dtest)
+evals <- list(train = dtrain, eval = dtest)
 
 ## A simple xgb.train example:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 
 ## An xgb.train example where custom objective and evaluation metric are
 ## used:
@@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
 #  as 'objective' and 'eval_metric' parameters in the params list:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
               objective = logregobj, eval_metric = evalerror)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
 
 #  or through the ... arguments:
 param <- list(max_depth = 2, eta = 1, nthread = nthread)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                  objective = logregobj, eval_metric = evalerror)
 
 #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
                  obj = logregobj, feval = evalerror)
 
 
@@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                  callbacks = list(xgb.cb.reset.parameters(my_etas)))
 
 ## Early stopping:
-bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
                  early_stopping_rounds = 3)
 
 ## An 'xgboost' interface example:
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index ee0f4c7ba85d..18a3b99e693c 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
       data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
       eta = 1, nthread = n_threads, nrounds = nrounds,
       objective = "binary:logistic", eval_metric = "error",
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
     ),
     "train-error"
   )
@@ -152,7 +152,7 @@ test_that("train and predict softprob", {
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
       max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
       objective = "multi:softprob", num_class = 3, eval_metric = "merror",
-      watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
     ),
     "train-merror"
   )
@@ -203,7 +203,7 @@ test_that("train and predict softmax", {
       data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
       max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
       objective = "multi:softmax", num_class = 3, eval_metric = "merror",
-      watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+      evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
     ),
     "train-merror"
   )
@@ -226,7 +226,7 @@ test_that("train and predict RF", {
     nthread = n_threads,
     nrounds = 1, objective = "binary:logistic", eval_metric = "error",
     num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
-    watchlist = list(train = xgb.DMatrix(train$data, label = lb))
+    evals = list(train = xgb.DMatrix(train$data, label = lb))
   )
   expect_equal(xgb.get.num.boosted.rounds(bst), 1)
 
@@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
     objective = "multi:softprob", eval_metric = "merror",
     num_class = 3, verbose = 0,
     num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
-    watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+    evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
   )
   expect_equal(xgb.get.num.boosted.rounds(bst), 15)
   # predict for all iterations:
@@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
       data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
       eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
       eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
     ),
     "train-error.*train-auc.*train-logloss"
   )
@@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
       data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
       eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
       eval_metric = list("error", "auc", "logloss"),
-      watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+      evals = list(train = xgb.DMatrix(train$data, label = train$label))
     ),
     "train-error.*train-auc.*train-logloss"
   )
@@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {
 
 test_that("training continuation works", {
   dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)
   param <- list(
     objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
   )
 
   # for the reference, use 4 iterations at once:
   set.seed(11)
-  bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
+  bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
   # first two iterations:
   set.seed(11)
-  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
   # continue for two more:
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
   if (!windows_flag && !solaris_flag) {
     expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
   }
@@ -315,7 +315,7 @@ test_that("training continuation works", {
   expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
   expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
   # test continuing from raw model data
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
   if (!windows_flag && !solaris_flag) {
     expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
   }
@@ -323,7 +323,7 @@ test_that("training continuation works", {
   # test continuing from a model in file
   fname <- file.path(tempdir(), "xgboost.json")
   xgb.save(bst1, fname)
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
   if (!windows_flag && !solaris_flag) {
     expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
   }
@@ -417,7 +417,7 @@ test_that("max_delta_step works", {
   dtrain <- xgb.DMatrix(
     agaricus.train$data, label = agaricus.train$label, nthread = n_threads
   )
-  watchlist <- list(train = dtrain)
+  evals <- list(train = dtrain)
   param <- list(
     objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
     nthread = n_threads,
@@ -425,9 +425,9 @@ test_that("max_delta_step works", {
   )
   nrounds <- 5
   # model with no restriction on max_delta_step
-  bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
+  bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
   # model with restricted max_delta_step
-  bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
+  bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
   # the no-restriction model is expected to have consistently lower loss during the initial iterations
   expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
   expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
@@ -444,7 +444,7 @@ test_that("colsample_bytree works", {
   colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
   dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
   dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
-  watchlist <- list(train = dtrain, eval = dtest)
+  evals <- list(train = dtrain, eval = dtest)
   ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
   ## each tree
   param <- list(
@@ -453,7 +453,7 @@ test_that("colsample_bytree works", {
     eval_metric = "auc"
   )
   set.seed(2)
-  bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
+  bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
   xgb.importance(model = bst)
   # If colsample_bytree works properly, a variety of features should be used
   # in the 100 trees
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
index a0b4910cc56f..913791de4d96 100644
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
 dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
 dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
-watchlist <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
 
 
 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@@ -39,7 +39,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
         nthread = n_threads
       ),
       nrounds = 10,
-      watchlist = list(train = dtrain, test = dtest),
+      evals = list(train = dtrain, test = dtest),
       callbacks = list(xgb.cb.print.evaluation(period = 1))
     )
   })
@@ -57,7 +57,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
         nthread = n_threads
       ),
       nrounds = 10,
-      watchlist = list(train = dtrain, test = dtest),
+      evals = list(train = dtrain, test = dtest),
       callbacks = list(xgb.cb.print.evaluation(period = 2))
     )
   })
@@ -117,7 +117,7 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
     ),
     nrounds = 10,
     verbose = FALSE,
-    watchlist = list(train = dtrain, test = dtest),
+    evals = list(train = dtrain, test = dtest),
     callbacks = list(xgb.cb.evaluation.log())
   )
   logs <- attributes(model)$evaluation_log
@@ -155,7 +155,7 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
 
 test_that("can store evaluation_log without printing", {
   expect_silent(
-    bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
+    bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
   )
   expect_false(is.null(attributes(bst)$evaluation_log))
   expect_false(is.null(attributes(bst)$evaluation_log$train_error))
@@ -166,14 +166,14 @@ test_that("xgb.cb.reset.parameters works as expected", {
 
   # fixed eta
   set.seed(111)
-  bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
+  bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
   expect_false(is.null(attributes(bst0)$evaluation_log))
   expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
 
   # same eta but re-set as a vector parameter in the callback
   set.seed(111)
   my_par <- list(eta = c(0.9, 0.9))
-  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                     callbacks = list(xgb.cb.reset.parameters(my_par)))
   expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
   expect_equal(attributes(bst0)$evaluation_log$train_error,
@@ -182,7 +182,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
   # same eta but re-set via a function in the callback
   set.seed(111)
   my_par <- list(eta = function(itr, itr_end) 0.9)
-  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                     callbacks = list(xgb.cb.reset.parameters(my_par)))
   expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
   expect_equal(attributes(bst0)$evaluation_log$train_error,
@@ -191,7 +191,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
   # different eta re-set as a vector parameter in the callback
   set.seed(111)
   my_par <- list(eta = c(0.6, 0.5))
-  bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+  bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                     callbacks = list(xgb.cb.reset.parameters(my_par)))
   expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
   expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
@@ -199,7 +199,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
   # resetting multiple parameters at the same time runs with no error
   my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
   expect_error(
-    bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+    bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                       callbacks = list(xgb.cb.reset.parameters(my_par)))
   , NA) # NA = no error
   # CV works as well
@@ -210,7 +210,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
 
   # expect no learning with 0 learning rate
   my_par <- list(eta = c(0., 0.))
-  bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+  bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                     callbacks = list(xgb.cb.reset.parameters(my_par)))
   expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
   er <- unique(attributes(bstX)$evaluation_log$train_error)
@@ -223,7 +223,7 @@ test_that("xgb.cb.save.model works as expected", {
   files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
   for (f in files) if (file.exists(f)) file.remove(f)
 
-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+  bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
                    save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
   expect_true(file.exists(files[1]))
   expect_true(file.exists(files[2]))
@@ -239,7 +239,7 @@ test_that("xgb.cb.save.model works as expected", {
   expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
 
   # save_period = 0 saves the last iteration's model
-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+  bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
                    save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
   expect_true(file.exists(files[3]))
   b2 <- xgb.load(files[3])
@@ -252,7 +252,7 @@ test_that("xgb.cb.save.model works as expected", {
 test_that("early stopping xgb.train works", {
   set.seed(11)
   expect_output(
-    bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+    bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
                      early_stopping_rounds = 3, maximize = FALSE)
   , "Stopping. Best iteration")
   expect_false(is.null(xgb.attr(bst, "best_iteration")))
@@ -266,7 +266,7 @@ test_that("early stopping xgb.train works", {
 
   set.seed(11)
   expect_silent(
-    bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+    bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
                       early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
   )
   expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
@@ -282,7 +282,7 @@ test_that("early stopping xgb.train works", {
 test_that("early stopping using a specific metric works", {
   set.seed(11)
   expect_output(
-    bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
+    bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
                      eval_metric = "logloss", eval_metric = "auc",
                      callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
                                                         metric_name = 'test_logloss')))
@@ -315,7 +315,7 @@ test_that("early stopping works with titanic", {
     nrounds = 100,
     early_stopping_rounds = 3,
     nthread = n_threads,
-    watchlist = list(train = xgb.DMatrix(dtx, label = dty))
+    evals = list(train = xgb.DMatrix(dtx, label = dty))
   )
 
   expect_true(TRUE)  # should not crash
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index c6503124682d..d3050b152aa0 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
 dtest <- xgb.DMatrix(
   agaricus.test$data, label = agaricus.test$label, nthread = n_threads
 )
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
 
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
@@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
 num_round <- 2
 
 test_that("custom objective works", {
-  bst <- xgb.train(param, dtrain, num_round, watchlist)
+  bst <- xgb.train(param, dtrain, num_round, evals)
   expect_equal(class(bst), "xgb.Booster")
   expect_false(is.null(attributes(bst)$evaluation_log))
   expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
@@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
 })
 
 test_that("custom objective with early stop works", {
-  bst <- xgb.train(param, dtrain, 10, watchlist)
+  bst <- xgb.train(param, dtrain, 10, evals)
   expect_equal(class(bst), "xgb.Booster")
   train_log <- attributes(bst)$evaluation_log$train_error
   expect_true(all(diff(train_log) <= 0))
@@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
     return(list(grad = grad, hess = hess))
   }
   param$objective <- logregobjattr
-  bst <- xgb.train(param, dtrain, num_round, watchlist)
+  bst <- xgb.train(param, dtrain, num_round, evals)
   expect_equal(class(bst), "xgb.Booster")
 })
 
diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R
index 0612406444ae..44d1566c640e 100644
--- a/R-package/tests/testthat/test_dmatrix.R
+++ b/R-package/tests/testthat/test_dmatrix.R
@@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {
 
   params <- list(tree_method = "hist", nthread = n_threads)
   bst_fd <- xgb.train(
-    params, nrounds = 8, fd, watchlist = list(train = fd)
+    params, nrounds = 8, fd, evals = list(train = fd)
   )
   bst_dgr <- xgb.train(
-    params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
+    params, nrounds = 8, fdgr, evals = list(train = fdgr)
   )
   bst_dgc <- xgb.train(
-    params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
+    params, nrounds = 8, fdgc, evals = list(train = fdgc)
   )
 
   raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
diff --git a/R-package/tests/testthat/test_feature_weights.R b/R-package/tests/testthat/test_feature_weights.R
index 54fec67cfcf5..4ed78c9b6cfe 100644
--- a/R-package/tests/testthat/test_feature_weights.R
+++ b/R-package/tests/testthat/test_feature_weights.R
@@ -25,7 +25,7 @@ test_that("training with feature weights works", {
     expect_lt(importance[1, Frequency], importance[9, Frequency])
   }
 
-  for (tm in c("hist", "approx")) {
+  for (tm in c("hist", "approx", "exact")) {
     test(tm)
   }
 })
diff --git a/R-package/tests/testthat/test_glm.R b/R-package/tests/testthat/test_glm.R
index c089b4fe0240..b59de8b62f15 100644
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@@ -14,19 +14,19 @@ test_that("gblinear works", {
 
   param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
                 nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
-  watchlist <- list(eval = dtest, train = dtrain)
+  evals <- list(eval = dtest, train = dtrain)
 
   n <- 5         # iterations
   ERR_UL <- 0.005 # upper limit for the test set error
   VERB <- 0      # chatterbox switch
 
   param$updater <- 'shotgun'
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
   ypred <- predict(bst, dtest)
   expect_equal(length(getinfo(dtest, 'label')), 1611)
   expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
 
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
                    callbacks = list(xgb.cb.gblinear.history()))
   expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
   h <- xgb.gblinear.history(bst)
@@ -34,16 +34,16 @@ test_that("gblinear works", {
   expect_is(h, "matrix")
 
   param$updater <- 'coord_descent'
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
   expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
 
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
   expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
 
-  bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
+  bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
   expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
 
-  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
+  bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
                    top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
   expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
   h <- xgb.gblinear.history(bst)
diff --git a/R-package/tests/testthat/test_ranking.R b/R-package/tests/testthat/test_ranking.R
index e49a32025e0f..0e7db42da0b2 100644
--- a/R-package/tests/testthat/test_ranking.R
+++ b/R-package/tests/testthat/test_ranking.R
@@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {
 
   params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
                  eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
-  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
   expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
@@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
     eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
     eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
   )
-  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+  bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
   expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
diff --git a/R-package/tests/testthat/test_update.R b/R-package/tests/testthat/test_update.R
index 3c88178e08d3..7fdc6eb84bb3 100644
--- a/R-package/tests/testthat/test_update.R
+++ b/R-package/tests/testthat/test_update.R
@@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
 win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
 
 test_that("updating the model works", {
-  watchlist <- list(train = dtrain, test = dtest)
+  evals <- list(train = dtrain, test = dtest)
 
   # no-subsampling
   p1 <- list(
@@ -25,19 +25,19 @@ test_that("updating the model works", {
     updater = "grow_colmaker,prune"
   )
   set.seed(11)
-  bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
   tr1 <- xgb.model.dt.tree(model = bst1)
 
   # with subsampling
   p2 <- modifyList(p1, list(subsample = 0.1))
   set.seed(11)
-  bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
   tr2 <- xgb.model.dt.tree(model = bst2)
 
   # the same no-subsampling boosting with an extra 'refresh' updater:
   p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
   set.seed(11)
-  bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
   tr1r <- xgb.model.dt.tree(model = bst1r)
   # all should be the same when no subsampling
   expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
@@ -53,7 +53,7 @@ test_that("updating the model works", {
   # the same boosting with subsampling with an extra 'refresh' updater:
   p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
   set.seed(11)
-  bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
+  bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
   tr2r <- xgb.model.dt.tree(model = bst2r)
   # should be the same evaluation but different gains and larger cover
   expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
@@ -66,7 +66,7 @@ test_that("updating the model works", {
   # process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
   set.seed(123)
   p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
-  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
   tr1u <- xgb.model.dt.tree(model = bst1u)
   # all should be the same when no subsampling
   expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -79,7 +79,7 @@ test_that("updating the model works", {
 
   # same thing but with a serialized model
   set.seed(123)
-  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+  bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
   tr1u <- xgb.model.dt.tree(model = bst1u)
   # all should be the same when no subsampling
   expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -87,7 +87,7 @@ test_that("updating the model works", {
 
   # process type 'update' for model with subsampling, refreshing only the tree stats from training data:
   p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-  bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
+  bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
   tr2u <- xgb.model.dt.tree(model = bst2u)
   # should be the same evaluation but different gains and larger cover
   expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
@@ -102,7 +102,7 @@ test_that("updating the model works", {
 
   # process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
   p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
-  bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+  bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
   tr1ut <- xgb.model.dt.tree(model = bst1ut)
   # should be the same evaluations but different gains and smaller cover (test data is smaller)
   expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
@@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
   dtr <- xgb.DMatrix(
     as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
   )
-  watchlist <- list(train = dtr)
+  evals <- list(train = dtr)
   p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
              objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
              base_score = 0)
   set.seed(121)
-  bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
+  bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
   tr0 <- xgb.model.dt.tree(model = bst0)
 
   # run update process for an original model with subsampling
   p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
   bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
-                     watchlist, xgb_model = bst0, verbose = 0)
+                     evals = evals, xgb_model = bst0, verbose = 0)
   tr0u <- xgb.model.dt.tree(model = bst0u)
 
   # should be the same evaluation but different gains and larger cover
diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd
index 0a6432d5f9cf..fc49adc0fcee 100644
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**
 
 > in some way it is similar to what we have done above with the average error. The main difference is that below it was after building the model, and now it is during the construction that we measure errors.
 
-For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
+For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.
 
-```{r watchlist, message=F, warning=F}
-watchlist <- list(train = dtrain, test = dtest)
+```{r evals, message=F, warning=F}
+evals <- list(train = dtrain, test = dtest)
 
 bst <- xgb.train(
     data = dtrain
@@ -355,7 +355,7 @@ bst <- xgb.train(
         , objective = "binary:logistic"
     )
     , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```
 
@@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y
 
 For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
 
-```{r watchlist2, message=F, warning=F}
+```{r evals2, message=F, warning=F}
 bst <- xgb.train(
     data = dtrain
     , max_depth = 2
@@ -379,7 +379,7 @@ bst <- xgb.train(
         , eval_metric = "logloss"
     )
     , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```
 
@@ -401,7 +401,7 @@ bst <- xgb.train(
         , eval_metric = "logloss"
     )
     , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```
 
@@ -430,7 +430,7 @@ bst <- xgb.train(
         , objective = "binary:logistic"
     )
     , nrounds = 2
-    , watchlist = watchlist
+    , evals = evals
 )
 ```
 

From 556ca7783716b7483c8ca89a8d731f9f2b00185e Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Sat, 9 Mar 2024 17:12:25 +0100
Subject: [PATCH 2/3] missing replacement

---
 R-package/R/callbacks.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
index aaf1e2900eaf..39734ab092d3 100644
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -287,7 +287,7 @@ xgb.Callback <- function(
       cb$env,
       model,
       data,
-      watchlist,
+      evals,
       iteration
     )
     if (!NROW(should_stop)) {

From ae2288cac0be6bf0c1f6a4a6e5c215bfa861820a Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Sat, 9 Mar 2024 17:34:11 +0100
Subject: [PATCH 3/3] remove accidentally added test

---
 R-package/tests/testthat/test_feature_weights.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/tests/testthat/test_feature_weights.R b/R-package/tests/testthat/test_feature_weights.R
index 4ed78c9b6cfe..54fec67cfcf5 100644
--- a/R-package/tests/testthat/test_feature_weights.R
+++ b/R-package/tests/testthat/test_feature_weights.R
@@ -25,7 +25,7 @@ test_that("training with feature weights works", {
     expect_lt(importance[1, Frequency], importance[9, Frequency])
   }
 
-  for (tm in c("hist", "approx", "exact")) {
+  for (tm in c("hist", "approx")) {
     test(tm)
   }
 })