diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
index b3d6bdb1ae0a..02e0a7cd4b8e 100644
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
 #' \code{iteration},
 #' \code{begin_iteration},
-#' \code{end_iteration},
-#' \code{num_parallel_tree}.
+#' \code{end_iteration}.
 #'
 #' @seealso
 #' \code{\link{callbacks}},
@@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
                           metric_name = NULL, verbose = TRUE) {
   # state variables
   best_iteration <- -1
-  best_ntreelimit <- -1
   best_score <- Inf
   best_msg <- NULL
   metric_idx <- 1
@@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
         # If the difference is due to floating-point truncation, update best_score
         best_score <- attr_best_score
       }
-      xgb.attr(env$bst, "best_iteration") <- best_iteration
-      xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
+      xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
       xgb.attr(env$bst, "best_score") <- best_score
     } else {
       env$basket$best_iteration <- best_iteration
-      env$basket$best_ntreelimit <- best_ntreelimit
     }
   }
 
@@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
       )
       best_score <<- score
       best_iteration <<- i
-      best_ntreelimit <<- best_iteration * env$num_parallel_tree
       # save the property to attributes, so they will occur in checkpoint
       if (!is.null(env$bst)) {
         xgb.attributes(env$bst) <- list(
           best_iteration = best_iteration - 1, # convert to 0-based index
           best_score = best_score,
-          best_msg = best_msg,
-          best_ntreelimit = best_ntreelimit)
+          best_msg = best_msg
+        )
       }
     } else if (i - best_iteration >= stopping_rounds) {
       env$stop_condition <- TRUE
@@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
 #' \code{data},
 #' \code{end_iteration},
-#' \code{params},
-#' \code{num_parallel_tree},
-#' \code{num_class}.
+#' \code{params}.
 #'
 #' @return
 #' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
@@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
       stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")
 
     N <- nrow(env$data)
-    pred <-
-      if (env$num_class > 1) {
-        matrix(NA_real_, N, env$num_class)
-      } else {
-        rep(NA_real_, N)
-      }
+    pred <- NULL
 
-    iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
+    iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
     if (NVL(env$params[['booster']], '') == 'gblinear') {
-      iterationrange <- c(1, 1)  # must be 0 for gblinear
+      iterationrange <- "all"
     }
     for (fd in env$bst_folds) {
       pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
+      if (is.null(pred)) {  # allocate lazily: the output width is only known once a fold has predicted
+        if (NCOL(pr) > 1L) {
+          pred <- matrix(NA_real_, N, ncol(pr))  # one column per output (class, quantile, target, ...)
+        } else {
+          pred <- rep(NA_real_, N)  # single output: keep the documented plain-vector result
+        }
+      }
       if (is.matrix(pred)) {
         pred[fd$index, ] <- pr
       } else {
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 945d86132a08..e8ae787fc722 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
     res <- sapply(seq_along(watchlist), function(j) {
       w <- watchlist[[j]]
       ## predict using all trees
-      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
+      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
       eval_res <- feval(preds, w)
       out <- eval_res$value
       names(out) <- paste0(evnames[j], "-", eval_res$metric)
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index cee7e9fc5887..2575c369031b 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
 #' @param outputmargin Whether the prediction should be returned in the form of original untransformed
 #'        sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
 #'        logistic regression would return log-odds instead of probabilities.
-#' @param ntreelimit Deprecated, use `iterationrange` instead.
 #' @param predleaf Whether to predict pre-tree leaf indices.
 #' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
 #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
@@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
 #'        or `predinteraction` is `TRUE`.
 #' @param training Whether the predictions are used for training. For dart booster,
 #'        training predicting will perform dropout.
-#' @param iterationrange Specifies which trees are used in prediction. For
-#'        example, take a random forest with 100 rounds.
-#'        With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
-#'        rounds are used in this prediction. The index is 1-based just like an R vector. When set
-#'        to `c(1, 1)`, XGBoost will use all trees.
+#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
+#'   a vector of length two with the start and end numbers in the sequence (same format as R's `seq` - i.e.
+#'   base-1 indexing, and inclusive of both ends).
+#'
+#'   For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
+#'   predict using only the first one.
+#'
+#'   If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
+#'   of the iterations (rounds) otherwise.
+#'
+#'   If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
 #' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
 #'        type and shape of predictions are invariant to the model type.
 #' @param ... Not used.
@@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
-#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
+#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
 #'
 #' # Predicting tree leafs:
 #' # the result is an nsamples X ntrees matrix
@@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
 #' all.equal(pred, pred_labels)
 #' # prediction from using only 5 iterations should result
 #' # in the same error as seen in iteration 5:
-#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
+#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
 #' sum(pred5 != lb) / length(lb)
 #'
 #' @export
-predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
+predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
                                 predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
                                 reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
   if (!inherits(newdata, "xgb.DMatrix")) {
@@ -275,25 +280,22 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
     )
   }
 
-  if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
-    ntreelimit <- 0
 
-  if (ntreelimit != 0 && is.null(iterationrange)) {
-    ## only ntreelimit, initialize iteration range
-    iterationrange <- c(0, 0)
-  } else if (ntreelimit == 0 && !is.null(iterationrange)) {
-    ## only iteration range, handle 1-based indexing
-    iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
-  } else if (ntreelimit != 0 && !is.null(iterationrange)) {
-    ## both are specified, let libgxgboost throw an error
+  if (!is.null(iterationrange)) {
+    if (is.character(iterationrange) &&
+        length(iterationrange) == 1 &&
+        iterationrange == "all") {
+      iterationrange <- c(0, 0)
+    } else {
+      iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
+    }
   } else {
     ## no limit is supplied, use best
     best_iteration <- xgb.best_iteration(object)
     if (is.null(best_iteration)) {
       iterationrange <- c(0, 0)
     } else {
-      ## We don't need to + 1 as R is 1-based index.
-      iterationrange <- c(0, as.integer(best_iteration))
+      iterationrange <- c(0, as.integer(best_iteration) + 1L)
     }
   }
   ## Handle the 0 length values.
@@ -312,7 +314,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
     strict_shape = box(TRUE),
     iteration_begin = box(as.integer(iterationrange[1])),
     iteration_end = box(as.integer(iterationrange[2])),
-    ntree_limit = box(as.integer(ntreelimit)),
     type = box(as.integer(0))
   )
 
@@ -492,7 +493,7 @@ xgb.attr <- function(object, name) {
     return(NULL)
   }
   if (!is.null(out)) {
-    if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
+    if (name %in% c("best_iteration", "best_score")) {
       out <- as.numeric(out)
     }
   }
@@ -710,12 +711,6 @@ variable.names.xgb.Booster <- function(object, ...) {
   return(getinfo(object, "feature_name"))
 }
 
-xgb.ntree <- function(bst) {
-  config <- xgb.config(bst)
-  out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
-  return(out)
-}
-
 xgb.nthread <- function(bst) {
   config <- xgb.config(bst)
   out <- strtoi(config$learner$generic_param$nthread)
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index a960957ca313..eb0495631d6e 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -103,7 +103,6 @@
 #'         parameter or randomly generated.
 #'   \item \code{best_iteration} iteration number with the best evaluation metric value
 #'         (only available with early stopping).
-#'   \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
 #'   \item \code{pred} CV prediction values available when \code{prediction} is set.
 #'         It is either vector or matrix (see \code{\link{cb.cv.predict}}).
 #'   \item \code{models} a list of the CV folds' models. It is only available with the explicit
@@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
 
   # extract parameters that can affect the relationship b/w #trees and #iterations
   num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
-  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
 
   # those are fixed for CV (no training continuation)
   begin_iteration <- 1
@@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
       })
     }
 
-    for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
+    for (n in c('niter', 'best_iteration')) {
       if (is.null(x[[n]]))
         next
       cat(n, ': ', x[[n]], '\n', sep = '')
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index a313ed32f414..f0f2332b58c3 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -393,7 +393,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   # Note: it might look like these aren't used, but they need to be defined in this
   # environment for the callbacks for work correctly.
   num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
-  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
 
   if (is_update && nrounds > niter_init)
     stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R
index 02c168b77e43..179c18c707f4 100644
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@@ -15,7 +15,7 @@ cat('start testing prediction from first n trees\n')
 labels <- getinfo(dtest, 'label')
 
 ### predict using first 1 tree
-ypred1 <- predict(bst, dtest, ntreelimit = 1)
+ypred1 <- predict(bst, dtest, iterationrange = c(1, 1))
 # by default, we predict using all the trees
 ypred2 <- predict(bst, dtest)
 
diff --git a/R-package/man/cb.cv.predict.Rd b/R-package/man/cb.cv.predict.Rd
index ded899e8a4b1..4cabac1c9569 100644
--- a/R-package/man/cb.cv.predict.Rd
+++ b/R-package/man/cb.cv.predict.Rd
@@ -35,8 +35,6 @@ Callback function expects the following values to be set in its calling frame:
 \code{data},
 \code{end_iteration},
-\code{params},
-\code{num_parallel_tree},
-\code{num_class}.
+\code{params}.
 }
 \seealso{
 \code{\link{callbacks}}
diff --git a/R-package/man/cb.early.stop.Rd b/R-package/man/cb.early.stop.Rd
index 7b6efa8427a2..7cd51a3ce563 100644
--- a/R-package/man/cb.early.stop.Rd
+++ b/R-package/man/cb.early.stop.Rd
@@ -55,7 +55,6 @@ Callback function expects the following values to be set in its calling frame:
 \code{iteration},
 \code{begin_iteration},
-\code{end_iteration},
-\code{num_parallel_tree}.
+\code{end_iteration}.
 }
 \seealso{
 \code{\link{callbacks}},
diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd
index 66194c64fbec..7a6dd6c1306b 100644
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@@ -9,7 +9,6 @@
   newdata,
   missing = NA,
   outputmargin = FALSE,
-  ntreelimit = NULL,
   predleaf = FALSE,
   predcontrib = FALSE,
   approxcontrib = FALSE,
@@ -36,8 +35,6 @@ missing values in data (e.g., 0 or some other extreme value).}
 sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
 logistic regression would return log-odds instead of probabilities.}
 
-\item{ntreelimit}{Deprecated, use \code{iterationrange} instead.}
-
 \item{predleaf}{Whether to predict pre-tree leaf indices.}
 
 \item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
@@ -53,11 +50,18 @@ or \code{predinteraction} is \code{TRUE}.}
 \item{training}{Whether the predictions are used for training. For dart booster,
 training predicting will perform dropout.}
 
-\item{iterationrange}{Specifies which trees are used in prediction. For
-example, take a random forest with 100 rounds.
-With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set)
-rounds are used in this prediction. The index is 1-based just like an R vector. When set
-to \code{c(1, 1)}, XGBoost will use all trees.}
+\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
+a vector of length two with the start and end numbers in the sequence (same format as R's \code{seq} - i.e.
+base-1 indexing, and inclusive of both ends).
+
+For example, passing \code{c(1,20)} will predict using the first twenty iterations, while passing \code{c(1,1)} will
+predict using only the first one.
+
+If passing \code{NULL}, will either stop at the best iteration if the model used early stopping, or use all
+of the iterations (rounds) otherwise.
+
+If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
+}
 
 \item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
 type and shape of predictions are invariant to the model type.}
@@ -145,7 +149,7 @@ bst <- xgb.train(
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
-pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
+pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
 
 # Predicting tree leafs:
 # the result is an nsamples X ntrees matrix
@@ -216,7 +220,7 @@ str(pred)
 all.equal(pred, pred_labels)
 # prediction from using only 5 iterations should result
 # in the same error as seen in iteration 5:
-pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
+pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
 sum(pred5 != lb) / length(lb)
 
 }
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 2d8508c4d1d5..9f6103a52762 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -135,7 +135,6 @@ It is created by the \code{\link{cb.evaluation.log}} callback.
 parameter or randomly generated.
 \item \code{best_iteration} iteration number with the best evaluation metric value
 (only available with early stopping).
-\item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
 \item \code{pred} CV prediction values available when \code{prediction} is set.
 It is either vector or matrix (see \code{\link{cb.cv.predict}}).
 \item \code{models} a list of the CV folds' models. It is only available with the explicit
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 8dd934765004..03a8ddbe124d 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -33,15 +33,11 @@ test_that("train and predict binary classification", {
   pred <- predict(bst, test$data)
   expect_length(pred, 1611)
 
-  pred1 <- predict(bst, train$data, ntreelimit = 1)
+  pred1 <- predict(bst, train$data, iterationrange = c(1, 1))
   expect_length(pred1, 6513)
   err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
   err_log <- attributes(bst)$evaluation_log[1, train_error]
   expect_lt(abs(err_pred1 - err_log), 10e-6)
-
-  pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
-  expect_length(pred1, 6513)
-  expect_equal(pred1, pred2)
 })
 
 test_that("parameter validation works", {
@@ -117,8 +113,8 @@ test_that("dart prediction works", {
     nrounds = nrounds,
     objective = "reg:squarederror"
   )
-  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
-  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
+  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)
+  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))
   expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
 
   pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
@@ -139,8 +135,8 @@ test_that("dart prediction works", {
     data = dtrain,
     nrounds = nrounds
   )
-  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
-  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
+  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)
+  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
   pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
 
   expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
@@ -162,7 +158,7 @@ test_that("train and predict softprob", {
   )
   expect_false(is.null(attributes(bst)$evaluation_log))
   expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
+  expect_equal(xgb.get.num.boosted.rounds(bst), 5)
   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris) * 3)
   # row sums add up to total probability of 1:
@@ -174,12 +170,12 @@ test_that("train and predict softprob", {
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
   # manually calculate error at the 1st iteration:
-  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
+  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
   pred_labels <- max.col(mpred) - 1
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
 
-  mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
+  mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
   expect_equal(mpred, mpred1)
 
   d <- cbind(
@@ -213,7 +209,7 @@ test_that("train and predict softmax", {
   )
   expect_false(is.null(attributes(bst)$evaluation_log))
   expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
+  expect_equal(xgb.get.num.boosted.rounds(bst), 5)
 
   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris))
@@ -233,19 +229,15 @@ test_that("train and predict RF", {
     watchlist = list(train = xgb.DMatrix(train$data, label = lb))
   )
   expect_equal(xgb.get.num.boosted.rounds(bst), 1)
-  expect_equal(xgb.ntree(bst), 20)
 
   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb) / length(lb)
   expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
   # expect_lt(pred_err, 0.03)
 
-  pred <- predict(bst, train$data, ntreelimit = 20)
+  pred <- predict(bst, train$data, iterationrange = c(1, 1))
   pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
   expect_equal(pred_err_20, pred_err)
-
-  pred1 <- predict(bst, train$data, iterationrange = c(1, 2))
-  expect_equal(pred, pred1)
 })
 
 test_that("train and predict RF with softprob", {
@@ -261,7 +253,6 @@ test_that("train and predict RF with softprob", {
     watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
   )
   expect_equal(xgb.get.num.boosted.rounds(bst), 15)
-  expect_equal(xgb.ntree(bst), 15 * 3 * 4)
   # predict for all iterations:
   pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
   expect_equal(dim(pred), c(nrow(iris), 3))
@@ -269,7 +260,7 @@ test_that("train and predict RF with softprob", {
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
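-  # predict for 7 iterations and adjust for 4 parallel trees per iteration
+  # predict using only the first 7 iterations; iterationrange counts boosting rounds, not individual trees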
-  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
+  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
   err <- sum((max.col(pred) - 1) != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
 })
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
index afa270c0bd51..c60d0c246f81 100644
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -211,12 +211,11 @@ test_that("early stopping xgb.train works", {
   , "Stopping. Best iteration")
   expect_false(is.null(xgb.attr(bst, "best_iteration")))
   expect_lt(xgb.attr(bst, "best_iteration"), 19)
-  expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
 
   pred <- predict(bst, dtest)
   expect_equal(length(pred), 1611)
   err_pred <- err(ltest, pred)
-  err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
+  err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_error]
   expect_equal(err_log, err_pred, tolerance = 5e-6)
 
   set.seed(11)
@@ -231,8 +230,7 @@ test_that("early stopping xgb.train works", {
   loaded <- xgb.load(fname)
 
   expect_false(is.null(xgb.attr(loaded, "best_iteration")))
-  expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
-  expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
+  expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_iteration"))
 })
 
 test_that("early stopping using a specific metric works", {
@@ -245,12 +243,11 @@ test_that("early stopping using a specific metric works", {
   , "Stopping. Best iteration")
   expect_false(is.null(xgb.attr(bst, "best_iteration")))
   expect_lt(xgb.attr(bst, "best_iteration"), 19)
-  expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
 
-  pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
+  pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, "best_iteration") + 1))
   expect_equal(length(pred), 1611)
   logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
-  logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
+  logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_logloss]
   expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
 })
 
@@ -286,7 +283,6 @@ test_that("early stopping xgb.cv works", {
   , "Stopping. Best iteration")
   expect_false(is.null(cv$best_iteration))
   expect_lt(cv$best_iteration, 19)
-  expect_equal(cv$best_iteration, cv$best_ntreelimit)
   # the best error is min error:
   expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
 })
@@ -354,3 +350,44 @@ test_that("prediction in xgb.cv for softprob works", {
   expect_equal(dim(cv$pred), c(nrow(iris), 3))
   expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
 })
+
+test_that("prediction in xgb.cv works for multi-quantile", {  # cv$pred needs one column per requested quantile
+  data(mtcars)
+  y <- mtcars$mpg  # regression target
+  x <- as.matrix(mtcars[, -1])  # all columns but the first (mpg in the stock mtcars dataset)
+  dm <- xgb.DMatrix(x, label = y, nthread = 1)
+  cv <- xgb.cv(
+    data = dm,
+    params = list(
+      objective = "reg:quantileerror",
+      quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),  # five quantiles -> five prediction columns expected below
+      nthread = 1
+    ),
+    nrounds = 5,
+    nfold = 3,
+    prediction = TRUE,  # makes the CV predictions available as cv$pred
+    verbose = 0
+  )
+  expect_equal(dim(cv$pred), c(nrow(x), 5))  # one row per observation, one column per quantile
+})
+
+test_that("prediction in xgb.cv works for multi-output", {  # cv$pred needs one column per target
+  data(mtcars)
+  y <- mtcars$mpg
+  x <- as.matrix(mtcars[, -1])  # all columns but the first (mpg in the stock mtcars dataset)
+  dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)  # two-column label: y and its negation
+  cv <- xgb.cv(
+    data = dm,
+    params = list(
+      tree_method = "hist",
+      multi_strategy = "multi_output_tree",
+      objective = "reg:squarederror",
+      nthread = n_threads  # NOTE(review): n_threads is not defined in this hunk; presumably set at the top of the file
+    ),
+    nrounds = 5,
+    nfold = 3,
+    prediction = TRUE,  # makes the CV predictions available as cv$pred
+    verbose = 0
+  )
+  expect_equal(dim(cv$pred), c(nrow(x), 2))  # one row per observation, one column per label column
+})
diff --git a/R-package/tests/testthat/test_glm.R b/R-package/tests/testthat/test_glm.R
index ae698d98f9db..349bcce8d1f5 100644
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
   booster <- xgb.train(
     param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
   )
-  expect_equal(xgb.attr(booster, "best_iteration"), 5)
+  expect_equal(xgb.attr(booster, "best_iteration"), 4)
   predt_es <- predict(booster, dtrain)
 
-  n <- xgb.attr(booster, "best_iteration") + es_round
+  n <- xgb.attr(booster, "best_iteration") + es_round + 1
   booster <- xgb.train(
     param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
   )
diff --git a/R-package/tests/testthat/test_ranking.R b/R-package/tests/testthat/test_ranking.R
index 277c8f288e34..e49a32025e0f 100644
--- a/R-package/tests/testthat/test_ranking.R
+++ b/R-package/tests/testthat/test_ranking.R
@@ -44,7 +44,7 @@ test_that('Test ranking with weighted data', {
   expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
   expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
   for (i in 1:10) {
-    pred <- predict(bst, newdata = dtrain, ntreelimit = i)
+    pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))
     # is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
     is_sorted <- lapply(seq(1, 20, by = 5),
       function(k) {