Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[R] Remove parameters and attributes related to ntree and rebase iterationrange #9935

Merged
merged 3 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R-package/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,6 @@ Imports:
data.table (>= 1.9.6),
jsonlite (>= 1.0)
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
Encoding: UTF-8
SystemRequirements: GNU make, C++17
31 changes: 13 additions & 18 deletions R-package/R/callbacks.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
#' \code{iteration},
#' \code{begin_iteration},
#' \code{end_iteration},
#' \code{num_parallel_tree}.
#'
#' @seealso
#' \code{\link{callbacks}},
Expand All @@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
metric_name = NULL, verbose = TRUE) {
# state variables
best_iteration <- -1
best_ntreelimit <- -1
best_score <- Inf
best_msg <- NULL
metric_idx <- 1
Expand Down Expand Up @@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
# If the difference is due to floating-point truncation, update best_score
best_score <- attr_best_score
}
xgb.attr(env$bst, "best_iteration") <- best_iteration
xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
xgb.attr(env$bst, "best_score") <- best_score
} else {
env$basket$best_iteration <- best_iteration
env$basket$best_ntreelimit <- best_ntreelimit
}
}

Expand All @@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
)
best_score <<- score
best_iteration <<- i
best_ntreelimit <<- best_iteration * env$num_parallel_tree
# save the property to attributes, so they will occur in checkpoint
if (!is.null(env$bst)) {
xgb.attributes(env$bst) <- list(
best_iteration = best_iteration - 1, # convert to 0-based index
best_score = best_score,
best_msg = best_msg,
best_ntreelimit = best_ntreelimit)
best_msg = best_msg
)
}
} else if (i - best_iteration >= stopping_rounds) {
env$stop_condition <- TRUE
Expand Down Expand Up @@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
#' \code{data},
#' \code{end_iteration},
#' \code{params},
#' \code{num_parallel_tree},
#' \code{num_class}.
#'
#' @return
#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
Expand All @@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")

N <- nrow(env$data)
pred <-
if (env$num_class > 1) {
matrix(NA_real_, N, env$num_class)
} else {
rep(NA_real_, N)
}
pred <- NULL

iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
if (NVL(env$params[['booster']], '') == 'gblinear') {
iterationrange <- c(1, 1) # must be 0 for gblinear
iterationrange <- "all"
}
for (fd in env$bst_folds) {
pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
if (is.null(pred)) {
if (NCOL(pr) > 1L) {
pred <- matrix(NA_real_, N, ncol(pr))
} else {
pred <- matrix(NA_real_, N)
}
}
if (is.matrix(pred)) {
pred[fd$index, ] <- pr
} else {
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
Expand Down
52 changes: 23 additions & 29 deletions R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
#' logistic regression would return log-odds instead of probabilities.
#' @param ntreelimit Deprecated, use `iterationrange` instead.
#' @param predleaf Whether to predict per-tree leaf indices.
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
Expand All @@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
#' or `predinteraction` is `TRUE`.
#' @param training Whether the predictions are used for training. For dart booster,
#' training predicting will perform dropout.
#' @param iterationrange Specifies which trees are used in prediction. For
#' example, take a random forest with 100 rounds.
#' With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
#' rounds are used in this prediction. The index is 1-based just like an R vector. When set
#' to `c(1, 1)`, XGBoost will use all trees.
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
#' a two-element vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.
trivialfis marked this conversation as resolved.
Show resolved Hide resolved
#' base-1 indexing, and inclusive of both ends).
#'
#' For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
#' predict using only the first one.
#'
#' If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
#' of the iterations (rounds) otherwise.
#'
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param ... Not used.
Expand Down Expand Up @@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
#'
#' # Predicting tree leafs:
#' # the result is an nsamples X ntrees matrix
Expand Down Expand Up @@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
#' sum(pred5 != lb) / length(lb)
#'
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
if (!inherits(newdata, "xgb.DMatrix")) {
Expand All @@ -275,25 +280,21 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
)
}

if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
ntreelimit <- 0

if (ntreelimit != 0 && is.null(iterationrange)) {
## only ntreelimit, initialize iteration range
iterationrange <- c(0, 0)
} else if (ntreelimit == 0 && !is.null(iterationrange)) {
## only iteration range, handle 1-based indexing
iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
} else if (ntreelimit != 0 && !is.null(iterationrange)) {
## both are specified, let libxgboost throw an error
if (!is.null(iterationrange)) {
if (is.character(iterationrange)) {
stopifnot(iterationrange == "all")
iterationrange <- c(0, 0)
} else {
iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
}
} else {
## no limit is supplied, use best
best_iteration <- xgb.best_iteration(object)
if (is.null(best_iteration)) {
iterationrange <- c(0, 0)
} else {
## We don't need to + 1 as R is 1-based index.
iterationrange <- c(0, as.integer(best_iteration))
iterationrange <- c(0, as.integer(best_iteration) + 1L)
}
}
## Handle the 0 length values.
Expand All @@ -312,7 +313,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
strict_shape = box(TRUE),
iteration_begin = box(as.integer(iterationrange[1])),
iteration_end = box(as.integer(iterationrange[2])),
ntree_limit = box(as.integer(ntreelimit)),
type = box(as.integer(0))
)

Expand Down Expand Up @@ -492,7 +492,7 @@ xgb.attr <- function(object, name) {
return(NULL)
}
if (!is.null(out)) {
if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
if (name %in% c("best_iteration", "best_score")) {
out <- as.numeric(out)
}
}
Expand Down Expand Up @@ -710,12 +710,6 @@ variable.names.xgb.Booster <- function(object, ...) {
return(getinfo(object, "feature_name"))
}

xgb.ntree <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
return(out)
}

xgb.nthread <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$generic_param$nthread)
Expand Down
4 changes: 1 addition & 3 deletions R-package/R/xgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
#' \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
Expand Down Expand Up @@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing

# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint

# those are fixed for CV (no training continuation)
begin_iteration <- 1
Expand Down Expand Up @@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
})
}

for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
for (n in c('niter', 'best_iteration')) {
if (is.null(x[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep = '')
Expand Down
1 change: 0 additions & 1 deletion R-package/R/xgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
# Note: it might look like these aren't used, but they need to be defined in this
# environment for the callbacks to work correctly.
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint

if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
Expand Down
2 changes: 1 addition & 1 deletion R-package/demo/predict_first_ntree.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')

### predict using first 1 tree
ypred1 <- predict(bst, dtest, ntreelimit = 1)
ypred1 <- predict(bst, dtest, iterationrange = c(1, 1))
# by default, we predict using all the trees
ypred2 <- predict(bst, dtest)

Expand Down
2 changes: 0 additions & 2 deletions R-package/man/cb.cv.predict.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion R-package/man/cb.early.stop.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 14 additions & 10 deletions R-package/man/predict.xgb.Booster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion R-package/man/xgb.cv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading