Skip to content

Commit

Permalink
Merge branch 'master' into dataiter
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes committed Jan 20, 2024
2 parents bc1c66e + c5d0608 commit b012be6
Show file tree
Hide file tree
Showing 74 changed files with 1,062 additions and 1,181 deletions.
1 change: 0 additions & 1 deletion .github/workflows/r_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ jobs:
matrix:
config:
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
- {os: windows-latest, r: '4.3.0', compiler: 'msvc', build: 'cmake'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
Expand Down
31 changes: 13 additions & 18 deletions R-package/R/callbacks.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
#' \code{iteration},
#' \code{begin_iteration},
#' \code{end_iteration},
#' \code{num_parallel_tree}.
#'
#' @seealso
#' \code{\link{callbacks}},
Expand All @@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
metric_name = NULL, verbose = TRUE) {
# state variables
best_iteration <- -1
best_ntreelimit <- -1
best_score <- Inf
best_msg <- NULL
metric_idx <- 1
Expand Down Expand Up @@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
# If the difference is due to floating-point truncation, update best_score
best_score <- attr_best_score
}
xgb.attr(env$bst, "best_iteration") <- best_iteration
xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
xgb.attr(env$bst, "best_score") <- best_score
} else {
env$basket$best_iteration <- best_iteration
env$basket$best_ntreelimit <- best_ntreelimit
}
}

Expand All @@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
)
best_score <<- score
best_iteration <<- i
best_ntreelimit <<- best_iteration * env$num_parallel_tree
# save the property to attributes, so they will occur in checkpoint
if (!is.null(env$bst)) {
xgb.attributes(env$bst) <- list(
best_iteration = best_iteration - 1, # convert to 0-based index
best_score = best_score,
best_msg = best_msg,
best_ntreelimit = best_ntreelimit)
best_msg = best_msg
)
}
} else if (i - best_iteration >= stopping_rounds) {
env$stop_condition <- TRUE
Expand Down Expand Up @@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
#' \code{data},
#' \code{end_iteration},
#' \code{params},
#' \code{num_parallel_tree},
#' \code{num_class}.
#'
#' @return
#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
Expand All @@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")

N <- nrow(env$data)
pred <-
if (env$num_class > 1) {
matrix(NA_real_, N, env$num_class)
} else {
rep(NA_real_, N)
}
pred <- NULL

iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
if (NVL(env$params[['booster']], '') == 'gblinear') {
iterationrange <- c(1, 1) # must be 0 for gblinear
iterationrange <- "all"
}
for (fd in env$bst_folds) {
pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
if (is.null(pred)) {
if (NCOL(pr) > 1L) {
pred <- matrix(NA_real_, N, ncol(pr))
} else {
pred <- matrix(NA_real_, N)
}
}
if (is.matrix(pred)) {
pred[fd$index, ] <- pr
} else {
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
Expand Down
100 changes: 51 additions & 49 deletions R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
#' logistic regression would return log-odds instead of probabilities.
#' @param ntreelimit Deprecated, use `iterationrange` instead.
#' @param predleaf Whether to predict pre-tree leaf indices.
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
Expand All @@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
#' or `predinteraction` is `TRUE`.
#' @param training Whether the predictions are used for training. For dart booster,
#' training predicting will perform dropout.
#' @param iterationrange Specifies which trees are used in prediction. For
#' example, take a random forest with 100 rounds.
#' With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
#' rounds are used in this prediction. The index is 1-based just like an R vector. When set
#' to `c(1, 1)`, XGBoost will use all trees.
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
#' a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.
#' base-1 indexing, and inclusive of both ends).
#'
#' For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
#' predict using only the first one.
#'
#' If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
#' of the iterations (rounds) otherwise.
#'
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param ... Not used.
Expand Down Expand Up @@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
#'
#' # Predicting tree leafs:
#' # the result is an nsamples X ntrees matrix
Expand Down Expand Up @@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
#' sum(pred5 != lb) / length(lb)
#'
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
if (!inherits(newdata, "xgb.DMatrix")) {
Expand All @@ -275,25 +280,21 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
)
}

if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
ntreelimit <- 0

if (ntreelimit != 0 && is.null(iterationrange)) {
## only ntreelimit, initialize iteration range
iterationrange <- c(0, 0)
} else if (ntreelimit == 0 && !is.null(iterationrange)) {
## only iteration range, handle 1-based indexing
iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
} else if (ntreelimit != 0 && !is.null(iterationrange)) {
## both are specified, let libgxgboost throw an error
if (!is.null(iterationrange)) {
if (is.character(iterationrange)) {
stopifnot(iterationrange == "all")
iterationrange <- c(0, 0)
} else {
iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
}
} else {
## no limit is supplied, use best
best_iteration <- xgb.best_iteration(object)
if (is.null(best_iteration)) {
iterationrange <- c(0, 0)
} else {
## We don't need to + 1 as R is 1-based index.
iterationrange <- c(0, as.integer(best_iteration))
iterationrange <- c(0, as.integer(best_iteration) + 1L)
}
}
## Handle the 0 length values.
Expand All @@ -312,7 +313,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
strict_shape = box(TRUE),
iteration_begin = box(as.integer(iterationrange[1])),
iteration_end = box(as.integer(iterationrange[2])),
ntree_limit = box(as.integer(ntreelimit)),
type = box(as.integer(0))
)

Expand Down Expand Up @@ -343,68 +343,76 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
)
names(predts) <- c("shape", "results")
shape <- predts$shape
ret <- predts$results
arr <- predts$results

n_ret <- length(ret)
n_ret <- length(arr)
n_row <- nrow(newdata)
if (n_row != shape[1]) {
stop("Incorrect predict shape.")
}

arr <- array(data = ret, dim = rev(shape))
.Call(XGSetArrayDimInplace_R, arr, rev(shape))

cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
n_groups <- shape[2]

## Needed regardless of whether strict shape is being used.
if (predcontrib) {
dimnames(arr) <- list(cnames, NULL, NULL)
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
} else if (predinteraction) {
dimnames(arr) <- list(cnames, cnames, NULL, NULL)
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
}
if (strict_shape) {
return(arr) # strict shape is calculated by libxgboost uniformly.
}

if (predleaf) {
## Predict leaf
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1)
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
} else {
matrix(arr, nrow = n_row, byrow = TRUE)
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
}
} else if (predcontrib) {
## Predict contribution
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
lapply(seq_len(n_groups), function(g) arr[g, , ])
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
} else {
## remove the first axis (group)
dn <- dimnames(arr)
matrix(arr[1, , ], nrow = dim(arr)[2], ncol = dim(arr)[3], dimnames = c(dn[2], dn[3]))
newdim <- dim(arr)[2:3]
newdn <- dimnames(arr)[2:3]
arr <- arr[1, , ]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else if (predinteraction) {
## Predict interaction
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
arr <- if (n_ret == n_row) {
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
if (n_ret == n_row) {
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
} else if (n_groups != 1) {
## turns array into list of matrices
lapply(seq_len(n_groups), function(g) arr[g, , , ])
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
} else {
## remove the first axis (group)
arr <- arr[1, , , , drop = FALSE]
array(arr, dim = dim(arr)[2:4], dimnames(arr)[2:4])
newdim <- dim(arr)[2:4]
newdn <- dimnames(arr)[2:4]
.Call(XGSetArrayDimInplace_R, arr, newdim)
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
}
} else {
## Normal prediction
arr <- if (reshape && n_groups != 1) {
matrix(arr, ncol = n_groups, byrow = TRUE)
if (reshape && n_groups != 1) {
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
} else {
as.vector(ret)
.Call(XGSetArrayDimInplace_R, arr, NULL)
}
}
return(arr)
Expand Down Expand Up @@ -492,7 +500,7 @@ xgb.attr <- function(object, name) {
return(NULL)
}
if (!is.null(out)) {
if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
if (name %in% c("best_iteration", "best_score")) {
out <- as.numeric(out)
}
}
Expand Down Expand Up @@ -710,12 +718,6 @@ variable.names.xgb.Booster <- function(object, ...) {
return(getinfo(object, "feature_name"))
}

xgb.ntree <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
return(out)
}

xgb.nthread <- function(bst) {
config <- xgb.config(bst)
out <- strtoi(config$learner$generic_param$nthread)
Expand Down
4 changes: 1 addition & 3 deletions R-package/R/xgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
#' \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
Expand Down Expand Up @@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing

# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint

# those are fixed for CV (no training continuation)
begin_iteration <- 1
Expand Down Expand Up @@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
})
}

for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
for (n in c('niter', 'best_iteration')) {
if (is.null(x[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep = '')
Expand Down
9 changes: 1 addition & 8 deletions R-package/R/xgb.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,12 @@
#' xgb.importance(model = mbst)
#'
#' @export
xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
data = NULL, label = NULL, target = NULL) {

if (!(is.null(data) && is.null(label) && is.null(target)))
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")

if (is.null(feature_names)) {
model_feature_names <- xgb.feature_names(model)
if (NROW(model_feature_names)) {
feature_names <- model_feature_names
}
}

if (!(is.null(feature_names) || is.character(feature_names)))
stop("feature_names: Has to be a character vector")

Expand Down
Loading

0 comments on commit b012be6

Please sign in to comment.