Skip to content

Commit

Permalink
[R] Rename ExternalDMatrix -> ExtMemDMatrix. (#10849)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored Sep 28, 2024
1 parent 9ee4008 commit c9f89c4
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 43 deletions.
2 changes: 1 addition & 1 deletion R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export(xgb.DMatrix.hasinfo)
export(xgb.DMatrix.save)
export(xgb.DataBatch)
export(xgb.DataIter)
export(xgb.ExternalDMatrix)
export(xgb.ExtMemDMatrix)
export(xgb.QuantileDMatrix)
export(xgb.QuantileDMatrix.from_iterator)
export(xgb.attr)
Expand Down
36 changes: 18 additions & 18 deletions R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,10 @@ xgb.QuantileDMatrix <- function(
#' This function is responsible for generating an R object structure containing callback
#' functions and an environment shared with them.
#'
#' The output structure from this function is then meant to be passed to [xgb.ExternalDMatrix()],
#' The output structure from this function is then meant to be passed to [xgb.ExtMemDMatrix()],
#' which will consume the data and create a DMatrix from it by executing the callback functions.
#'
#' For more information, and for a usage example, see the documentation for [xgb.ExternalDMatrix()].
#' For more information, and for a usage example, see the documentation for [xgb.ExtMemDMatrix()].
#'
#' @param env An R environment to pass to the callback functions supplied here, which can be
#' used to keep track of variables to determine how to handle the batches.
Expand All @@ -443,8 +443,8 @@ xgb.QuantileDMatrix <- function(
#' Note that, after resetting the iterator, the batches will be accessed again, so the same data
#' (and in the same order) must be passed in subsequent iterations.
#' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then
#' be passed to [xgb.ExternalDMatrix()].
#' @seealso [xgb.ExternalDMatrix()], [xgb.DataBatch()].
#' be passed to [xgb.ExtMemDMatrix()].
#' @seealso [xgb.ExtMemDMatrix()], [xgb.DataBatch()].
#' @export
xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
if (!is.function(f_next)) {
Expand Down Expand Up @@ -512,7 +512,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
#'
#' @description
#' Helper function to supply data in batches of a data iterator when
#' constructing a DMatrix from external memory through [xgb.ExternalDMatrix()]
#' constructing a DMatrix from external memory through [xgb.ExtMemDMatrix()]
#' or through [xgb.QuantileDMatrix.from_iterator()].
#'
#' This function is **only** meant to be called inside of a callback function (which
Expand All @@ -524,23 +524,23 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
#' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only
#' possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
#'
#' For more information and for example usage, see the documentation for [xgb.ExternalDMatrix()].
#' For more information and for example usage, see the documentation for [xgb.ExtMemDMatrix()].
#' @inheritParams xgb.DMatrix
#' @param data The data (features) for this batch.
#'
#' Note that not all of the input types supported by [xgb.DMatrix()] are possible
#' to pass here. Supported types are:
#' - `matrix`, with types `numeric`, `integer`, and `logical`. Note that for types
#' `integer` and `logical`, missing values might not be automatically recognized
#' as such - see the documentation for parameter `missing` in [xgb.ExternalDMatrix()]
#' as such - see the documentation for parameter `missing` in [xgb.ExtMemDMatrix()]
#' for details on this.
#' - `data.frame`, with the same types as supported by 'xgb.DMatrix' and same
#' conversions applied to it. See the documentation for parameter `data` in
#' [xgb.DMatrix()] for details on it.
#' - CSR matrices, as class `dgRMatrix` from package "Matrix".
#' @return An object of class `xgb.DataBatch`, which is just a list containing the
#' data and parameters passed here. It does **not** inherit from `xgb.DMatrix`.
#' @seealso [xgb.DataIter()], [xgb.ExternalDMatrix()].
#' @seealso [xgb.DataIter()], [xgb.ExtMemDMatrix()].
#' @export
xgb.DataBatch <- function(
data,
Expand Down Expand Up @@ -643,10 +643,10 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
#'
#' For example, in R `integer` types, missing values are represented by integer number `-2147483648`
#' (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes `NA`,
#' which is interpreted as a floating-point NaN by [xgb.ExternalDMatrix()] and by
#' which is interpreted as a floating-point NaN by [xgb.ExtMemDMatrix()] and by
#' [xgb.QuantileDMatrix.from_iterator()], these integer missing values will not be treated as missing.
#' This should not pose any problem for `numeric` types, since they do have an inherent NaN value.
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not
#' held internally but accessed through the iterator when needed.
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.QuantileDMatrix.from_iterator()]
#' @examples
Expand Down Expand Up @@ -706,7 +706,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
#' cache_prefix <- tempdir()
#'
#' # DMatrix will be constructed from the iterator's batches
#' dm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1)
#' dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)
#'
#' # After construction, can be used as a regular DMatrix
#' params <- list(nthread = 1, objective = "reg:squarederror")
Expand All @@ -717,7 +717,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
#' pred_dm <- predict(model, dm)
#' pred_mat <- predict(model, as.matrix(mtcars[, -1]))
#' @export
xgb.ExternalDMatrix <- function(
xgb.ExtMemDMatrix <- function(
data_iterator,
cache_prefix = tempdir(),
missing = NA,
Expand Down Expand Up @@ -753,7 +753,7 @@ xgb.ExternalDMatrix <- function(
)

attributes(dmat) <- list(
class = c("xgb.DMatrix", "xgb.ExternalDMatrix"),
class = c("xgb.DMatrix", "xgb.ExtMemDMatrix"),
fields = attributes(proxy_handle)$fields
)
return(dmat)
Expand All @@ -766,7 +766,7 @@ xgb.ExternalDMatrix <- function(
#' Create an `xgb.QuantileDMatrix` object (exact same class as would be returned by
#' calling function [xgb.QuantileDMatrix()], with the same advantages and limitations) from
#' external data supplied by [xgb.DataIter()], potentially passed in batches from
#' a bigger set that might not fit entirely in memory, same way as [xgb.ExternalDMatrix()].
#' a bigger set that might not fit entirely in memory, in the same way as [xgb.ExtMemDMatrix()].
#'
#' Note that, while external data will only be loaded through the iterator (thus the full data
#' might not be held entirely in-memory), the quantized representation of the data will get
Expand All @@ -776,10 +776,10 @@ xgb.ExternalDMatrix <- function(
#'
#' For more information, see the guide 'Using XGBoost External Memory Version':
#' \url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}
#' @inheritParams xgb.ExternalDMatrix
#' @inheritParams xgb.ExtMemDMatrix
#' @inheritParams xgb.QuantileDMatrix
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExternalDMatrix()],
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExtMemDMatrix()],
#' [xgb.QuantileDMatrix()]
#' @export
xgb.QuantileDMatrix.from_iterator <- function( # nolint
Expand Down Expand Up @@ -1318,8 +1318,8 @@ print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
}
class_print <- if (inherits(x, "xgb.QuantileDMatrix")) {
"xgb.QuantileDMatrix"
} else if (inherits(x, "xgb.ExternalDMatrix")) {
"xgb.ExternalDMatrix"
} else if (inherits(x, "xgb.ExtMemDMatrix")) {
"xgb.ExtMemDMatrix"
} else if (inherits(x, "xgb.ProxyDMatrix")) {
"xgb.ProxyDMatrix"
} else {
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/xgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' for model training by the objective.
#'
#' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
#' or `xgb.ExternalDMatrix` are not supported here.
#' or `xgb.ExtMemDMatrix` are not supported here.
#' @param nrounds The max number of iterations.
#' @param nfold The original dataset is randomly partitioned into `nfold` equal size subsamples.
#' @param prediction A logical value indicating whether to return the test fold predictions
Expand Down
8 changes: 4 additions & 4 deletions R-package/man/xgb.DataBatch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions R-package/man/xgb.DataIter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions R-package/man/xgb.QuantileDMatrix.from_iterator.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion R-package/man/xgb.cv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions R-package/tests/testthat/test_dmatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ test_that("xgb.DMatrix: QuantileDMatrix is not accepted by exact method", {
})
})

test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMatrix", {
test_that("xgb.DMatrix: ExtMemDMatrix produces the same results as regular DMatrix", {
data(mtcars)
y <- mtcars[, 1]
x <- as.matrix(mtcars[, -1])
Expand Down Expand Up @@ -528,8 +528,8 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
f_reset = iterator_reset
)
cache_prefix <- tempdir()
edm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1)
expect_true(inherits(edm, "xgb.ExternalDMatrix"))
edm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)
expect_true(inherits(edm, "xgb.ExtMemDMatrix"))
expect_true(inherits(edm, "xgb.DMatrix"))
set.seed(123)
model_ext <- xgb.train(
Expand Down Expand Up @@ -660,7 +660,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
f_reset = iterator_reset
)
expect_error(
{xgb.ExternalDMatrix(data_iterator, nthread = 1)},
{xgb.ExtMemDMatrix(data_iterator, nthread = 1)},
"custom error"
)
})
Expand Down
5 changes: 4 additions & 1 deletion tests/ci_build/test_r_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ def pkgroot(path: str) -> None:
output = subprocess.run(["git", "clean", "-xdf", "--dry-run"], capture_output=True)
if output.returncode != 0:
raise ValueError("Failed to check git repository status.", output)
would_remove = output.stdout.decode("utf-8").strip().split("\n")
if len(output.stdout) == 0:
would_remove = None
else:
would_remove = output.stdout.decode("utf-8").strip().split("\n")

if would_remove and not all(f.find("tests/ci_build") != -1 for f in would_remove):
raise ValueError(
Expand Down

0 comments on commit c9f89c4

Please sign in to comment.