Skip to content

Commit

Permalink
update to libxgboost 2.0 (#191)
Browse files Browse the repository at this point in the history
  • Loading branch information
ExpandingMan authored Sep 27, 2023
1 parent 02e1249 commit d799a79
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 26 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "XGBoost"
uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
version = "2.3.2"
version = "2.4.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand All @@ -27,13 +27,13 @@ XGBoostTermExt = "Term"
[compat]
AbstractTrees = "0.4"
CEnum = "0.4"
CUDA = "3, 4"
CUDA = "3, 4, 5"
JSON3 = "1"
OrderedCollections = "1"
SparseMatricesCSR = "0.6"
Tables = "1"
Term = "1, 2"
XGBoost_jll = "1.7.2"
XGBoost_jll = "2"
julia = "1.6"

[extras]
Expand Down
22 changes: 17 additions & 5 deletions src/Lib.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ function XGDMatrixCreateFromFile(fname, silent, out)
@ccall libxgboost.XGDMatrixCreateFromFile(fname::Ptr{Cchar}, silent::Cint, out::Ptr{DMatrixHandle})::Cint
end

# Binding for the libxgboost C function `XGDMatrixCreateFromURI`.
# `config` is passed as a C string and `out` as a pointer to a `DMatrixHandle`;
# the `Cint` produced by the C call is returned to the caller.
function XGDMatrixCreateFromURI(config, out)
    return @ccall libxgboost.XGDMatrixCreateFromURI(
        config::Ptr{Cchar},
        out::Ptr{DMatrixHandle}
    )::Cint
end

# Binding for the libxgboost C function `XGDMatrixCreateFromCSREx`.
# Each argument is passed with the C type annotated below; the `Cint` produced
# by the C call is returned to the caller.
function XGDMatrixCreateFromCSREx(indptr, indices, data, nindptr, nelem, num_col, out)
    return @ccall libxgboost.XGDMatrixCreateFromCSREx(
        indptr::Ptr{Csize_t},
        indices::Ptr{Cuint},
        data::Ptr{Cfloat},
        nindptr::Csize_t,
        nelem::Csize_t,
        num_col::Csize_t,
        out::Ptr{DMatrixHandle}
    )::Cint
end
Expand All @@ -80,6 +84,10 @@ function XGDMatrixCreateFromDense(data, config, out)
@ccall libxgboost.XGDMatrixCreateFromDense(data::Ptr{Cchar}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
end

# Binding for the libxgboost C function `XGDMatrixCreateFromCSC`.
# `indptr`, `indices`, `data` and `config` are all passed as C strings, `nrow`
# as a `bst_ulong`, and `out` as a pointer to a `DMatrixHandle`; the `Cint`
# produced by the C call is returned to the caller.
function XGDMatrixCreateFromCSC(indptr, indices, data, nrow, config, out)
    return @ccall libxgboost.XGDMatrixCreateFromCSC(
        indptr::Ptr{Cchar},
        indices::Ptr{Cchar},
        data::Ptr{Cchar},
        nrow::bst_ulong,
        config::Ptr{Cchar},
        out::Ptr{DMatrixHandle}
    )::Cint
end

# Binding for the libxgboost C function `XGDMatrixCreateFromCSCEx`.
# Each argument is passed with the C type annotated below; the `Cint` produced
# by the C call is returned to the caller.
function XGDMatrixCreateFromCSCEx(col_ptr, indices, data, nindptr, nelem, num_row, out)
    return @ccall libxgboost.XGDMatrixCreateFromCSCEx(
        col_ptr::Ptr{Csize_t},
        indices::Ptr{Cuint},
        data::Ptr{Cfloat},
        nindptr::Csize_t,
        nelem::Csize_t,
        num_row::Csize_t,
        out::Ptr{DMatrixHandle}
    )::Cint
end
Expand Down Expand Up @@ -125,7 +133,7 @@ const XGBCallbackSetData = Cvoid
const XGBCallbackDataIterNext = Cvoid

function XGDMatrixCreateFromDataIter(data_handle, callback, cache_info, out)
@ccall libxgboost.XGDMatrixCreateFromDataIter(data_handle::DataIterHandle, callback::Ptr{Cvoid}, cache_info::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
@ccall libxgboost.XGDMatrixCreateFromDataIter(data_handle::DataIterHandle, callback::Ptr{XGBCallbackDataIterNext}, cache_info::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
end

function XGProxyDMatrixCreate(out)
Expand All @@ -139,15 +147,15 @@ const XGDMatrixCallbackNext = Cvoid
const DataIterResetCallback = Cvoid

function XGDMatrixCreateFromCallback(iter, proxy, reset, next, config, out)
@ccall libxgboost.XGDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, reset::Ptr{Cvoid}, next::Ptr{Cvoid}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
@ccall libxgboost.XGDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, reset::Ptr{DataIterResetCallback}, next::Ptr{XGDMatrixCallbackNext}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
end

function XGQuantileDMatrixCreateFromCallback(iter, proxy, ref, reset, next, config, out)
@ccall libxgboost.XGQuantileDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, ref::DataIterHandle, reset::Ptr{Cvoid}, next::Ptr{Cvoid}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
@ccall libxgboost.XGQuantileDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, ref::DataIterHandle, reset::Ptr{DataIterResetCallback}, next::Ptr{XGDMatrixCallbackNext}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
end

function XGDeviceQuantileDMatrixCreateFromCallback(iter, proxy, reset, next, missing, nthread, max_bin, out)
@ccall libxgboost.XGDeviceQuantileDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, reset::Ptr{Cvoid}, next::Ptr{Cvoid}, missing::Cfloat, nthread::Cint, max_bin::Cint, out::Ptr{DMatrixHandle})::Cint
@ccall libxgboost.XGDeviceQuantileDMatrixCreateFromCallback(iter::DataIterHandle, proxy::DMatrixHandle, reset::Ptr{DataIterResetCallback}, next::Ptr{XGDMatrixCallbackNext}, missing::Cfloat, nthread::Cint, max_bin::Cint, out::Ptr{DMatrixHandle})::Cint
end

function XGProxyDMatrixSetDataCudaArrayInterface(handle, c_interface_str)
Expand All @@ -171,7 +179,7 @@ function XGImportArrowRecordBatch(data_handle, ptr_array, ptr_schema)
end

function XGDMatrixCreateFromArrowCallback(next, config, out)
@ccall libxgboost.XGDMatrixCreateFromArrowCallback(next::Ptr{Cvoid}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
@ccall libxgboost.XGDMatrixCreateFromArrowCallback(next::Ptr{XGDMatrixCallbackNext}, config::Ptr{Cchar}, out::Ptr{DMatrixHandle})::Cint
end

function XGDMatrixSliceDMatrix(handle, idxset, len, out)
Expand Down Expand Up @@ -242,6 +250,10 @@ function XGDMatrixGetDataAsCSR(handle, config, out_indptr, out_indices, out_data
@ccall libxgboost.XGDMatrixGetDataAsCSR(handle::DMatrixHandle, config::Ptr{Cchar}, out_indptr::Ptr{bst_ulong}, out_indices::Ptr{Cuint}, out_data::Ptr{Cfloat})::Cint
end

# Binding for the libxgboost C function `XGDMatrixGetQuantileCut`.
# `handle` is the `DMatrixHandle` to query and `config` is passed as a C string.
# `out_indptr` and `out_data` are pointers to C-string pointers, i.e. output
# slots written by the library. The `Cint` produced by the C call is returned.
function XGDMatrixGetQuantileCut(handle, config, out_indptr, out_data)
    return @ccall libxgboost.XGDMatrixGetQuantileCut(
        handle::DMatrixHandle,
        config::Ptr{Cchar},
        out_indptr::Ptr{Ptr{Cchar}},
        out_data::Ptr{Ptr{Cchar}}
    )::Cint
end

# Binding for the libxgboost C function `XGBoosterCreate`.
# `dmats` points to an array of `DMatrixHandle`s whose length is given by `len`,
# and `out` points to the `BoosterHandle` slot filled in by the library.
# The `Cint` produced by the C call is returned to the caller.
function XGBoosterCreate(dmats, len, out)
    return @ccall libxgboost.XGBoosterCreate(
        dmats::Ptr{DMatrixHandle},
        len::bst_ulong,
        out::Ptr{BoosterHandle}
    )::Cint
end
Expand Down
10 changes: 7 additions & 3 deletions src/booster.jl
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,20 @@ load(::Type{Booster}, fname::AbstractString) = Booster(DMatrix[], model_file=fna
# Construct a `Booster` with no attached `DMatrix` objects, handing `io` to the
# constructor as its `model_buffer`.
function load(::Type{Booster}, io)
    return Booster(DMatrix[]; model_buffer=io)
end

"""
save(b::Booster, fname)
save(b::Booster, fname; format="json")
save(b::Booster, Vector{UInt8}; format="json")
save(b::Booster, io::IO; format="json")
Save the [`Booster`](@ref) object. This saves to formats which are intended to be stored
on disk but the formats used are a lot zanier than those used by `deserialize`.
A model saved with this function can be retrieved with [`load`](@ref) or [`load!`](@ref).
Valid formats are `"json"` and `"ubj"` (universal binary JSON).
"""
function save(b::Booster, fname::AbstractString)
xgbcall(XGBoosterSaveModel, b.handle, fname)
function save(b::Booster, fname::AbstractString; kw...)
# note that XGBoosterSaveModel seems to be deprecated
open(fname, write=true, create=true) do io
save(b, io; kw...)
end
fname
end
function save(b::Booster, ::Type{Vector{UInt8}}; format::AbstractString="json")
Expand Down
32 changes: 27 additions & 5 deletions src/dmatrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,37 @@ function getinfo(dm::DMatrix, ::Type{T}, name::AbstractString) where {T<:Real}
end
# Convenience method: accept the info field name as a `Symbol` and forward to
# the `getinfo` method that takes the name as a string.
function getinfo(dm::DMatrix, t::Type, name::Symbol)
    return getinfo(dm, t, string(name))
end

# Build the `uri` string handed to libxgboost when loading a `DMatrix` from file.
# See https://xgboost.readthedocs.io/en/stable/tutorials/input_format.html
#
# For `format == :binary` the file name is returned as-is. For any other format
# the result is `fname` followed by `?format=<format>`. Because `?` introduces
# that suffix, a file name that already contains `?` is rejected.
#
# Throws `ArgumentError` if `fname` contains the character `?`.
function _fileuri(fname::AbstractString, format::Symbol)
    if '?' in fname
        throw(ArgumentError("file name strings passed to libxgboost cannot contain '?'"))
    end
    format == :binary && return fname
    return string(fname, "?format=", format)
end

"""
load(DMatrix, fname; silent=true, kw...)
load(DMatrix, fname; silent=true, format=:libsvm, kw...)
Load a `DMatrix` from file with name `fname`. The matrix must have been serialized with a call to
`save(::DMatrix, fname)`. Unless `silent`, the xgboost library will print logs to `stdout`.
Additional keyword arguments are passed to the `DMatrix` on construction.
"""
function load(::Type{DMatrix}, fname::AbstractString; silent::Bool=true, kw...)
`format` describes the file format; valid options are `:binary`, `:csv` and `:libsvm`.
"""
function load(::Type{DMatrix}, fname::AbstractString;
#TODO: would be better to have :binary as default, but would be breaking
format::Symbol=:libsvm,
silent::Bool=true,
kw...
)
o = Ref{DMatrixHandle}()
xgbcall(XGDMatrixCreateFromFile, fname, silent, o)
cfg = Dict("uri"=>_fileuri(fname, format),
# gives runtime error if not int even though docs say bool
"silent"=>Int(silent),
# docs are inconsistent and don't explain this, so it's disabled
#"data_split_mode"=>string(data_split_mode),
)
xgbcall(XGDMatrixCreateFromURI, JSON3.write(cfg), o)
DMatrix(o[], kw...)
end

Expand Down Expand Up @@ -385,7 +406,8 @@ getweights(dm::DMatrix) = getinfo(dm, Float32, "weight")
save(dm::DMatrix, fname; silent=true)
Save the `DMatrix` to file `fname` in an opaque (xgboost-specific) serialization format.
Will print logs to `stdout` unless `silent`.
Will print logs to `stdout` unless `silent`. Files created with this function can be loaded
using `XGBoost.load(DMatrix, fname, format=:binary)`.
"""
function save(dm::DMatrix, fname::AbstractString; silent::Bool=true)
xgbcall(XGDMatrixSaveBinary, dm.handle, fname, convert(Cint, silent))
Expand Down
22 changes: 12 additions & 10 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ end

@testset "DMatrix IO" begin
for (fname, sz) ∈ [("agaricus.txt.train", (6513, 126)), ("agaricus.txt.test", (1611, 126))]
dm = XGBoost.load(DMatrix, testfilepath(fname))
dm = XGBoost.load(DMatrix, testfilepath(fname), format=:libsvm)
@test size(dm) == sz

(X, y) = readlibsvm(testfilepath(fname), sz)
Expand All @@ -86,15 +86,15 @@ end
dm = DMatrix((X, y))
fname = tempname()
XGBoost.save(dm, fname)
dm′ = XGBoost.load(DMatrix, fname)
dm′ = XGBoost.load(DMatrix, fname, format=:binary)
@test size(dm) == size(dm′)
@test XGBoost.getlabel(dm) == XGBoost.getlabel(dm′)
isfile(fname) && rm(fname)
end

@testset "Agaricus training" begin
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"))
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"))
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"), format=:libsvm)
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"), format=:libsvm)
watchlist = Dict("eval"=>dtest, "train"=>dtrain)

bst = @test_logs (:info, r"XGBoost") (:info, r"") (:info, r"") (:info, r"Training") begin
Expand Down Expand Up @@ -142,8 +142,8 @@ end
end

@testset "Feature importance" begin
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"))
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"))
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"), format=:libsvm)
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"), format=:libsvm)

bst = xgboost(dtrain, num_round=5,
η=1.0, max_depth=2,
Expand All @@ -166,8 +166,8 @@ end

# these just ensure we don't have any exceptions
@testset "Term extension" begin
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"))
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"))
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"), format=:libsvm)
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"), format=:libsvm)

bst = xgboost(dtrain, num_round=5,
η=1.0, max_depth=2,
Expand All @@ -180,8 +180,8 @@ end
end

@testset "Booster" begin
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"))
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"))
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"), format=:libsvm)
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"), format=:libsvm)

(model_file, _) = mktemp()

Expand Down Expand Up @@ -228,6 +228,8 @@ end
end

has_cuda() && @testset "cuda" begin
@info("runing CUDA tests")

X = randn(Float32, 4, 5)
dm = DMatrix(cu(X))
@test size(dm) == size(X)
Expand Down

0 comments on commit d799a79

Please sign in to comment.