
Commit 02e1249
copy predicted array so we don't overwrite used memory location (#188)
* copy predicted array so we don't overwrite used memory location

* use MersenneTwister instead of Xoshiro for 1.6

* add predict_nocopy and stop using transpose
ExpandingMan authored Aug 7, 2023
1 parent 359175c commit 02e1249
Showing 4 changed files with 43 additions and 23 deletions.
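The heart of the change: `unsafe_wrap` hands back a Julia array that merely wraps memory owned by libxgboost, and the library re-uses that buffer on every prediction call, so results from earlier calls were silently overwritten. A minimal sketch of the hazard and the fix (the buffer and helper below are hypothetical stand-ins, not code from this diff):

```julia
using Random: rand!

# Hypothetical stand-in for libxgboost's behavior: every prediction is
# written into the same internal buffer and returned as a wrapper over it.
const BUF = Vector{Float32}(undef, 4)
predict_raw() = (rand!(BUF); unsafe_wrap(Array, pointer(BUF), 4))

y1 = predict_raw()        # y1 aliases the shared buffer
y2 = predict_raw()        # the second call overwrites that memory in place
y1 == y2                  # true: y1 was silently clobbered

y3 = copy(predict_raw())  # the fix: detach the result from the buffer
predict_raw()
y3 == BUF                 # false (with overwhelming probability)
```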
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "XGBoost"
uuid = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
-version = "2.3.1"
+version = "2.3.2"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
1 change: 1 addition & 0 deletions docs/src/api.md
@@ -39,6 +39,7 @@ Booster
updateone!
update!
predict
+predict_nocopy
setparam!
setparams!
getnrounds
53 changes: 32 additions & 21 deletions src/booster.jl
@@ -275,29 +275,18 @@ function deserialize(::Type{Booster}, buf::AbstractVector{UInt8}, data=DMatrix[]
    deserialize!(b, buf)
end

-# sadly this is type unstable because we might return a transpose
"""
-    predict(b::Booster, data; margin=false, training=false, ntree_limit=0)
-
-Use the model `b` to run predictions on `data`. This will return a `Vector{Float32}` which can be compared
-to training or test target data.
-
-If `ntree_limit > 0` only the first `ntree_limit` trees will be used in prediction.
-
-## Examples
-```julia
-(X, y) = (randn(100,3), randn(100))
-b = xgboost((X, y), 10)
-ŷ = predict(b, X)
-```
+    predict_nocopy(b::Booster, data; kw...)
+
+Same as [`predict`](@ref), but the output array is not copied. Data in the array output
+by this function may be overwritten by future calls to `predict_nocopy` or `predict`.
"""
-function predict(b::Booster, Xy::DMatrix;
-                 margin::Bool=false,  # whether to output margin
-                 training::Bool=false,
-                 ntree_lower_limit::Integer=0,
-                 ntree_limit::Integer=0,  # 0 corresponds to no limit
-                )
+function predict_nocopy(b::Booster, Xy::DMatrix;
+                        margin::Bool=false,  # whether to output margin
+                        training::Bool=false,
+                        ntree_lower_limit::Integer=0,
+                        ntree_limit::Integer=0,  # 0 corresponds to no limit
+                       )
    opts = Dict("type"=>(margin ? 1 : 0),
                "iteration_begin"=>ntree_lower_limit,
                "iteration_end"=>ntree_limit,
@@ -309,9 +298,31 @@ function predict(b::Booster, Xy::DMatrix;
    o = Ref{Ptr{Cfloat}}()
    xgbcall(XGBoosterPredictFromDMatrix, b.handle, Xy.handle, opts, oshape, odim, o)
    dims = reverse(unsafe_wrap(Array, oshape[], odim[]))
+    # this `copy` is needed because libxgboost re-uses the pointer
    o = unsafe_wrap(Array, o[], tuple(dims...))
-    length(dims) > 1 ? transpose(o) : o
+    length(dims) > 1 ? permutedims(o) : o
end

+predict_nocopy(b::Booster, Xy; kw...) = predict_nocopy(b, DMatrix(Xy); kw...)

"""
predict(b::Booster, data; margin=false, training=false, ntree_limit=0)
Use the model `b` to run predictions on `data`. This will return a `Vector{Float32}` which can be compared
to training or test target data.
If `ntree_limit > 0` only the first `ntree_limit` trees will be used in prediction.
## Examples
```julia
(X, y) = (randn(100,3), randn(100))
b = xgboost((X, y), 10)
ŷ = predict(b, X)
```
"""
predict(b::Booster, Xy::DMatrix; kw...) = copy(predict_nocopy(b, Xy; kw...))

predict(b::Booster, Xy; kw...) = predict(b, DMatrix(Xy); kw...)

function evaliter(b::Booster, watch, n::Integer=1)
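In short, the commit splits prediction into a safe default and an explicit fast path. A usage sketch based on the docstrings above (values illustrative, not code from this diff):

```julia
using XGBoost

(X, y) = (randn(100, 3), randn(100))
b = xgboost((X, y), 10)

ŷ = predict(b, X)           # independent copy; safe to keep around
ŷ2 = predict_nocopy(b, X)   # may alias libxgboost's internal buffer, so it
                            # is only valid until the next predict* call
```

Note the switch from `transpose` to `permutedims`: `transpose` returns a lazy wrapper (the source of the old type instability noted in the removed comment), whereas `permutedims` materializes a plain `Matrix`.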
10 changes: 9 additions & 1 deletion test/runtests.jl
@@ -179,7 +179,7 @@ end
    @test Term.Panel(bst) isa Term.Panel
end

@testset "Booster Save/Load/Serialize" begin
@testset "Booster" begin
dtrain = XGBoost.load(DMatrix, testfilepath("agaricus.txt.train"))
dtest = XGBoost.load(DMatrix, testfilepath("agaricus.txt.test"))

@@ -217,6 +217,14 @@ end
    bst2 = Booster(DMatrix[])
    XGBoost.deserialize!(bst2, bin)
    @test preds == predict(bst2, dtest)

+    # libxgboost re-uses the prediction memory location, so we test that
+    # `predict` returns a copy which later calls cannot overwrite
+    rng = MersenneTwister(999)  # note that Xoshiro is not available on 1.6
+    (X, y) = (randn(rng, 10, 2), randn(rng, 10))
+    b = xgboost((X, y))
+    ŷ = predict(b, X)
+    @test predict(b, randn(MersenneTwister(998), 10, 2)) ≠ ŷ
end

has_cuda() && @testset "cuda" begin
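The test seeds `MersenneTwister` because `Xoshiro` only ships with Julia 1.7 and later. A version-gated helper (hypothetical, not part of this commit) could select between them:

```julia
using Random

# Xoshiro was added in Julia 1.7; fall back to MersenneTwister on 1.6.
@static if VERSION >= v"1.7"
    make_rng(seed) = Xoshiro(seed)
else
    make_rng(seed) = MersenneTwister(seed)
end

rng = make_rng(999)
(X, y) = (randn(rng, 10, 2), randn(rng, 10))
```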
