diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 652b60685221..cbd5763c6107 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -206,7 +206,7 @@ in-core training is one additional data read when the data is dense. To run experiments on these platforms, the open source `NVIDIA Linux driver `__ -with version ``>=565.47`` is required. +with version ``>=565.47`` is required; it should come with CTK 12.7 and later versions. ************** Best Practices diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index 8b8da6da8805..b92d7a9fa7e3 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -211,6 +211,7 @@ def check_extmem_qdm( cache="cache", on_host=on_host, ) + Xy_it = xgb.ExtMemQuantileDMatrix(it) with pytest.raises(ValueError, match="Only the `hist`"): booster_it = xgb.train( @@ -227,12 +228,10 @@ def check_extmem_qdm( Xy = xgb.QuantileDMatrix(it) booster = xgb.train({"device": device}, Xy, num_boost_round=8) - if device == "cpu": - # Get cuts from ellpack without CPU-GPU interpolation is not yet supported. 
- cut_it = Xy_it.get_quantile_cut() - cut = Xy.get_quantile_cut() - np.testing.assert_allclose(cut_it[0], cut[0]) - np.testing.assert_allclose(cut_it[1], cut[1]) + cut_it = Xy_it.get_quantile_cut() + cut = Xy.get_quantile_cut() + np.testing.assert_allclose(cut_it[0], cut[0]) + np.testing.assert_allclose(cut_it[1], cut[1]) predt_it = booster_it.predict(Xy_it) predt = booster.predict(Xy) diff --git a/src/data/extmem_quantile_dmatrix.cc b/src/data/extmem_quantile_dmatrix.cc index e3659f205dd9..df56b497f0a2 100644 --- a/src/data/extmem_quantile_dmatrix.cc +++ b/src/data/extmem_quantile_dmatrix.cc @@ -58,7 +58,7 @@ ExtMemQuantileDMatrix::~ExtMemQuantileDMatrix() { } BatchSet ExtMemQuantileDMatrix::GetExtBatches(Context const *, BatchParam const &) { - LOG(FATAL) << "Not implemented"; + LOG(FATAL) << "Not implemented for `ExtMemQuantileDMatrix`."; auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(nullptr)); return BatchSet{begin_iter}; @@ -121,7 +121,8 @@ BatchSet ExtMemQuantileDMatrix::GetGradientIndex(Context const CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin(); } - CHECK(this->ghist_index_source_); + CHECK(this->ghist_index_source_) + << "The `ExtMemQuantileDMatrix` is initialized using GPU data, cannot be used for CPU."; this->ghist_index_source_->Reset(param); if (!std::isnan(param.sparse_thresh) && diff --git a/src/data/extmem_quantile_dmatrix.cu b/src/data/extmem_quantile_dmatrix.cu index ea3f12c2ec1e..3fb1557e9993 100644 --- a/src/data/extmem_quantile_dmatrix.cu +++ b/src/data/extmem_quantile_dmatrix.cu @@ -80,7 +80,8 @@ BatchSet ExtMemQuantileDMatrix::GetEllpackBatches(Context const *, std::visit( [this, param](auto &&ptr) { - CHECK(ptr); + CHECK(ptr) + << "The `ExtMemQuantileDMatrix` is initialized using CPU data, cannot be used for GPU."; ptr->Reset(param); }, this->ellpack_page_source_); diff --git a/src/data/extmem_quantile_dmatrix.h b/src/data/extmem_quantile_dmatrix.h index 33a80f5cda92..842bfd2d49d3 100644 --- 
a/src/data/extmem_quantile_dmatrix.h +++ b/src/data/extmem_quantile_dmatrix.h @@ -54,7 +54,9 @@ class ExtMemQuantileDMatrix : public QuantileDMatrix { [[nodiscard]] bool EllpackExists() const override { return std::visit([](auto &&v) { return static_cast(v); }, ellpack_page_source_); } - [[nodiscard]] bool GHistIndexExists() const override { return true; } + [[nodiscard]] bool GHistIndexExists() const override { + return static_cast(ghist_index_source_); + } [[nodiscard]] BatchSet GetExtBatches(Context const *ctx, BatchParam const ¶m) override; diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc index 14d3c7c642f8..80d2e30f6855 100644 --- a/src/data/gradient_index.cc +++ b/src/data/gradient_index.cc @@ -189,13 +189,13 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const { bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const { auto begin = RowIdx(ridx); if (IsDense()) { - return static_cast(index[begin + fidx]); + return static_cast(this->index[begin + fidx]); } auto end = RowIdx(ridx + 1); auto const& cut_ptrs = cut.Ptrs(); auto f_begin = cut_ptrs[fidx]; auto f_end = cut_ptrs[fidx + 1]; - return BinarySearchBin(begin, end, index, f_begin, f_end); + return BinarySearchBin(begin, end, this->index, f_begin, f_end); } float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const { diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py index 7198941cd034..094b324b5a3f 100644 --- a/tests/python-gpu/test_gpu_data_iterator.py +++ b/tests/python-gpu/test_gpu_data_iterator.py @@ -68,17 +68,35 @@ def test_cpu_data_iterator() -> None: strategies.booleans(), ) @settings(deadline=None, max_examples=10, print_blob=True) +@pytest.mark.filterwarnings("ignore") def test_extmem_qdm( n_samples_per_batch: int, n_features: int, n_batches: int, on_host: bool ) -> None: check_extmem_qdm(n_samples_per_batch, n_features, n_batches, "cuda", on_host) 
+@pytest.mark.filterwarnings("ignore") +def test_invalid_device_extmem_qdm() -> None: + it = tm.IteratorForTest( + *tm.make_batches(16, 4, 2, use_cupy=False), cache="cache", on_host=True + ) + Xy = xgb.ExtMemQuantileDMatrix(it) + with pytest.raises(ValueError, match="cannot be used for GPU"): + xgb.train({"device": "cuda"}, Xy) + + it = tm.IteratorForTest( + *tm.make_batches(16, 4, 2, use_cupy=True), cache="cache", on_host=True + ) + Xy = xgb.ExtMemQuantileDMatrix(it) + with pytest.raises(ValueError, match="cannot be used for CPU"): + xgb.train({"device": "cpu"}, Xy) + + def test_concat_pages() -> None: it = tm.IteratorForTest(*tm.make_batches(64, 16, 4, use_cupy=True), cache=None) Xy = xgb.ExtMemQuantileDMatrix(it) with pytest.raises(ValueError, match="can not be used with concatenated pages"): - booster = xgb.train( + xgb.train( { "device": "cuda", "subsample": 0.5,