Skip to content

Commit

Permalink
tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Sep 18, 2024
1 parent c9eea65 commit 1158014
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/data/ellpack_page.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ namespace {
cudaMemcpyAsync(&h_me, d_me.get(), sizeof(PtrT), cudaMemcpyDeviceToHost, cuctx->Stream()));
cuctx->Stream().Sync();
// No missing, hence no null value, hence no + 1 symbol.
// FIXME(jiamingy): When we extend this to use a sparsity threshold, the + 1 needs to be added back.
return h_me;
}
} // namespace
Expand Down Expand Up @@ -519,8 +520,7 @@ void EllpackPageImpl::CreateHistIndices(Context const* ctx,
if (row_batch.Size() == 0) {
return;
}
auto d_acc = this->GetDeviceAccessor(ctx, feature_types);
auto null_gidx_value = d_acc.NullValue();
auto null_gidx_value = this->GetDeviceAccessor(ctx, feature_types).NullValue();

auto const& offset_vec = row_batch.offset.ConstHostVector();

Expand Down
6 changes: 3 additions & 3 deletions src/data/gradient_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch, common::Span<FeatureTy
}

GHistIndexMatrix::GHistIndexMatrix(SparsePage const &batch, common::Span<FeatureType const> ft,
common::HistogramCuts cuts, int32_t max_bins_per_feat,
bool isDense, double sparse_thresh, int32_t n_threads)
common::HistogramCuts cuts, bst_bin_t max_bins_per_feat,
bool is_dense, double sparse_thresh, std::int32_t n_threads)
: cut{std::move(cuts)},
max_numeric_bins_per_feat{max_bins_per_feat},
base_rowid{batch.base_rowid},
isDense_{isDense} {
isDense_{is_dense} {
CHECK_GE(n_threads, 1);
CHECK_EQ(row_ptr.size(), 0);
row_ptr = common::MakeFixedVecWithMalloc(batch.Size() + 1, std::size_t{0});
Expand Down
2 changes: 1 addition & 1 deletion src/data/gradient_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class GHistIndexMatrix {
* @brief Constructor for external memory.
*/
GHistIndexMatrix(SparsePage const& page, common::Span<FeatureType const> ft,
common::HistogramCuts cuts, int32_t max_bins_per_feat, bool is_dense,
common::HistogramCuts cuts, bst_bin_t max_bins_per_feat, bool is_dense,
double sparse_thresh, std::int32_t n_threads);
GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back.

Expand Down
42 changes: 42 additions & 0 deletions tests/cpp/data/test_sparse_page_dmatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "../../../src/data/file_iterator.h"
#include "../../../src/data/simple_dmatrix.h"
#include "../../../src/data/sparse_page_dmatrix.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"

Expand Down Expand Up @@ -115,6 +116,47 @@ TEST(SparsePageDMatrix, RetainSparsePage) {
TestRetainPage<SortedCSCPage>();
}

/**
 * @brief Compare gradient index pages constructed directly from sparse pages with the
 *        gradient index pages returned by the external memory DMatrix.
 *
 * The boolean test parameter selects between dense input (sparsity 0) and sparse input
 * (sparsity 0.4).
 */
class TestGradientIndexExt : public ::testing::TestWithParam<bool> {
 protected:
  /**
   * @brief Build the gradient index through both paths and check that they agree.
   *
   * @param is_dense Whether the generated data has no missing value. It's also forwarded
   *                 to the GHistIndexMatrix constructor.
   */
  void Run(bool is_dense) {
    constexpr bst_idx_t kRows = 64;
    constexpr size_t kCols = 2;
    float sparsity = is_dense ? 0.0f : 0.4f;
    bst_bin_t n_bins = 16;
    Context ctx;
    auto p_ext_fmat =
        RandomDataGenerator{kRows, kCols, sparsity}.Batches(4).GenerateSparsePageDMatrix("temp",
                                                                                         true);

    auto cuts = common::SketchOnDMatrix(&ctx, p_ext_fmat.get(), n_bins, false, {});
    // Reference pages, one for each sparse page, built directly with the constructor.
    // NOTE(review): 0.8 is used as the sparse threshold here while the DMatrix path below
    // uses TrainParam::DftSparseThreshold(). The assertions only look at index, offset and
    // row_ptr; confirm that the mismatch is intentional.
    std::vector<std::unique_ptr<GHistIndexMatrix>> pages;
    for (auto const &page : p_ext_fmat->GetBatches<SparsePage>()) {
      pages.emplace_back(std::make_unique<GHistIndexMatrix>(
          page, common::Span<FeatureType const>{}, cuts, n_bins, is_dense, 0.8, ctx.Threads()));
    }

    std::size_t k = 0;
    for (auto const &page : p_ext_fmat->GetBatches<GHistIndexMatrix>(
             &ctx, BatchParam{n_bins, tree::TrainParam::DftSparseThreshold()})) {
      // Guard the lookup, the DMatrix must not return more pages than the sparse page path.
      ASSERT_LT(k, pages.size());
      auto const &from_sparse = pages[k];
      // Use the four-iterator overload so that a length mismatch is reported as a failure
      // instead of reading past the end of the second range.
      ASSERT_TRUE(std::equal(page.index.begin(), page.index.end(), from_sparse->index.begin(),
                             from_sparse->index.end()));
      if (is_dense) {
        // Both offsets are read for kCols elements below, make sure they are available.
        ASSERT_TRUE(page.index.Offset());
        ASSERT_TRUE(from_sparse->index.Offset());
        ASSERT_TRUE(std::equal(page.index.Offset(), page.index.Offset() + kCols,
                               from_sparse->index.Offset()));
      } else {
        ASSERT_FALSE(page.index.Offset());
        ASSERT_FALSE(from_sparse->index.Offset());
      }
      ASSERT_TRUE(std::equal(page.row_ptr.cbegin(), page.row_ptr.cend(),
                             from_sparse->row_ptr.cbegin(), from_sparse->row_ptr.cend()));
      ++k;
    }
    // Both paths must produce the same number of pages.
    ASSERT_EQ(k, pages.size());
  }
};

TEST_P(TestGradientIndexExt, Basic) { this->Run(this->GetParam()); }

INSTANTIATE_TEST_SUITE_P(SparsePageDMatrix, TestGradientIndexExt, testing::Bool());

// Test GHistIndexMatrix can avoid loading sparse page after the initialization.
TEST(SparsePageDMatrix, GHistIndexSkipSparsePage) {
dmlc::TemporaryDirectory tmpdir;
Expand Down
11 changes: 3 additions & 8 deletions tests/cpp/data/test_sparse_page_dmatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@ TEST(SparsePageDMatrix, EllpackPage) {

TEST(SparsePageDMatrix, EllpackSkipSparsePage) {
// Test Ellpack can avoid loading sparse page after the initialization.
dmlc::TemporaryDirectory tmpdir;
std::size_t n_batches = 6;
auto Xy = RandomDataGenerator{180, 12, 0.0}.Batches(n_batches).GenerateSparsePageDMatrix(
tmpdir.path + "/", true);
auto Xy =
RandomDataGenerator{180, 12, 0.0}.Batches(n_batches).GenerateSparsePageDMatrix("temp", true);
auto ctx = MakeCUDACtx(0);
auto cpu = ctx.MakeCPU();
bst_bin_t n_bins{256};
Expand Down Expand Up @@ -117,7 +116,6 @@ TEST(SparsePageDMatrix, EllpackSkipSparsePage) {
TEST(SparsePageDMatrix, MultipleEllpackPages) {
auto ctx = MakeCUDACtx(0);
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
dmlc::TemporaryDirectory tmpdir;
auto dmat = RandomDataGenerator{1024, 2, 0.5f}.Batches(2).GenerateSparsePageDMatrix("temp", true);

// Loop over the batches and count the records
Expand Down Expand Up @@ -196,13 +194,10 @@ class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool
auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);

// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
auto prefix = tmpdir.path + "/cache";

auto p_ext_fmat = RandomDataGenerator{kRows, kCols, sparsity}
.Batches(4)
.OnHost(on_host)
.GenerateSparsePageDMatrix(prefix, true);
.GenerateSparsePageDMatrix("temp", true);

auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
auto impl = (*p_fmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
Expand Down

0 comments on commit 1158014

Please sign in to comment.