Skip to content

Commit

Permalink
Merge pull request #15811 from rapidsai/branch-24.06
Browse files Browse the repository at this point in the history
Forward-merge branch-24.06 into branch-24.08
  • Loading branch information
GPUtester authored May 22, 2024
2 parents acedfc0 + 9a0612b commit 968bea2
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
12 changes: 10 additions & 2 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -782,8 +782,16 @@ std::vector<std::vector<rowgroup_rows>> calculate_aligned_rowgroup_bounds(
} else {
// pushdown mask present; null mask bits w/ set pushdown mask bits will be encoded
// Use the number of set bits in pushdown mask as size
auto bits_to_borrow =
8 - (d_pd_set_counts[rg_idx][parent_col_idx] - previously_borrowed) % 8;
auto bits_to_borrow = [&]() {
auto const parent_valid_count = d_pd_set_counts[rg_idx][parent_col_idx];
if (parent_valid_count < previously_borrowed) {
// Borrow to make an empty rowgroup
return previously_borrowed - parent_valid_count;
}
auto const misalignment = (parent_valid_count - previously_borrowed) % 8;
return (8 - misalignment) % 8;
}();

if (bits_to_borrow == 0) {
// Didn't borrow any bits for this rowgroup
previously_borrowed = 0;
Expand Down
Binary file not shown.
13 changes: 13 additions & 0 deletions python/cudf/cudf/tests/test_orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,3 +1954,16 @@ def test_writer_lz4():

got = pd.read_orc(buffer)
assert_eq(gdf, got)


def test_row_group_alignment(datadir):
    """Round-trip the MapManyNulls sample through the ORC writer and reader.

    The input is loaded from a Parquet file, written to an in-memory ORC
    buffer, read back, and compared against the frame that was written.
    """
    source_df = cudf.read_parquet(
        datadir / "TestOrcFile.MapManyNulls.parquet"
    )

    # Write to memory rather than disk so the test leaves no artifacts.
    orc_buffer = BytesIO()
    source_df.to_orc(orc_buffer)

    roundtrip_df = cudf.read_orc(orc_buffer)

    assert_eq(source_df, roundtrip_df)

0 comments on commit 968bea2

Please sign in to comment.