Skip to content

Commit

Permalink
fix: Use the same encoding for nullable as non-nullable arrays (#20323)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Dec 17, 2024
1 parent 80f8945 commit 2f6c23b
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
1 change: 0 additions & 1 deletion crates/polars-row/src/fixed/packed_u32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ pub unsafe fn encode_iter(
.copy_from_slice(null_value.to_be_bytes()[4 - num_bytes..].as_uninit());
},
Some(v) => {
let v = v | ((32 - v.leading_zeros()) << ((num_bytes * 8) - 6));
let v = (v ^ invert_mask) | valid_mask;
unsafe { buffer.get_unchecked_mut(*offset..*offset + num_bytes) }
.copy_from_slice(v.to_be_bytes()[4 - num_bytes..].as_uninit());
Expand Down
38 changes: 36 additions & 2 deletions py-polars/tests/unit/test_row_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from hypothesis import given

import polars as pl
from polars.testing import assert_frame_equal
from polars.testing.parametric import dataframes
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing.parametric import dataframes, series
from polars.testing.parametric.strategies.dtype import dtypes

if TYPE_CHECKING:
from polars._typing import PolarsDataType
Expand Down Expand Up @@ -338,3 +339,36 @@ def test_int_after_null() -> None:
),
[(False, True, False), (False, True, False)],
)


@pytest.mark.parametrize("field", FIELD_COMBS)
@given(s=series(allow_null=False, allow_chunks=False, excluded_dtypes=[pl.Categorical]))
def test_optional_eq_non_optional_20320(
    field: tuple[bool, bool, bool], s: pl.Series
) -> None:
    """
    Check that non-null values row-encode identically with and without a null.

    A null-free series ``s`` is row-encoded twice: once as-is and once with a
    single trailing null added. The encodings of the first ``s.len()`` rows
    must match, i.e. the presence of a null elsewhere in the column must not
    change how valid values are encoded.

    Parameters
    ----------
    field
        Row-encoding option triple forwarded unchanged to ``_row_encode``
        (one entry of ``FIELD_COMBS``).
    s
        Hypothesis-generated series without nulls and without chunks.
    """
    # NOTE: `Series.extend` modifies the series in place and returns the same
    # object. Using it here would also add the null to `s`, making both
    # encodings identical by construction and the test vacuous. Build a new
    # series instead so that `s` stays null-free.
    null_tail = pl.Series([None], dtype=s.dtype)
    with_null = pl.concat([s, null_tail], rechunk=True)

    re_without_null = s.to_frame()._row_encode([field])
    re_with_null = with_null.to_frame()._row_encode([field])

    # Compare the raw encoded bytes.
    re_without_null = re_without_null.cast(pl.Binary)
    re_with_null = re_with_null.cast(pl.Binary)

    # Only the leading rows (the original, non-null values) are comparable.
    assert_series_equal(re_with_null.head(s.len()), re_without_null)


@pytest.mark.parametrize("field", FIELD_COMBS)
@given(dtype=dtypes(excluded_dtypes=[pl.Categorical]))
def test_null(
    field: tuple[bool, bool, bool],
    dtype: pl.DataType,
) -> None:
    """
    Round-trip a single null value of ``dtype`` through the row encoding.

    A one-element series named ``"a"`` holding only ``None`` is row-encoded
    with ``field`` and decoded again; the decoded column must equal the input.
    """
    expected = pl.Series("a", [None], dtype)

    encoded = expected.to_frame()._row_encode([field])
    decoded_frame = encoded._row_decode([("a", dtype)], [field])
    actual = decoded_frame.to_series()

    assert_series_equal(actual, expected)

0 comments on commit 2f6c23b

Please sign in to comment.