From 2f6c23bb8735372c46efe6e1e7ecab03306b871b Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn
Date: Tue, 17 Dec 2024 08:41:29 +0100
Subject: [PATCH] fix: Use the same encoding for nullable as non-nullable arrays (#20323)

---
Review note: this patch was re-flowed from a whitespace-collapsed copy. The
leading indentation of unchanged context lines is reconstructed and should be
confirmed against the tree (or applied with `git apply --ignore-whitespace`).
Relative to the original patch, test_optional_eq_non_optional_20320 now extends
a clone of `s` (Series.extend mutates in place and returns self, so the test
previously compared an encoding with itself), and both new tests carry a
docstring. The last hunk header and the diffstat are updated to match; the blob
id on the test file's `index` line is unchanged and therefore stale.

 crates/polars-row/src/fixed/packed_u32.rs |  1 -
 py-polars/tests/unit/test_row_encoding.py | 42 +++++++++++++++++++++--
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/crates/polars-row/src/fixed/packed_u32.rs b/crates/polars-row/src/fixed/packed_u32.rs
index 18a578914a36..ac252d4376a3 100644
--- a/crates/polars-row/src/fixed/packed_u32.rs
+++ b/crates/polars-row/src/fixed/packed_u32.rs
@@ -108,7 +108,6 @@ pub unsafe fn encode_iter(
                     .copy_from_slice(null_value.to_be_bytes()[4 - num_bytes..].as_uninit());
             },
             Some(v) => {
-                let v = v | ((32 - v.leading_zeros()) << ((num_bytes * 8) - 6));
                 let v = (v ^ invert_mask) | valid_mask;
                 unsafe { buffer.get_unchecked_mut(*offset..*offset + num_bytes) }
                     .copy_from_slice(v.to_be_bytes()[4 - num_bytes..].as_uninit());
diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py
index 0705b7dd685f..de5e65e7582e 100644
--- a/py-polars/tests/unit/test_row_encoding.py
+++ b/py-polars/tests/unit/test_row_encoding.py
@@ -7,8 +7,9 @@
 from hypothesis import given
 
 import polars as pl
-from polars.testing import assert_frame_equal
-from polars.testing.parametric import dataframes
+from polars.testing import assert_frame_equal, assert_series_equal
+from polars.testing.parametric import dataframes, series
+from polars.testing.parametric.strategies.dtype import dtypes
 
 if TYPE_CHECKING:
     from polars._typing import PolarsDataType
@@ -338,3 +339,40 @@ def test_int_after_null() -> None:
         ),
         [(False, True, False), (False, True, False)],
     )
+
+
+@pytest.mark.parametrize("field", FIELD_COMBS)
+@given(s=series(allow_null=False, allow_chunks=False, excluded_dtypes=[pl.Categorical]))
+def test_optional_eq_non_optional_20320(
+    field: tuple[bool, bool, bool], s: pl.Series
+) -> None:
+    """Non-null rows must row-encode identically with or without a trailing null."""
+    # `Series.extend` mutates in place and returns `self`, so extend a clone;
+    # otherwise both names alias one series and the comparison is vacuous.
+    with_null = s.clone().extend(pl.Series([None], dtype=s.dtype))
+
+    re_without_null = s.to_frame()._row_encode([field])
+    re_with_null = with_null.to_frame()._row_encode([field])
+
+    re_without_null = re_without_null.cast(pl.Binary)
+    re_with_null = re_with_null.cast(pl.Binary)
+
+    assert_series_equal(re_with_null.head(s.len()), re_without_null)
+
+
+@pytest.mark.parametrize("field", FIELD_COMBS)
+@given(dtype=dtypes(excluded_dtypes=[pl.Categorical]))
+def test_null(
+    field: tuple[bool, bool, bool],
+    dtype: pl.DataType,
+) -> None:
+    """A lone null of any dtype must survive a row encode/decode round trip."""
+    s = pl.Series("a", [None], dtype)
+
+    assert_series_equal(
+        s.to_frame()
+        ._row_encode([field])
+        ._row_decode([("a", dtype)], [field])
+        .to_series(),
+        s,
+    )