From 2f6c23bb8735372c46efe6e1e7ecab03306b871b Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <me@gburghoorn.com>
Date: Tue, 17 Dec 2024 08:41:29 +0100
Subject: [PATCH] fix: Use the same encoding for nullable as non-nullable
 arrays (#20323)

---
 crates/polars-row/src/fixed/packed_u32.rs |  1 -
 py-polars/tests/unit/test_row_encoding.py | 38 +++++++++++++++++++++--
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/crates/polars-row/src/fixed/packed_u32.rs b/crates/polars-row/src/fixed/packed_u32.rs
index 18a578914a36..ac252d4376a3 100644
--- a/crates/polars-row/src/fixed/packed_u32.rs
+++ b/crates/polars-row/src/fixed/packed_u32.rs
@@ -108,7 +108,6 @@ pub unsafe fn encode_iter(
                         .copy_from_slice(null_value.to_be_bytes()[4 - num_bytes..].as_uninit());
                 },
                 Some(v) => {
-                    let v = v | ((32 - v.leading_zeros()) << ((num_bytes * 8) - 6));
                     let v = (v ^ invert_mask) | valid_mask;
                     unsafe { buffer.get_unchecked_mut(*offset..*offset + num_bytes) }
                         .copy_from_slice(v.to_be_bytes()[4 - num_bytes..].as_uninit());
diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py
index 0705b7dd685f..de5e65e7582e 100644
--- a/py-polars/tests/unit/test_row_encoding.py
+++ b/py-polars/tests/unit/test_row_encoding.py
@@ -7,8 +7,9 @@
 from hypothesis import given
 
 import polars as pl
-from polars.testing import assert_frame_equal
-from polars.testing.parametric import dataframes
+from polars.testing import assert_frame_equal, assert_series_equal
+from polars.testing.parametric import dataframes, series
+from polars.testing.parametric.strategies.dtype import dtypes
 
 if TYPE_CHECKING:
     from polars._typing import PolarsDataType
@@ -338,3 +339,36 @@ def test_int_after_null() -> None:
         ),
         [(False, True, False), (False, True, False)],
     )
+
+
+@pytest.mark.parametrize("field", FIELD_COMBS)
+@given(s=series(allow_null=False, allow_chunks=False, excluded_dtypes=[pl.Categorical]))
+def test_optional_eq_non_optional_20320(
+    field: tuple[bool, bool, bool], s: pl.Series
+) -> None:
+    """Non-null rows must encode identically with or without a null in the column."""
+    # `Series.extend` mutates `s` in place and returns it, so take the null-free
+    # length and encoding before appending the null.
+    n = s.len()
+    re_without_null = s.to_frame()._row_encode([field]).cast(pl.Binary)
+
+    with_null = s.extend(pl.Series([None], dtype=s.dtype))
+    re_with_null = with_null.to_frame()._row_encode([field]).cast(pl.Binary)
+    assert_series_equal(re_with_null.head(n), re_without_null)
+
+
+@pytest.mark.parametrize("field", FIELD_COMBS)
+@given(dtype=dtypes(excluded_dtypes=[pl.Categorical]))
+def test_null(
+    field: tuple[bool, bool, bool],
+    dtype: pl.DataType,
+) -> None:
+    """Check that a single null of `dtype` survives a row encode/decode round trip."""
+    expected = pl.Series("a", [None], dtype)
+
+    # Encode the one-column frame, then decode it using the same field options.
+    encoded = expected.to_frame()._row_encode([field])
+    decoded = encoded._row_decode([("a", dtype)], [field])
+
+    # Decoded with a single ("a", dtype) entry; compare that column to the input.
+    assert_series_equal(decoded.to_series(), expected)