Skip to content

Commit

Permalink
Backport PR pandas-dev#54685: ENH: support integer bitwise ops in ArrowExtensionArray
Browse files Browse the repository at this point in the history
  • Loading branch information
lukemanley authored and meeseeksmachine committed Aug 22, 2023
1 parent 8b03024 commit 4475bfa
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ Other enhancements
- :meth:`Series.cummax`, :meth:`Series.cummin` and :meth:`Series.cumprod` are now supported for pyarrow dtypes with pyarrow version 13.0 and above (:issue:`52085`)
- Added support for the DataFrame Consortium Standard (:issue:`54383`)
- Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`51722`)
- PyArrow-backed integer dtypes now support bitwise operations (:issue:`54495`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.api_breaking:
Expand Down
22 changes: 20 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@
"rxor": lambda x, y: pc.xor(y, x),
}

# PyArrow compute kernels for the bitwise operators on integer-typed data.
# The "r"-prefixed entries are the reflected variants: they call the same
# kernel with the two operands swapped.
# NOTE(review): keys presumably mirror the operator function names used for
# lookup in ``_evaluate_op_method`` -- confirm against that method.
ARROW_BIT_WISE_FUNCS = {
    # bitwise AND
    "and_": pc.bit_wise_and,
    "rand_": lambda x, y: pc.bit_wise_and(y, x),
    # bitwise OR
    "or_": pc.bit_wise_or,
    "ror_": lambda x, y: pc.bit_wise_or(y, x),
    # bitwise XOR
    "xor": pc.bit_wise_xor,
    "rxor": lambda x, y: pc.bit_wise_xor(y, x),
}

def cast_for_truediv(
arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
) -> pa.ChunkedArray:
Expand Down Expand Up @@ -582,7 +591,11 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
return self.to_numpy(dtype=dtype)

def __invert__(self) -> Self:
    """
    Return the element-wise inverse of the array (the ``~`` operator).

    Integer-typed data is inverted bit by bit with ``pc.bit_wise_not``;
    every other type is handed to ``pc.invert``.

    Returns
    -------
    Self
        A new array of the same class wrapping the inverted PyArrow data.
    """
    values = self._pa_array
    # The type check has to come before any return: an unconditional
    # ``pc.invert`` return ahead of it would leave the integer branch
    # unreachable.
    if pa.types.is_integer(values.type):
        # This is a bit wise op for integer types
        return type(self)(pc.bit_wise_not(values))
    return type(self)(pc.invert(values))

def __neg__(self) -> Self:
    """Return a new array holding ``pc.negate_checked`` applied to the data."""
    negated = pc.negate_checked(self._pa_array)
    return type(self)(negated)
Expand Down Expand Up @@ -657,7 +670,12 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
return type(self)(result)

def _logical_method(self, other, op):
    """
    Evaluate a ``&``, ``|`` or ``^`` style operator against ``other``.

    For integer types `^`, `|`, `&` are bitwise operators and return
    integer types. Otherwise these are boolean ops.

    Parameters
    ----------
    other : object
        Right-hand operand, forwarded unchanged to ``_evaluate_op_method``.
    op : callable
        The operator being applied, forwarded unchanged.

    Returns
    -------
    Whatever ``_evaluate_op_method`` returns for the selected kernel table.
    """
    # Pick the kernel table first; returning through the logical table
    # unconditionally would make the bitwise dispatch dead code.
    if pa.types.is_integer(self._pa_array.type):
        arrow_funcs = ARROW_BIT_WISE_FUNCS
    else:
        arrow_funcs = ARROW_LOGICAL_FUNCS
    return self._evaluate_op_method(other, op, arrow_funcs)

def _arith_method(self, other, op):
    """Evaluate ``op`` against ``other`` using the ARROW_ARITHMETIC_FUNCS table."""
    arrow_funcs = ARROW_ARITHMETIC_FUNCS
    return self._evaluate_op_method(other, op, arrow_funcs)
Expand Down
27 changes: 26 additions & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
class TestBaseUnaryOps(base.BaseUnaryOpsTests):
def test_invert(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
if not pa.types.is_boolean(pa_dtype):
if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)):
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowNotImplementedError,
Expand Down Expand Up @@ -1339,6 +1339,31 @@ def test_logical_masked_numpy(self, op, exp):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES)
def test_bitwise(pa_type):
    """Check ``|``, ``&``, ``^`` and ``~`` on PyArrow-backed integer Series."""
    # GH 54495
    dtype = ArrowDtype(pa_type)
    left = pd.Series([1, None, 3, 4], dtype=dtype)
    right = pd.Series([None, 3, 5, 4], dtype=dtype)

    # Binary operators: the first two positions have a null on one side and
    # are expected to be null; the rest must match plain Python int results.
    binary_ops = (
        lambda a, b: a | b,
        lambda a, b: a & b,
        lambda a, b: a ^ b,
    )
    for apply_op in binary_ops:
        result = apply_op(left, right)
        expected = pd.Series(
            [None, None, apply_op(3, 5), apply_op(4, 4)], dtype=dtype
        )
        tm.assert_series_equal(result, expected)

    # Unary invert: build the expectation from NumPy on null-filled values,
    # then put the nulls back where ``left`` had them.
    result = ~left
    filled = left.fillna(0).to_numpy()
    expected = pd.Series(~filled, dtype=dtype).mask(left.isnull())
    tm.assert_series_equal(result, expected)


def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")
Expand Down

0 comments on commit 4475bfa

Please sign in to comment.