Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring parity with pandas for datetime & timedelta comparison operations #13877

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,14 +460,22 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
if isinstance(other, ColumnBase) and not isinstance(
other, DatetimeColumn
):
return _all_bools_with_nulls(
result = _all_bools_with_nulls(
self, other, bool_fill_value=op == "__ne__"
)
if cudf.get_option("mode.pandas_compatible"):
result = result.fillna(op == "__ne__")
return result

if out_dtype is None:
return NotImplemented

return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
result = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
if cudf.get_option(
"mode.pandas_compatible"
) and out_dtype == cudf.dtype(np.bool_):
result = result.fillna(True if op == "__ne__" else False)
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
return result

def fillna(
self,
Expand Down
14 changes: 11 additions & 3 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
"__ge__",
"NULL_EQUALS",
}:
out_dtype = np.bool_
out_dtype = cudf.dtype(np.bool_)
elif op == "__mod__":
out_dtype = determine_out_dtype(self.dtype, other.dtype)
elif op in {"__truediv__", "__floordiv__"}:
Expand All @@ -206,16 +206,24 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
if isinstance(other, ColumnBase) and not isinstance(
other, TimeDeltaColumn
):
return _all_bools_with_nulls(
result = _all_bools_with_nulls(
self, other, bool_fill_value=op == "__ne__"
)
if cudf.get_option("mode.pandas_compatible"):
result = result.fillna(op == "__ne__")
return result

if out_dtype is None:
return NotImplemented

lhs, rhs = (other, this) if reflect else (this, other)

return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
result = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
if cudf.get_option(
"mode.pandas_compatible"
) and out_dtype == cudf.dtype(np.bool_):
result = result.fillna(op == "__ne__")
return result

def normalize_binop_value(self, other) -> ColumnBinaryOperand:
if isinstance(other, (ColumnBase, cudf.Scalar)):
Expand Down
17 changes: 17 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2107,3 +2107,20 @@ def test_datetime_binop_tz_timestamp(op):
date_scalar = datetime.datetime.now(datetime.timezone.utc)
with pytest.raises(NotImplementedError):
op(s, date_scalar)


@pytest.mark.parametrize("data1", [["20110101", "20120101", None]])
@pytest.mark.parametrize("data2", [["20110101", "20120101", "20130101"]])
@pytest.mark.parametrize("op", _cmpops)
def test_datetime_series_cmpops_pandas_compatibility(data1, data2, op):
    """Check datetime comparisons against pandas in pandas-compatible mode.

    ``data1`` carries a null in its last slot while ``data2`` is fully
    populated, so every operator in ``_cmpops`` is applied to a pair of
    rows where one side is missing.
    """
    left = cudf.Series(data=data1, dtype="datetime64[ns]")
    right = cudf.Series(data=data2, dtype="datetime64[ns]")

    # The pandas operands are derived from the cudf ones so both libraries
    # compare exactly the same values.
    expected = op(left.to_pandas(), right.to_pandas())

    with cudf.option_context("mode.pandas_compatible", True):
        actual = op(left, right)

    assert_eq(expected, actual)
26 changes: 26 additions & 0 deletions python/cudf/cudf/tests/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@
[12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234],
]

# Comparison operators used to parametrize the comparison tests; the order
# (lt, gt, le, ge, eq, ne) fixes the order of the generated test cases.
_cmpops = [
    getattr(operator, name) for name in ("lt", "gt", "le", "ge", "eq", "ne")
]


@pytest.mark.parametrize(
"data",
Expand Down Expand Up @@ -1442,3 +1451,20 @@ def test_timdelta_binop_tz_timestamp(op):
def test_timedelta_getitem_na():
s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]")
assert s[2] is cudf.NA


@pytest.mark.parametrize("data1", [[123, 456, None]])
@pytest.mark.parametrize("data2", [[123, 456, 789]])
@pytest.mark.parametrize("op", _cmpops)
def test_timedelta_series_cmpops_pandas_compatibility(data1, data2, op):
    """Check timedelta comparisons against pandas in pandas-compatible mode.

    ``data1`` carries a null in its last slot while ``data2`` is fully
    populated, so every operator in ``_cmpops`` is applied to a pair of
    rows where one side is missing.
    """
    left = cudf.Series(data=data1, dtype="timedelta64[ns]")
    right = cudf.Series(data=data2, dtype="timedelta64[ns]")

    # The pandas operands are derived from the cudf ones so both libraries
    # compare exactly the same values.
    expected = op(left.to_pandas(), right.to_pandas())

    with cudf.option_context("mode.pandas_compatible", True):
        actual = op(left, right)

    assert_eq(expected, actual)
Loading