Skip to content

Commit

Permalink
Raise NotImplementedError in to_datetime with dayfirst without infer_format (#14058)
Browse files (browse the repository at this point in the history)

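Raises a `NotImplementedError` when `cudf.to_datetime` is called with `dayfirst=True` but without an explicit `format` and without `infer_datetime_format=True`. Previously `dayfirst` was silently ignored in that case (the behavior appears never to have been implemented), which produced an incorrect month-first result: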

```python
In [6]: cudf.to_datetime(["10-02-2014"], dayfirst=True)
Out[6]: DatetimeIndex(['2014-10-02'], dtype='datetime64[ns]')
```

closes #14042

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14058
  • Loading branch information
mroeschke authored Sep 15, 2023
1 parent 1bfeee7 commit 3b691f4
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
11 changes: 6 additions & 5 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,15 +353,16 @@ def _process_col(col, unit, dayfirst, infer_datetime_format, format):
format=format,
)
else:
if infer_datetime_format and format is None:
if format is None:
if not infer_datetime_format and dayfirst:
raise NotImplementedError(
f"{dayfirst=} not implemented "
f"when {format=} and {infer_datetime_format=}."
)
format = column.datetime.infer_format(
element=col.element_indexing(0),
dayfirst=dayfirst,
)
elif format is None:
format = column.datetime.infer_format(
element=col.element_indexing(0)
)
return col.as_datetime_column(
dtype=_unit_dtype_map[unit],
format=format,
Expand Down
38 changes: 30 additions & 8 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,22 +617,44 @@ def test_datetime_dataframe():
@pytest.mark.parametrize("infer_datetime_format", [True, False])
def test_cudf_to_datetime(data, dayfirst, infer_datetime_format):
pd_data = data
is_string_data = False
if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)):
gd_data = cudf.from_pandas(pd_data)
is_string_data = (
gd_data.ndim == 1
and not gd_data.empty
and gd_data.dtype.kind == "O"
)
else:
if type(pd_data).__module__ == np.__name__:
gd_data = cp.array(pd_data)
else:
gd_data = pd_data
is_string_data = isinstance(gd_data, list) and isinstance(
next(iter(gd_data), None), str
)

expected = pd.to_datetime(
pd_data, dayfirst=dayfirst, infer_datetime_format=infer_datetime_format
)
actual = cudf.to_datetime(
gd_data, dayfirst=dayfirst, infer_datetime_format=infer_datetime_format
)

assert_eq(actual, expected)
if dayfirst and not infer_datetime_format and is_string_data:
# Note: pandas<2.0 also does not respect dayfirst=True correctly
# for object data
with pytest.raises(NotImplementedError):
cudf.to_datetime(
gd_data,
dayfirst=dayfirst,
infer_datetime_format=infer_datetime_format,
)
else:
expected = pd.to_datetime(
pd_data,
dayfirst=dayfirst,
infer_datetime_format=infer_datetime_format,
)
actual = cudf.to_datetime(
gd_data,
dayfirst=dayfirst,
infer_datetime_format=infer_datetime_format,
)
assert_eq(actual, expected)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 3b691f4

Please sign in to comment.