From d5d60a5d5ca302f5bb61cd13e01851d35b33af80 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:48:10 -0700 Subject: [PATCH 1/2] Validate ignore_index type in drop_duplicates --- python/cudf/cudf/core/indexed_frame.py | 5 +++++ python/cudf/cudf/tests/test_duplicates.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 69b25c51a66..41c6e21499d 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1961,6 +1961,11 @@ def drop_duplicates( ignore_index: bool, default False If True, the resulting axis will be labeled 0, 1, ..., n - 1. """ + if not isinstance(ignore_index, bool): + raise ValueError( + f"{ignore_index=} must be bool, " + f"not {type(ignore_index).__name__}" + ) subset = self._preprocess_subset(subset) subset_cols = [name for name in self._column_names if name in subset] if len(subset_cols) == 0: diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index 8a83ec150bc..f77e7b4d775 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -623,3 +623,9 @@ def test_drop_duplicates_multi_index(): gdf[col].drop_duplicates().to_pandas(), pdf[col].drop_duplicates(), ) + + +def test_drop_duplicates_ignore_index_wrong_type(): + gdf = cudf.DataFrame([1, 1, 2]) + with pytest.raises(ValueError): + gdf.drop_duplicates(ignore_index="True") From d9fc2a3c6ca31fd678da45f836101bd5836f88fc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:02:23 -0700 Subject: [PATCH 2/2] Include np.bool_ --- python/cudf/cudf/core/indexed_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 41c6e21499d..518262ae926 
100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1961,7 +1961,7 @@ def drop_duplicates( ignore_index: bool, default False If True, the resulting axis will be labeled 0, 1, ..., n - 1. """ - if not isinstance(ignore_index, bool): + if not isinstance(ignore_index, (np.bool_, bool)): raise ValueError( f"{ignore_index=} must be bool, " f"not {type(ignore_index).__name__}"