Skip to content

Commit

Permalink
Enable pytest failures on warnings from cudf (#4223)
Browse files Browse the repository at this point in the history
In 24.04, cudf issues a FutureWarning when `__getitem__` is used for positional indexing (e.g. `df.dtypes[0]`). This change uses `iloc` instead (e.g. `df.dtypes.iloc[0]`).

Additionally, the test suite will now fail when a warning comes from cudf, as part of rapidsai/build-planning#26

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Ralph Liu (https://github.com/nv-rliu)
  - Brad Rees (https://github.com/BradReesWork)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)
  - Don Acosta (https://github.com/acostadon)

URL: #4223
  • Loading branch information
mroeschke authored Mar 17, 2024
1 parent 7aeaec9 commit a467da9
Show file tree
Hide file tree
Showing 17 changed files with 63 additions and 48 deletions.
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/centrality/betweenness_centrality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -160,7 +160,7 @@ def betweenness_centrality(

if not isinstance(k, (cudf.DataFrame, cudf.Series)):
if isinstance(k, list):
vertex_dtype = G.edgelist.edgelist_df.dtypes[0]
vertex_dtype = G.edgelist.edgelist_df.dtypes.iloc[0]
k = cudf.Series(k, dtype=vertex_dtype)

if isinstance(k, (cudf.DataFrame, cudf.Series)):
Expand Down Expand Up @@ -300,7 +300,7 @@ def edge_betweenness_centrality(

if not isinstance(k, (cudf.DataFrame, cudf.Series)):
if isinstance(k, list):
vertex_dtype = G.edgelist.edgelist_df.dtypes[0]
vertex_dtype = G.edgelist.edgelist_df.dtypes.iloc[0]
k = cudf.Series(k, dtype=vertex_dtype)

if isinstance(k, (cudf.DataFrame, cudf.Series)):
Expand Down
24 changes: 12 additions & 12 deletions python/cugraph/cugraph/community/spectral_clustering.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -92,8 +92,8 @@ def spectralBalancedCutClustering(
G, isNx = ensure_cugraph_obj_for_nx(G)
# Check if vertex type is "int32"
if (
G.edgelist.edgelist_df.dtypes[0] != np.int32
or G.edgelist.edgelist_df.dtypes[1] != np.int32
G.edgelist.edgelist_df.dtypes.iloc[0] != np.int32
or G.edgelist.edgelist_df.dtypes.iloc[1] != np.int32
):
raise ValueError(
"'spectralBalancedCutClustering' requires the input graph's vertex to be "
Expand Down Expand Up @@ -186,8 +186,8 @@ def spectralModularityMaximizationClustering(

G, isNx = ensure_cugraph_obj_for_nx(G)
if (
G.edgelist.edgelist_df.dtypes[0] != np.int32
or G.edgelist.edgelist_df.dtypes[1] != np.int32
G.edgelist.edgelist_df.dtypes.iloc[0] != np.int32
or G.edgelist.edgelist_df.dtypes.iloc[1] != np.int32
):
raise ValueError(
"'spectralModularityMaximizationClustering' requires the input graph's "
Expand Down Expand Up @@ -271,8 +271,8 @@ def analyzeClustering_modularity(

G, isNx = ensure_cugraph_obj_for_nx(G)
if (
G.edgelist.edgelist_df.dtypes[0] != np.int32
or G.edgelist.edgelist_df.dtypes[1] != np.int32
G.edgelist.edgelist_df.dtypes.iloc[0] != np.int32
or G.edgelist.edgelist_df.dtypes.iloc[1] != np.int32
):
raise ValueError(
"'analyzeClustering_modularity' requires the input graph's "
Expand All @@ -284,7 +284,7 @@ def analyzeClustering_modularity(
clustering, "vertex", vertex_col_name, drop=True
)

if clustering.dtypes[0] != np.int32 or clustering.dtypes[1] != np.int32:
if clustering.dtypes.iloc[0] != np.int32 or clustering.dtypes.iloc[1] != np.int32:
raise ValueError(
"'analyzeClustering_modularity' requires both the clustering 'vertex' "
"and 'cluster' to be of type 'int32'"
Expand Down Expand Up @@ -354,8 +354,8 @@ def analyzeClustering_edge_cut(
G, isNx = ensure_cugraph_obj_for_nx(G)

if (
G.edgelist.edgelist_df.dtypes[0] != np.int32
or G.edgelist.edgelist_df.dtypes[1] != np.int32
G.edgelist.edgelist_df.dtypes.iloc[0] != np.int32
or G.edgelist.edgelist_df.dtypes.iloc[1] != np.int32
):
raise ValueError(
"'analyzeClustering_edge_cut' requires the input graph's vertex to be "
Expand All @@ -367,7 +367,7 @@ def analyzeClustering_edge_cut(
clustering, "vertex", vertex_col_name, drop=True
)

if clustering.dtypes[0] != np.int32 or clustering.dtypes[1] != np.int32:
if clustering.dtypes.iloc[0] != np.int32 or clustering.dtypes.iloc[1] != np.int32:
raise ValueError(
"'analyzeClustering_edge_cut' requires both the clustering 'vertex' "
"and 'cluster' to be of type 'int32'"
Expand Down Expand Up @@ -437,7 +437,7 @@ def analyzeClustering_ratio_cut(
clustering, "vertex", vertex_col_name, drop=True
)

if clustering.dtypes[0] != np.int32 or clustering.dtypes[1] != np.int32:
if clustering.dtypes.iloc[0] != np.int32 or clustering.dtypes.iloc[1] != np.int32:
raise ValueError(
"'analyzeClustering_ratio_cut' requires both the clustering 'vertex' "
"and 'cluster' to be of type 'int32'"
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/community/triangle_count.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -24,7 +24,7 @@
# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, start_list):
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
if isinstance(start_list, cudf.Series):
start_list_dtypes = start_list.dtype
else:
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/community/leiden.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -170,7 +170,7 @@ def leiden(
part_mod_score = [client.submit(convert_to_cudf, r) for r in result]
wait(part_mod_score)

vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
empty_df = cudf.DataFrame(
{
"vertex": numpy.empty(shape=0, dtype=vertex_dtype),
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/community/louvain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -191,7 +191,7 @@ def louvain(
part_mod_score = [client.submit(convert_to_cudf, r) for r in result]
wait(part_mod_score)

vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
empty_df = cudf.DataFrame(
{
"vertex": numpy.empty(shape=0, dtype=vertex_dtype),
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/dask/link_analysis/pagerank.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -76,7 +76,7 @@ def ensure_valid_dtype(input_graph, input_df, input_df_name):
warnings.warn(warning_msg, UserWarning)
input_df = input_df.astype({"values": edge_attr_dtype})

vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
input_df_vertex_dtype = input_df["vertex"].dtype
if input_df_vertex_dtype != vertex_dtype:
warning_msg = (
Expand Down Expand Up @@ -406,7 +406,7 @@ def pagerank(

wait(result)

vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]

# Have each worker convert tuple of arrays and bool from PLC to cudf
# DataFrames and bools. This will be a list of futures.
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/sampling/random_walks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -123,7 +123,7 @@ def random_walks(
start_vertices_type = input_graph.edgelist.edgelist_df.dtypes[0]
else:
# FIXME: Get the 'src' column names instead and retrieve the type
start_vertices_type = input_graph.input_df.dtypes[0]
start_vertices_type = input_graph.input_df.dtypes.iloc[0]
start_vertices = dask_cudf.from_cudf(
start_vertices, npartitions=min(input_graph._npartitions, len(start_vertices))
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def _get_type_id_from_indices(indices, etype_id_range_dict):

for etype_id, (start, stop) in etype_id_range_dict.items():
range_types = (start <= indices) & (indices < stop)
type_ser[range_types] = etype_id
type_ser[range_types] = type_ser.dtype.type(etype_id)

return type_ser

Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/link_analysis/pagerank.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -64,7 +64,7 @@ def ensure_valid_dtype(input_graph, input_df, input_df_name):
warnings.warn(warning_msg, UserWarning)
input_df = input_df.astype({"values": edge_attr_dtype})

vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
input_df_vertex_dtype = input_df["vertex"].dtype
if input_df_vertex_dtype != vertex_dtype:
warning_msg = (
Expand Down
7 changes: 5 additions & 2 deletions python/cugraph/cugraph/link_prediction/jaccard.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,13 @@
# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, vertex_pair):
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
vertex_pair_dtypes = vertex_pair.dtypes

if vertex_pair_dtypes[0] != vertex_dtype or vertex_pair_dtypes[1] != vertex_dtype:
if (
vertex_pair_dtypes.iloc[0] != vertex_dtype
or vertex_pair_dtypes.iloc[1] != vertex_dtype
):
warning_msg = (
"Jaccard requires 'vertex_pair' to match the graph's 'vertex' type. "
f"input graph's vertex type is: {vertex_dtype} and got "
Expand Down
9 changes: 6 additions & 3 deletions python/cugraph/cugraph/link_prediction/overlap.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -38,10 +38,13 @@
# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, vertex_pair):
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
vertex_pair_dtypes = vertex_pair.dtypes

if vertex_pair_dtypes[0] != vertex_dtype or vertex_pair_dtypes[1] != vertex_dtype:
if (
vertex_pair_dtypes.iloc[0] != vertex_dtype
or vertex_pair_dtypes.iloc[1] != vertex_dtype
):
warning_msg = (
"Overlap requires 'vertex_pair' to match the graph's 'vertex' type. "
f"input graph's vertex type is: {vertex_dtype} and got "
Expand Down
9 changes: 6 additions & 3 deletions python/cugraph/cugraph/link_prediction/sorensen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -38,10 +38,13 @@
# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, vertex_pair):
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
vertex_pair_dtypes = vertex_pair.dtypes

if vertex_pair_dtypes[0] != vertex_dtype or vertex_pair_dtypes[1] != vertex_dtype:
if (
vertex_pair_dtypes.iloc[0] != vertex_dtype
or vertex_pair_dtypes.iloc[1] != vertex_dtype
):
warning_msg = (
"Sorensen requires 'vertex_pair' to match the graph's 'vertex' type. "
f"input graph's vertex type is: {vertex_dtype} and got "
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/sampling/random_walks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -145,7 +145,7 @@ def random_walks(
if isinstance(start_vertices, list):
# Ensure the 'start_vertices' have the same dtype as the edge list.
# Failing to do that may produce erroneous results.
vertex_dtype = G.edgelist.edgelist_df.dtypes[0]
vertex_dtype = G.edgelist.edgelist_df.dtypes.iloc[0]
start_vertices = cudf.Series(start_vertices, dtype=vertex_dtype)

if G.renumbered is True:
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -38,11 +38,11 @@
# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, start_list):
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0]
vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
if isinstance(start_list, cudf.Series):
start_list_dtypes = start_list.dtype
else:
start_list_dtypes = start_list.dtypes[0]
start_list_dtypes = start_list.dtypes.iloc[0]

if start_list_dtypes != vertex_dtype:
warning_msg = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def get_sg_graph(dataset, directed, edge_ids):
if not directed:
# Edge ids not supported for undirected graph
return None

dtype = df.dtypes.iloc[0]
edge_id = "edge_id"
df[edge_id] = df.index
Expand All @@ -79,7 +80,7 @@ def get_mg_graph(dataset, directed, edge_ids, weight):
weight = None

if edge_ids:
dtype = ddf.dtypes[0]
dtype = ddf.dtypes.iloc[0]
edge_id = "edge_id"
ddf = ddf.assign(idx=1)
ddf["edge_id"] = ddf.idx.cumsum().astype(dtype) - 1
Expand Down
8 changes: 4 additions & 4 deletions python/cugraph/cugraph/tests/layout/test_force_atlas2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -122,9 +122,9 @@ def cugraph_call(
BARNES_HUT_OPTIMIZE = [False, True]


class TestCallback(GraphBasedDimRedCallback):
class ExampleCallback(GraphBasedDimRedCallback):
def __init__(self):
super(TestCallback, self).__init__()
super().__init__()
self.on_preprocess_end_called_count = 0
self.on_epoch_end_called_count = 0
self.on_train_end_called_count = 0
Expand All @@ -145,7 +145,7 @@ def on_train_end(self, positions):
@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
cu_M = graph_file.get_edgelist(download=True)
test_callback = TestCallback()
test_callback = ExampleCallback()
cu_pos = cugraph_call(
cu_M,
max_iter=max_iter,
Expand Down
7 changes: 6 additions & 1 deletion python/cugraph/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -56,3 +56,8 @@ python_files =
python_functions =
bench_*
test_*

filterwarnings =
error:::cudf
# Called via dask. Not obviously addressable in cugraph.
ignore:The behavior of array concatenation with empty entries is deprecated:FutureWarning:cudf

0 comments on commit a467da9

Please sign in to comment.