Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MG Implementation K-Truss #4438

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
41d62bc
update docstring
jnke2016 May 23, 2024
6ccfac3
update docstring
jnke2016 May 23, 2024
4e33a25
add mg implementation of K-Truss
jnke2016 May 23, 2024
ab143fb
update docstrings
jnke2016 May 23, 2024
50ba897
update docstrings
jnke2016 May 23, 2024
9a1cb67
add type annotation
jnke2016 May 23, 2024
bbeece9
fix style
jnke2016 May 23, 2024
d4c4575
add tests for mg k-truss
jnke2016 May 23, 2024
557f6d1
handle edge case
jnke2016 May 23, 2024
a931f30
Merge branch 'branch-24.08' into branch-24.06_mg-python-k-truss
nv-rliu Jun 19, 2024
ae41405
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 15, 2024
dd0ceac
fix style
jnke2016 Jul 15, 2024
6c07651
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 29, 2024
c3b1bab
fix style
jnke2016 Jul 29, 2024
9a865d1
update copyright
jnke2016 Jul 29, 2024
8695e1f
update copyright
jnke2016 Jul 29, 2024
ef32a63
follow PEP8 when ordering imports
jnke2016 Jul 29, 2024
81eee49
udpate docstrings
jnke2016 Jul 29, 2024
b36320d
remove deprecated parameter
jnke2016 Jul 29, 2024
372b812
remove outdated tests
jnke2016 Jul 29, 2024
9bc610b
fix typo
jnke2016 Jul 29, 2024
84efc9e
add tests for directed graphs
jnke2016 Jul 29, 2024
235ab8b
fix style
jnke2016 Jul 29, 2024
486b92b
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 29, 2024
10c2a6d
update copyright
jnke2016 Jul 29, 2024
5c3ae0a
fix typo
jnke2016 Jul 29, 2024
b4d64f2
update copyright change
jnke2016 Jul 29, 2024
55e3bdf
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 30, 2024
6989716
update branch
jnke2016 Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 10 additions & 52 deletions python/cugraph/cugraph/community/ktruss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure.graph_classes import Graph
from typing import Union

import cudf
from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph
from pylibcugraph import ResourceHandle
from cugraph.structure.graph_classes import Graph
from cugraph.utilities import (
ensure_cugraph_obj_for_nx,
cugraph_to_nx,
)

from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph
from pylibcugraph import ResourceHandle
import warnings

from numba import cuda
import cudf
from cugraph.utilities.utils import import_optional

# FIXME: the networkx.Graph type used in the type annotation for
Expand All @@ -34,37 +31,17 @@
networkx = import_optional("networkx")


# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to
# crash in that environment. Allow ktruss to import on non-11.4 systems, but
# raise an exception if ktruss is directly imported on 11.4.
def _ensure_compatible_cuda_version():
    """
    Raise if the CUDA runtime reported by numba is the unsupported 11.4.

    The runtime version is queried through ``cuda.runtime.get_version()``.
    If that query raises ``CudaRuntimeAPIError`` the version is treated as
    unknown ("n/a"), which never matches the unsupported version, so the
    function returns without raising.

    Raises
    ------
    NotImplementedError
        If the detected runtime version equals ``(11, 4)``.
    """
    blocked_version = (11, 4)

    try:
        detected_version = cuda.runtime.get_version()
    except cuda.cudadrv.runtime.CudaRuntimeAPIError:
        # No usable runtime version; use a placeholder that cannot compare
        # equal to the blocked tuple.
        detected_version = "n/a"

    if detected_version != blocked_version:
        return

    ver_string = ".".join(map(str, blocked_version))
    raise NotImplementedError(
        "k_truss is not currently supported in CUDA" f" {ver_string} environments."
    )


def k_truss(
G: Union[Graph, "networkx.Graph"], k: int
) -> Union[Graph, "networkx.Graph"]:
"""
Returns the K-Truss subgraph of a graph for a specific k.

NOTE: this function is currently not available on CUDA 11.4 systems.

The k-truss of a graph is a subgraph where each edge is part of at least
(k−2) triangles. K-trusses are used for finding tightly knit groups of
vertices in a graph. A k-truss is a relaxation of a k-clique in the graph
and was defined in [1]. Finding cliques is computationally demanding and
finding the maximal k-clique is known to be NP-Hard.
The k-truss of a graph is a subgraph where each edge is incident to at
least (k−2) triangles. K-trusses are used for finding tightly knit groups
of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph.
Finding cliques is computationally demanding and finding the maximal
k-clique is known to be NP-Hard.

Parameters
----------
Expand All @@ -89,9 +66,6 @@ def k_truss(
>>> k_subgraph = cugraph.k_truss(G, 3)

"""

_ensure_compatible_cuda_version()

G, isNx = ensure_cugraph_obj_for_nx(G)

if isNx is True:
Expand Down Expand Up @@ -159,12 +133,6 @@ def ktruss_subgraph(
k : int
The desired k to be used for extracting the k-truss subgraph.

use_weights : bool, optional (default=True)
Whether the output should contain the edge weights if G has them.

Deprecated: If 'weights' were passed at the graph creation, they will
be used.

Returns
-------
G_truss : cuGraph.Graph
Expand All @@ -177,20 +145,10 @@ def ktruss_subgraph(
>>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
"""

_ensure_compatible_cuda_version()

KTrussSubgraph = Graph()
if G.is_directed():
raise ValueError("input graph must be undirected")

if use_weights:
warning_msg = (
"The use_weights flag is deprecated "
"and will be removed in the next release. if weights "
"were passed at the graph creation, they will be used."
)
warnings.warn(warning_msg, FutureWarning)

sources, destinations, edge_weights, _ = pylibcugraph_k_truss_subgraph(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .community.triangle_count import triangle_count
from .community.egonet import ego_graph
from .community.induced_subgraph import induced_subgraph
from .community.ktruss_subgraph import ktruss_subgraph
from .centrality.katz_centrality import katz_centrality
from .components.connectivity import weakly_connected_components
from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
Expand Down
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/dask/community/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -15,3 +15,4 @@
from .triangle_count import triangle_count
from .induced_subgraph import induced_subgraph
from .leiden import leiden
from .ktruss_subgraph import ktruss_subgraph
119 changes: 119 additions & 0 deletions python/cugraph/cugraph/dask/community/ktruss_subgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Tuple

import cudf
import cupy as cp
from dask.distributed import wait, default_client
import dask_cudf

from pylibcugraph import (
ResourceHandle,
k_truss_subgraph as pylibcugraph_k_truss_subgraph,
)
import cugraph.dask.comms.comms as Comms


def _call_k_truss_subgraph(
    sID: bytes,
    mg_graph_x,
    k: int,
    do_expensive_check: bool,
) -> Tuple[cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray]:
    """
    Invoke the pylibcugraph K-Truss routine for one worker.

    Parameters
    ----------
    sID : bytes
        Comms session id; used to look up the handle that is wrapped in a
        ``ResourceHandle`` for the pylibcugraph call.

    mg_graph_x
        Graph object forwarded as the ``graph`` argument. The caller in this
        module passes ``input_graph._plc_graph[w]`` for worker ``w``.

    k : int
        The desired k, forwarded unchanged.

    do_expensive_check : bool
        Forwarded unchanged to pylibcugraph.

    Returns
    -------
    tuple
        Whatever ``pylibcugraph.k_truss_subgraph`` returns. Consumers in this
        module unpack it as four values ``(src, dst, weight, _)``, hence the
        4-tuple annotation (the previous 3-tuple annotation did not match).
        NOTE(review): the fourth element is presumably the subgraph offsets
        array; confirm against the pylibcugraph documentation.
    """
    return pylibcugraph_k_truss_subgraph(
        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
        graph=mg_graph_x,
        k=k,
        do_expensive_check=do_expensive_check,
    )


def convert_to_cudf(
    cp_arrays: Tuple[cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray]
) -> cudf.DataFrame:
    """
    Build a cudf.DataFrame from the tuple produced by the K-Truss call.

    Parameters
    ----------
    cp_arrays : tuple
        Four-element tuple unpacked as ``(src, dst, weight, _)``; the last
        element is ignored. (The parameter was previously annotated as a
        single ``cp.ndarray``, which did not match this unpacking.)

    Returns
    -------
    df : cudf.DataFrame
        Frame with columns "src" and "dst" when the source array is not
        None, plus "weight" when the weight array is not None. If the source
        array is None the frame has no "src"/"dst" columns.
    """
    cp_src, cp_dst, cp_weight, _ = cp_arrays

    df = cudf.DataFrame()
    if cp_src is not None:
        # "dst" is only populated together with "src".
        df["src"] = cp_src
        df["dst"] = cp_dst
    if cp_weight is not None:
        df["weight"] = cp_weight

    return df


def ktruss_subgraph(input_graph, k: int) -> dask_cudf.DataFrame:
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is incident to at
    least (k−2) triangles. K-trusses are used for finding tightly knit groups
    of vertices in a graph. A k-truss is a relaxation of a k-clique in the
    graph. Finding cliques is computationally demanding and finding the
    maximal k-clique is known to be NP-Hard.

    Parameters
    ----------
    input_graph : cugraph.Graph
        Graph object containing the connectivity information. It must be
        undirected. Edge weights, if present, should be single or double
        precision floating point values.

    k : int
        The desired k to be used for extracting the k-truss subgraph.

    Returns
    -------
    k_truss_edge_lists : dask_cudf.DataFrame
        Distributed GPU data frame containing the source identifiers ("src"),
        destination identifiers ("dst"), and edge weights ("weight", when
        present) of the edges belonging to the truss.

    Raises
    ------
    ValueError
        If ``input_graph`` is directed.
    """
    if input_graph.is_directed():
        raise ValueError("input graph must be undirected")

    # Initialize dask client
    client = default_client()

    do_expensive_check = False

    # One task per worker, pinned to that worker so it runs against the
    # graph partition stored under the same worker key.
    truss_futures = []
    for worker in Comms.get_workers():
        truss_futures.append(
            client.submit(
                _call_k_truss_subgraph,
                Comms.get_session_id(),
                input_graph._plc_graph[worker],
                k,
                do_expensive_check,
                workers=[worker],
                allow_other_workers=False,
            )
        )
    wait(truss_futures)

    # Turn each worker's array tuple into a cudf.DataFrame.
    frame_futures = [
        client.submit(convert_to_cudf, array_future) for array_future in truss_futures
    ]
    wait(frame_futures)

    ddf = dask_cudf.from_delayed(frame_futures).persist()
    wait(ddf)

    # Wait until the inactive futures are released
    released = [
        (array_future.release(), frame_future.release())
        for array_future, frame_future in zip(truss_futures, frame_futures)
    ]
    wait(released)

    if input_graph.renumbered:
        # Map internal vertex ids back to the original ids, column by column.
        ddf = input_graph.unrenumber(ddf, "src")
        ddf = input_graph.unrenumber(ddf, "dst")

    return ddf
34 changes: 0 additions & 34 deletions python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import cugraph
from cugraph.testing import utils
from cugraph.datasets import polbooks, karate_asymmetric
from numba import cuda


# =============================================================================
Expand Down Expand Up @@ -67,32 +66,7 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file):
return True


__cuda_version = cuda.runtime.get_version()
__unsupported_cuda_version = (11, 4)


# FIXME: remove when ktruss is supported on CUDA 11.4
@pytest.mark.sg
def test_unsupported_cuda_version():
    """
    Check k_truss against the CUDA-version guard: on the unsupported CUDA
    version the call must raise NotImplementedError, on any other version
    the call must complete without raising.
    """
    k = 5

    G = polbooks.get_graph(download=True)

    if __cuda_version != __unsupported_cuda_version:
        # Supported environment: the call itself is the assertion.
        cugraph.k_truss(G, k)
        return

    with pytest.raises(NotImplementedError):
        cugraph.k_truss(G, k)


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
@pytest.mark.parametrize("_, nx_ground_truth", utils.DATASETS_KTRUSS)
def test_ktruss_subgraph_Graph(_, nx_ground_truth):

Expand All @@ -104,10 +78,6 @@ def test_ktruss_subgraph_Graph(_, nx_ground_truth):


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
def test_ktruss_subgraph_Graph_nx():
k = 5
dataset_path = polbooks.get_path()
Expand All @@ -122,10 +92,6 @@ def test_ktruss_subgraph_Graph_nx():


@pytest.mark.sg
@pytest.mark.skipif(
(__cuda_version == __unsupported_cuda_version),
reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.",
)
def test_ktruss_subgraph_directed_Graph():
k = 5
edgevals = True
Expand Down
Loading
Loading