Skip to content

Commit

Permalink
add plc implementation of all-pairs similarity leveraging the capi
Browse files Browse the repository at this point in the history
  • Loading branch information
jnke2016 committed Jun 24, 2024
1 parent 34f8471 commit ea0e66d
Show file tree
Hide file tree
Showing 6 changed files with 526 additions and 1 deletion.
3 changes: 3 additions & 0 deletions python/pylibcugraph/pylibcugraph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ set(cython_sources
weakly_connected_components.pyx
replicate_edgelist.pyx
degrees.pyx
all_pairs_jaccard_coefficients.pyx
all_pairs_sorensen_coefficients.pyx
all_pairs_overlap_coefficients.pyx
)
set(linked_libraries cugraph::cugraph;cugraph::cugraph_c)

Expand Down
6 changes: 6 additions & 0 deletions python/pylibcugraph/pylibcugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@

from pylibcugraph.sorensen_coefficients import sorensen_coefficients

from pylibcugraph.all_pairs_jaccard_coefficients import all_pairs_jaccard_coefficients

from pylibcugraph.all_pairs_overlap_coefficients import all_pairs_overlap_coefficients

from pylibcugraph.all_pairs_sorensen_coefficients import all_pairs_sorensen_coefficients

from pylibcugraph.degrees import in_degrees, out_degrees, degrees


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h":
cugraph_similarity_result_free(
cugraph_similarity_result_t* result
)

###########################################################################
# jaccard coefficients
cdef cugraph_error_code_t \
Expand All @@ -63,6 +63,20 @@ cdef extern from "cugraph_c/similarity_algorithms.h":
cugraph_similarity_result_t** result,
cugraph_error_t** error
)

###########################################################################
# all-pairs jaccard coefficients
#
# Declaration of the C API entry point for the all-pairs Jaccard computation.
# The call returns a cugraph_error_code_t status. `result` and `error` are
# pointer-to-pointer arguments; the Python wrapper passes the addresses of
# local pointers and reads the similarity result back from `result`.
# NOTE(review): `vertices` presumably may be NULL to select every vertex in
# the graph (the Python wrapper documents `vertices` as "cudf.Series or
# None") - confirm against the C header.
# NOTE(review): the `topk` value that means "return all pairs" is not visible
# here - confirm against the C header.
cdef cugraph_error_code_t \
cugraph_all_pairs_jaccard_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* vertices,
bool_t use_weight,
size_t topk,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error
)

###########################################################################
# sorensen coefficients
Expand All @@ -76,6 +90,20 @@ cdef extern from "cugraph_c/similarity_algorithms.h":
cugraph_similarity_result_t** result,
cugraph_error_t** error
)

###########################################################################
# all-pairs sorensen coefficients
#
# Declaration of the C API entry point for the all-pairs Sorensen computation.
# It has the same parameter list as the all-pairs Jaccard and Overlap
# declarations and returns a cugraph_error_code_t status.
# NOTE(review): `result` and `error` are presumably out-parameters filled in
# by the callee - confirm against the C header.
# NOTE(review): `vertices` presumably may be NULL to select every vertex in
# the graph, and the `topk` value meaning "return all pairs" is not visible
# here - confirm both against the C header.
cdef cugraph_error_code_t \
cugraph_all_pairs_sorensen_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* vertices,
bool_t use_weight,
size_t topk,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error
)

###########################################################################
# overlap coefficients
Expand All @@ -89,3 +117,17 @@ cdef extern from "cugraph_c/similarity_algorithms.h":
cugraph_similarity_result_t** result,
cugraph_error_t** error
)

###########################################################################
# all-pairs overlap coefficients
#
# Declaration of the C API entry point for the all-pairs Overlap computation.
# It has the same parameter list as the all-pairs Jaccard and Sorensen
# declarations and returns a cugraph_error_code_t status.
# NOTE(review): `result` and `error` are presumably out-parameters filled in
# by the callee - confirm against the C header.
# NOTE(review): `vertices` presumably may be NULL to select every vertex in
# the graph, and the `topk` value meaning "return all pairs" is not visible
# here - confirm both against the C header.
cdef cugraph_error_code_t \
cugraph_all_pairs_overlap_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* vertices,
bool_t use_weight,
size_t topk,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error
)
158 changes: 158 additions & 0 deletions python/pylibcugraph/pylibcugraph/all_pairs_jaccard_coefficients.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Have cython use python 3 syntax
# cython: language_level = 3

from libc.stdint cimport uintptr_t
from libc.stdio cimport printf
from cython.operator cimport dereference

from pylibcugraph._cugraph_c.resource_handle cimport (
    bool_t,
    cugraph_resource_handle_t,
)
from pylibcugraph._cugraph_c.error cimport (
    cugraph_error_code_t,
    cugraph_error_t,
)
from pylibcugraph._cugraph_c.array cimport (
    cugraph_type_erased_device_array_view_t,
    cugraph_type_erased_device_array_view_free
)
from pylibcugraph._cugraph_c.graph_functions cimport (
    cugraph_vertex_pairs_t,
    cugraph_vertex_pairs_get_first,
    cugraph_vertex_pairs_get_second,
    cugraph_vertex_pairs_free,
    cugraph_create_vertex_pairs
)
from pylibcugraph._cugraph_c.graph cimport (
    cugraph_graph_t,
)
from pylibcugraph._cugraph_c.similarity_algorithms cimport (
    cugraph_all_pairs_jaccard_coefficients,
    cugraph_similarity_result_t,
    cugraph_similarity_result_get_similarity,
    cugraph_similarity_result_get_vertex_pairs,
    cugraph_similarity_result_free
)
from pylibcugraph.resource_handle cimport (
    ResourceHandle,
)
from pylibcugraph.graphs cimport (
    _GPUGraph,
)
from pylibcugraph.utils cimport (
    assert_success,
    copy_to_cupy_array,
    create_cugraph_type_erased_device_array_view_from_py_obj
)


def all_pairs_jaccard_coefficients(ResourceHandle resource_handle,
                                   _GPUGraph graph,
                                   vertices,
                                   bool_t use_weight,
                                   size_t topk,
                                   bool_t do_expensive_check):
    """
    Perform All-Pairs Jaccard similarity computation.

    Note that Jaccard similarity must run on a symmetric graph.

    Parameters
    ----------
    resource_handle : ResourceHandle
        Handle to the underlying device resources needed for referencing data
        and running algorithms.

    graph : SGGraph or MGGraph
        The input graph, for either Single or Multi-GPU operations.

    vertices : cudf.Series or None
        Vertex list to compute all-pairs. If None, then compute based
        on all vertices in the graph.

    use_weight : bool
        If set to True, then compute weighted jaccard_coefficients (the
        input graph must be weighted in that case). Otherwise, compute
        un-weighted jaccard_coefficients.

    topk : size_t
        Number of answers to return.
        NOTE(review): the value that requests *all* answers is decided by the
        C API and is not visible here - confirm before relying on it.

    do_expensive_check : bool
        If True, performs more extensive tests on the inputs to ensure
        validity, at the expense of increased run time.

    Returns
    -------
    A tuple of device arrays ``(first, second, similarity)`` containing the
    vertex pairs with their corresponding Jaccard coefficient scores.

    Examples
    --------
    # FIXME: No example yet

    """

    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
        resource_handle.c_resource_handle_ptr
    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr

    cdef cugraph_similarity_result_t* result_ptr
    cdef cugraph_error_code_t error_code
    cdef cugraph_error_t* error_ptr

    cdef cugraph_type_erased_device_array_view_t* \
        vertices_view_ptr = \
            create_cugraph_type_erased_device_array_view_from_py_obj(
                vertices)

    error_code = cugraph_all_pairs_jaccard_coefficients(c_resource_handle_ptr,
                                                        c_graph_ptr,
                                                        vertices_view_ptr,
                                                        use_weight,
                                                        topk,
                                                        do_expensive_check,
                                                        &result_ptr,
                                                        &error_ptr)
    assert_success(error_code, error_ptr, "cugraph_all_pairs_jaccard_coefficients")

    # Extract individual device array pointers from result and copy to cupy
    # arrays for returning.
    cdef cugraph_type_erased_device_array_view_t* similarity_ptr = \
        cugraph_similarity_result_get_similarity(result_ptr)

    cupy_similarity = copy_to_cupy_array(c_resource_handle_ptr, similarity_ptr)

    # The vertex pairs are an output of the all-pairs call, so they must be
    # fetched from the result. Previously this pointer was declared but never
    # assigned, and the accessors below were called on an uninitialized
    # pointer.
    cdef cugraph_vertex_pairs_t* vertex_pairs_ptr = \
        cugraph_similarity_result_get_vertex_pairs(result_ptr)

    cdef cugraph_type_erased_device_array_view_t* first_ptr = \
        cugraph_vertex_pairs_get_first(vertex_pairs_ptr)

    cupy_first = copy_to_cupy_array(c_resource_handle_ptr, first_ptr)

    cdef cugraph_type_erased_device_array_view_t* second_ptr = \
        cugraph_vertex_pairs_get_second(vertex_pairs_ptr)

    cupy_second = copy_to_cupy_array(c_resource_handle_ptr, second_ptr)

    # Free all pointers. The data was copied into cupy arrays above, so the
    # C-side objects are no longer needed.
    # NOTE(review): assumes cugraph_similarity_result_free() does not itself
    # release the vertex pairs, so both frees are required - confirm against
    # the C API implementation.
    cugraph_similarity_result_free(result_ptr)
    cugraph_vertex_pairs_free(vertex_pairs_ptr)

    cugraph_type_erased_device_array_view_free(vertices_view_ptr)

    return cupy_first, cupy_second, cupy_similarity
Loading

0 comments on commit ea0e66d

Please sign in to comment.