Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nxadb-to-nxcg #2

Merged
merged 27 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions _nx_arangodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
"functions": {
# BEGIN: functions
"betweenness_centrality",
"louvain_communities",
"louvain_partitions",
"modularity",
"pagerank",
"to_scipy_sparse_array",
# END: functions
},
"additional_docs": {
Expand All @@ -40,7 +45,21 @@
},
"additional_parameters": {
# BEGIN: additional_parameters

"louvain_communities": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"louvain_partitions": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"modularity": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"pagerank": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"to_scipy_sparse_array": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
# END: additional_parameters
},
}
Expand Down Expand Up @@ -91,8 +110,7 @@ def __call__(self, *args, **kwargs):

sys.modules["cupy"] = Stub()
sys.modules["numpy"] = Stub()
# sys.modules["pylibcugraph"] = Stub() # TODO Anthony: re-introduce when ready
sys.modules["python-arango"] = Stub() # TODO Anthony: Double check
sys.modules["python-arango"] = Stub()

from _nx_arangodb.core import main

Expand Down
4 changes: 0 additions & 4 deletions nx_arangodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
from . import convert
from .convert import *

# TODO Anthony: Do we need this?
# from . import convert_matrix
# from .convert_matrix import *

from . import algorithms
from .algorithms import *

Expand Down
4 changes: 3 additions & 1 deletion nx_arangodb/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from . import centrality
from . import centrality, community, link_analysis
from .centrality import *
from .community import *
from .link_analysis import *
45 changes: 15 additions & 30 deletions nx_arangodb/algorithms/centrality/betweenness.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from networkx.algorithms.centrality import betweenness as nx_betweenness

from nx_arangodb.convert import _to_graph as _to_nx_arangodb_graph
from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
from nx_arangodb.utils import networkx_algorithm

try:
import pylibcugraph as plc
from nx_cugraph.convert import _to_graph as _to_nx_cugraph_graph
from nx_cugraph.utils import _seed_to_int
import nx_cugraph as nxcg

GPU_ENABLED = True
print("ANTHONY: GPU is enabled")
except ModuleNotFoundError:
GPU_ENABLED = False
print("ANTHONY: GPU is disabled")


__all__ = ["betweenness_centrality"]

# 1. If GPU is enabled, call nx-cugraph bc() after converting to a nx_cugraph graph (in-memory graph)
# 2. If GPU is not enabled, call networkx bc() after converting to a networkx graph (in-memory graph)
# 3. If GPU is not enabled, call networkx bc() **without** converting to a networkx graph (remote graph)
# 1. If GPU is enabled, call nx-cugraph bc() after converting to an nxcg graph (in-memory graph)
# 2. If GPU is not enabled, call networkx bc() after converting to an nxadb graph (in-memory graph)
# 3. If GPU is not enabled, call networkx bc() **without** converting to an nxadb graph (remote graph)


@networkx_algorithm(
Expand All @@ -33,32 +33,19 @@ def betweenness_centrality(

# 1.
if GPU_ENABLED and run_on_gpu:
print("ANTHONY: GPU is enabled. Using nx-cugraph bc()")

if weight is not None:
raise NotImplementedError(
"Weighted implementation of betweenness centrality not currently supported"
)

seed = _seed_to_int(seed)
G = _to_nx_cugraph_graph(G, weight)
node_ids, values = plc.betweenness_centrality(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
k=k,
random_state=seed,
normalized=normalized,
include_endpoints=endpoints,
do_expensive_check=False,
)
print("ANTHONY: to_nxcg")
G = _to_nxcg_graph(G, weight)

return G._nodearrays_to_dict(node_ids, values)
print("ANTHONY: Using nxcg bc()")
return nxcg.betweenness_centrality(G, k=k, normalized=normalized, weight=weight)

# 2.
else:
print("ANTHONY: GPU is disabled. Using nx bc()")

G = _to_nx_arangodb_graph(G)
print("ANTHONY: to_nxadb")
G = _to_nxadb_graph(G)

print("ANTHONY: Using nx bc()")

betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
if k is None:
Expand Down Expand Up @@ -93,5 +80,3 @@ def betweenness_centrality(
)

return betweenness

# 3. TODO
1 change: 1 addition & 0 deletions nx_arangodb/algorithms/community/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .louvain import *
144 changes: 144 additions & 0 deletions nx_arangodb/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from collections import deque

import networkx as nx

from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
from nx_arangodb.utils import _dtype_param, networkx_algorithm

# Optional GPU backend: GPU_ENABLED records whether nx_cugraph could be imported.
# ImportError (the parent of ModuleNotFoundError) is caught so that an installed
# but unloadable nx_cugraph also falls back to the CPU path instead of breaking
# the import of this module.
try:
    import nx_cugraph as nxcg

    GPU_ENABLED = True
    print("ANTHONY: GPU is enabled")
except ImportError:
    GPU_ENABLED = False
    print("ANTHONY: GPU is disabled")


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    is_incomplete=True,  # seed not supported; self-loops not supported
    is_different=True,  # RNG different
    version_added="23.10",
    _plc="louvain",
    name="louvain_communities",
)
def louvain_communities(
    G,
    weight="weight",
    resolution=1,
    threshold=0.0000001,
    max_level=None,
    seed=None,
    run_on_gpu=True,
):
    """Return the final Louvain community partition of ``G``.

    When ``nx_cugraph`` was importable (``GPU_ENABLED``) and ``run_on_gpu`` is
    true, ``G`` is converted with ``_to_nxcg_graph`` and the work is delegated
    to nx-cugraph's ``_louvain_communities``. Otherwise ``G`` is converted with
    ``_to_nxadb_graph`` and the last partition produced by
    ``louvain_partitions`` is returned.

    Parameters
    ----------
    G : graph
        Input graph; converted by ``_to_nxcg_graph`` or ``_to_nxadb_graph``.
    weight : str or None
        Edge attribute used as the edge weight.
    resolution : float
        Resolution factor forwarded to the underlying implementation.
    threshold : float
        Minimum modularity gain required to continue to another level.
    max_level : int or None
        Maximum number of levels to compute; ``None`` means no limit.
    seed : int, random.Random, or None
        Seed or random number generator for the CPU path; forwarded unchanged
        to nx-cugraph on the GPU path.
    run_on_gpu : bool
        If false, the CPU path is used even when the GPU backend is available.

    Returns
    -------
    list of sets
        The last partition yielded by the chosen implementation.

    Raises
    ------
    ValueError
        On the CPU path, if ``max_level`` is not ``None`` and is not positive.
    """
    if GPU_ENABLED and run_on_gpu:
        print("ANTHONY: to_nxcg")
        G = _to_nxcg_graph(G, weight)

        print("ANTHONY: Using nxcg louvain()")
        return nxcg.algorithms.community.louvain._louvain_communities(
            G,
            weight=weight,
            resolution=resolution,
            threshold=threshold,
            max_level=max_level,
            seed=seed,
        )

    print("ANTHONY: to_nxadb")
    G = _to_nxadb_graph(G)

    print("ANTHONY: Using nx louvain()")
    from itertools import islice

    # Honor the caller's seed instead of always creating an unseeded generator.
    rng = nx.utils.create_py_random_state(seed)
    partitions = louvain_partitions(G, weight, resolution, threshold, rng)
    if max_level is not None:
        if max_level <= 0:
            raise ValueError("max_level argument must be a positive integer or None")
        # Each yielded partition is one level, so truncating caps the depth.
        partitions = islice(partitions, max_level)

    # A deque of length one keeps only the last (final) partition.
    return deque(partitions, maxlen=1).pop()


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    is_incomplete=True,  # seed not supported; self-loops not supported
    is_different=True,  # RNG different
    version_added="23.10",
    _plc="louvain",
    name="louvain_partitions",
)
def louvain_partitions(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
    """Yield the partition found at each level of the Louvain algorithm.

    Starts from singleton communities and repeatedly applies networkx's
    ``_one_level`` / ``_gen_graph`` helpers, yielding a copy of the current
    partition after every level. Iteration stops once the modularity gain
    between consecutive levels is no greater than ``threshold``.

    Parameters
    ----------
    G : graph
        Input graph. Multigraphs are first collapsed with networkx's
        ``_convert_multigraph`` helper.
    weight : str or None
        Edge attribute used as the edge weight (missing values default to 1).
    resolution : float
        Resolution factor passed to ``modularity`` and ``_one_level``.
    threshold : float
        Minimum modularity gain required to continue to another level.
    seed : int, random.Random, or None
        Seed or random number generator used by ``_one_level``.

    Yields
    ------
    list of sets
        The partition of the nodes of ``G`` at each level.
    """
    # The networkx helpers expect a random-number-generator object, so a
    # ``None`` or integer seed is normalized here (networkx does this with its
    # ``py_random_state`` decorator, which is not applied to this function).
    seed = nx.utils.create_py_random_state(seed)
    louvain = nx.community.louvain

    partition = [{u} for u in G.nodes()]
    if nx.is_empty(G):
        yield partition
        return
    mod = modularity(G, partition, resolution=resolution, weight=weight)
    is_directed = G.is_directed()
    if G.is_multigraph():
        # The private helper lives in the ``louvain`` submodule, like
        # ``_one_level`` and ``_gen_graph`` used below.
        graph = louvain._convert_multigraph(G, weight, is_directed)
    else:
        graph = G.__class__()
        graph.add_nodes_from(G)
        graph.add_weighted_edges_from(G.edges(data=weight, default=1))

    m = graph.size(weight="weight")
    partition, inner_partition, improvement = louvain._one_level(
        graph, m, partition, resolution, is_directed, seed
    )
    # Always yield the first level, whatever ``_one_level`` reported.
    improvement = True
    while improvement:
        # gh-5901 protect the sets in the yielded list from further manipulation here
        yield [s.copy() for s in partition]
        new_mod = modularity(
            graph, inner_partition, resolution=resolution, weight="weight"
        )
        if new_mod - mod <= threshold:
            return
        mod = new_mod
        graph = louvain._gen_graph(graph, inner_partition)
        partition, inner_partition, improvement = louvain._one_level(
            graph, m, partition, resolution, is_directed, seed
        )


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    is_incomplete=True,
    is_different=True,
    version_added="23.10",
)
def modularity(G, communities, weight="weight", resolution=1):
    """Return the modularity of ``communities`` with respect to ``G``.

    Each community contributes ``L_c / m - resolution * out * in * norm``,
    where ``L_c`` is the summed weight of edges with both endpoints in the
    community and ``out`` / ``in`` are the community's summed weighted
    degrees. The result is the sum over all communities.

    Parameters
    ----------
    G : graph
        Graph providing ``is_directed``, ``degree`` (or ``out_degree`` and
        ``in_degree``) and ``edges``.
    communities : iterable of node collections
        The communities to score; materialized to a list if not already one.
        It is not verified to be a partition of ``G``.
    weight : str or None
        Edge attribute used as the edge weight (missing values default to 1).
    resolution : float
        Multiplier applied to the degree-product term.

    Returns
    -------
    float
        The modularity value.
    """
    if not isinstance(communities, list):
        communities = list(communities)
    # Partition validation is currently disabled:
    # if not is_partition(G, communities):
    #     raise NotAPartition(G, communities)

    is_directed = G.is_directed()
    if is_directed:
        weighted_out = dict(G.out_degree(weight=weight))
        weighted_in = dict(G.in_degree(weight=weight))
        m = sum(weighted_out.values())
        norm = 1 / m**2
    else:
        # Undirected graphs use the same degree table for both roles.
        weighted_out = weighted_in = dict(G.degree(weight=weight))
        degree_total = sum(weighted_out.values())
        m = degree_total / 2
        norm = 1 / degree_total**2

    def _score(community):
        members = set(community)
        internal_weight = sum(
            wt
            for u, v, wt in G.edges(members, data=weight, default=1)
            if v in members
        )

        out_total = sum(weighted_out[u] for u in members)
        if is_directed:
            in_total = sum(weighted_in[u] for u in members)
        else:
            in_total = out_total

        return internal_weight / m - resolution * out_total * in_total * norm

    return sum(_score(community) for community in communities)
1 change: 1 addition & 0 deletions nx_arangodb/algorithms/link_analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pagerank_alg import *
Loading
Loading