Skip to content

Commit

Permalink
nxadb-to-nxcg (#2)
Browse files Browse the repository at this point in the history
* wip: nxadb-to-nxcg

using the adapter for now...

* fix: typo

* attempt fix: graph classes

* fix: graph classes (again)

* fix: typo

* add DiGraph property

not sure what's going on..

* nxadb-to-nxcg (rust) | initial commit

* print statements

* fix: function name

* fix: `as_directed`

* more print statements

* cleanup: `vertex_ids_to_index`

* new: `parallelism` & `batch_size` kwargs

hacky for now...

* Update digraph.py

* new: cache coo

* cleanup

* new: `louvain` & `pagerank`

* fix: condition

* update algorithms

* cleanup

* fix: bad import

* cleanup: convert

* new: Graph `pull` method

* update `digraph`

* fix: missing param

* copy methods to digraph

temporary workaround...

* new: `load_adj_dict_as_undirected`
  • Loading branch information
aMahanna authored May 2, 2024
1 parent 4d6ad31 commit cf41f50
Show file tree
Hide file tree
Showing 15 changed files with 816 additions and 100 deletions.
24 changes: 21 additions & 3 deletions _nx_arangodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
"functions": {
# BEGIN: functions
"betweenness_centrality",
"louvain_communities",
"louvain_partitions",
"modularity",
"pagerank",
"to_scipy_sparse_array",
# END: functions
},
"additional_docs": {
Expand All @@ -40,7 +45,21 @@
},
"additional_parameters": {
# BEGIN: additional_parameters

"louvain_communities": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"louvain_partitions": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"modularity": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"pagerank": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
"to_scipy_sparse_array": {
"dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
},
# END: additional_parameters
},
}
Expand Down Expand Up @@ -91,8 +110,7 @@ def __call__(self, *args, **kwargs):

sys.modules["cupy"] = Stub()
sys.modules["numpy"] = Stub()
# sys.modules["pylibcugraph"] = Stub() # TODO Anthony: re-introduce when ready
sys.modules["python-arango"] = Stub() # TODO Anthony: Double check
sys.modules["python-arango"] = Stub()

from _nx_arangodb.core import main

Expand Down
4 changes: 0 additions & 4 deletions nx_arangodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
from . import convert
from .convert import *

# TODO Anthony: Do we need this?
# from . import convert_matrix
# from .convert_matrix import *

from . import algorithms
from .algorithms import *

Expand Down
4 changes: 3 additions & 1 deletion nx_arangodb/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from . import centrality
from . import centrality, community, link_analysis
from .centrality import *
from .community import *
from .link_analysis import *
45 changes: 15 additions & 30 deletions nx_arangodb/algorithms/centrality/betweenness.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from networkx.algorithms.centrality import betweenness as nx_betweenness

from nx_arangodb.convert import _to_graph as _to_nx_arangodb_graph
from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
from nx_arangodb.utils import networkx_algorithm

try:
import pylibcugraph as plc
from nx_cugraph.convert import _to_graph as _to_nx_cugraph_graph
from nx_cugraph.utils import _seed_to_int
import nx_cugraph as nxcg

GPU_ENABLED = True
print("ANTHONY: GPU is enabled")
except ModuleNotFoundError:
GPU_ENABLED = False
print("ANTHONY: GPU is disabled")


__all__ = ["betweenness_centrality"]

# 1. If GPU is enabled, call nx-cugraph bc() after converting to a nx_cugraph graph (in-memory graph)
# 2. If GPU is not enabled, call networkx bc() after converting to a networkx graph (in-memory graph)
# 3. If GPU is not enabled, call networkx bc() **without** converting to a networkx graph (remote graph)
# 1. If GPU is enabled, call nx-cugraph bc() after converting to an nxcg graph (in-memory graph)
# 2. If GPU is not enabled, call networkx bc() after converting to an nxadb graph (in-memory graph)
# 3. If GPU is not enabled, call networkx bc() **without** converting to an nxadb graph (remote graph)


@networkx_algorithm(
Expand All @@ -33,32 +33,19 @@ def betweenness_centrality(

# 1.
if GPU_ENABLED and run_on_gpu:
print("ANTHONY: GPU is enabled. Using nx-cugraph bc()")

if weight is not None:
raise NotImplementedError(
"Weighted implementation of betweenness centrality not currently supported"
)

seed = _seed_to_int(seed)
G = _to_nx_cugraph_graph(G, weight)
node_ids, values = plc.betweenness_centrality(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
k=k,
random_state=seed,
normalized=normalized,
include_endpoints=endpoints,
do_expensive_check=False,
)
print("ANTHONY: to_nxcg")
G = _to_nxcg_graph(G, weight)

return G._nodearrays_to_dict(node_ids, values)
print("ANTHONY: Using nxcg bc()")
return nxcg.betweenness_centrality(G, k=k, normalized=normalized, weight=weight)

# 2.
else:
print("ANTHONY: GPU is disabled. Using nx bc()")

G = _to_nx_arangodb_graph(G)
print("ANTHONY: to_nxadb")
G = _to_nxadb_graph(G)

print("ANTHONY: Using nx bc()")

betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
if k is None:
Expand Down Expand Up @@ -93,5 +80,3 @@ def betweenness_centrality(
)

return betweenness

# 3. TODO
1 change: 1 addition & 0 deletions nx_arangodb/algorithms/community/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .louvain import *
144 changes: 144 additions & 0 deletions nx_arangodb/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from collections import deque

import networkx as nx

from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
from nx_arangodb.utils import _dtype_param, networkx_algorithm

# Probe for the optional GPU backend once, at import time. GPU_ENABLED records
# whether `nx_cugraph` could be imported; the algorithms below consult it
# before dispatching to the nxcg implementation.
try:
    import nx_cugraph as nxcg
except ModuleNotFoundError:
    GPU_ENABLED = False
    print("ANTHONY: GPU is disabled")
else:
    GPU_ENABLED = True
    print("ANTHONY: GPU is enabled")


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    is_incomplete=True,  # seed not supported; self-loops not supported
    is_different=True,  # RNG different
    version_added="23.10",
    _plc="louvain",
    name="louvain_communities",
)
def louvain_communities(
    G,
    weight="weight",
    resolution=1,
    threshold=0.0000001,
    max_level=None,
    seed=None,
    run_on_gpu=True,
):
    """Return the last partition produced by the Louvain method on ``G``.

    When ``nx_cugraph`` is importable and ``run_on_gpu`` is true, ``G`` is
    converted with ``_to_nxcg_graph`` and the nx-cugraph implementation is
    called. Otherwise ``G`` is converted with ``_to_nxadb_graph`` and the
    partitions yielded by :func:`louvain_partitions` are consumed, keeping
    only the final one.

    Parameters
    ----------
    G : graph
        Input graph; converted by ``_to_nxcg_graph`` / ``_to_nxadb_graph``.
    weight : str or None
        Edge attribute name used as the edge weight.
    resolution : float
        Resolution factor forwarded to the underlying implementation.
    threshold : float
        Minimum modularity gain required to continue to another level.
    max_level : int or None
        Maximum number of levels (partitions) to compute. ``None`` means no
        limit. Must be positive when given.
    seed : int, random.Random, or None
        Random state. On the CPU path it is normalised with
        ``networkx.utils.create_py_random_state``.
    run_on_gpu : bool
        Set to ``False`` to force the CPU path even when the GPU backend is
        available.

    Returns
    -------
    list of set
        The final partition (CPU path), or whatever the nx-cugraph
        implementation returns (GPU path).

    Raises
    ------
    ValueError
        On the CPU path, if ``max_level`` is given and is not positive.
    """
    if GPU_ENABLED and run_on_gpu:
        print("ANTHONY: to_nxcg")
        G = _to_nxcg_graph(G, weight)

        print("ANTHONY: Using nxcg louvain()")
        return nxcg.algorithms.community.louvain._louvain_communities(
            G,
            weight=weight,
            resolution=resolution,
            threshold=threshold,
            max_level=max_level,
            seed=seed,
        )

    # Local import, mirroring the function-scope import the original CPU path
    # used; keeps this block self-contained.
    import itertools

    # Validate before doing the (potentially expensive) graph conversion.
    if max_level is not None and max_level <= 0:
        raise ValueError("max_level argument must be a positive integer or None")

    print("ANTHONY: to_nxadb")
    G = _to_nxadb_graph(G)

    print("ANTHONY: Using nx louvain()")

    # Honour the caller's seed instead of always using a fresh, unseeded RNG.
    rng = nx.utils.create_py_random_state(seed)
    partitions = louvain_partitions(G, weight, resolution, threshold, rng)
    if max_level is not None:
        # Each yielded partition corresponds to one level.
        partitions = itertools.islice(partitions, max_level)

    # A deque with maxlen=1 drains the generator and retains only the last item.
    return deque(partitions, maxlen=1).pop()


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    is_incomplete=True,  # seed not supported; self-loops not supported
    is_different=True,  # RNG different
    version_added="23.10",
    _plc="louvain",
    name="louvain_partitions",
)
def louvain_partitions(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
    """Yield the partition found at each level of the Louvain method.

    Starts from singleton communities and repeatedly calls networkx's
    ``_one_level`` / ``_gen_graph`` helpers, yielding a copy of the current
    partition after every level. Iteration stops once the modularity gain
    between two consecutive levels is ``<= threshold``.

    Parameters
    ----------
    G : graph
        Input graph. Multigraphs are collapsed with networkx's
        ``_convert_multigraph`` helper; other graphs are copied into a fresh
        instance of ``G.__class__`` with a ``"weight"`` attribute per edge.
    weight : str or None
        Edge attribute name used as the edge weight (missing values count
        as 1 for non-multigraphs).
    resolution : float
        Resolution factor passed to ``modularity`` and ``_one_level``.
    threshold : float
        Minimum modularity gain required to continue to another level.
    seed : int, random.Random, or None
        Random state; normalised with
        ``networkx.utils.create_py_random_state`` because ``_one_level``
        needs an RNG object rather than a raw seed.

    Yields
    ------
    list of set
        The partition of the nodes of ``G`` at each level.
    """
    # networkx normally does this conversion via a decorator; without it the
    # default ``seed=None`` would be handed to ``_one_level`` unchanged.
    seed = nx.utils.create_py_random_state(seed)

    partition = [{u} for u in G.nodes()]
    if nx.is_empty(G):
        yield partition
        return

    mod = modularity(G, partition, resolution=resolution, weight=weight)
    is_directed = G.is_directed()
    if G.is_multigraph():
        # The helper is private to the louvain submodule and is not
        # re-exported on ``nx.community``.
        graph = nx.community.louvain._convert_multigraph(G, weight, is_directed)
    else:
        graph = G.__class__()
        graph.add_nodes_from(G)
        graph.add_weighted_edges_from(G.edges(data=weight, default=1))

    m = graph.size(weight="weight")
    partition, inner_partition, improvement = nx.community.louvain._one_level(
        graph, m, partition, resolution, is_directed, seed
    )
    # Forced to True so the first level's partition is always yielded,
    # whatever ``_one_level`` reported.
    improvement = True
    while improvement:
        # gh-5901 protect the sets in the yielded list from further manipulation here
        yield [s.copy() for s in partition]
        new_mod = modularity(
            graph, inner_partition, resolution=resolution, weight="weight"
        )
        if new_mod - mod <= threshold:
            return
        mod = new_mod
        graph = nx.community.louvain._gen_graph(graph, inner_partition)
        partition, inner_partition, improvement = nx.community.louvain._one_level(
            graph, m, partition, resolution, is_directed, seed
        )


@networkx_algorithm(
    extra_params={
        **_dtype_param,
    },
    # NOTE(review): these two flags and their rationale look copied from the
    # louvain functions in this file (this function takes no seed) -- confirm.
    is_incomplete=True,
    is_different=True,
    version_added="23.10",
)
def modularity(G, communities, weight="weight", resolution=1):
    """Return the modularity of ``communities`` with respect to ``G``.

    Parameters
    ----------
    G : graph
        Graph exposing ``is_directed``, ``degree`` (or ``in_degree`` /
        ``out_degree`` when directed) and ``edges``.
    communities : iterable of iterables of nodes
        The communities to score; materialised into a list if it is not
        one already. No check is made that they form a partition of ``G``.
    weight : str or None
        Edge attribute name used as the edge weight (missing values count
        as 1 when summing intra-community edges).
    resolution : float
        Multiplier applied to the degree-product term.

    Returns
    -------
    float
        Sum of the per-community contributions.
    """
    community_list = communities if isinstance(communities, list) else list(communities)
    # The partition validity check is intentionally not performed here.

    is_directed_graph = G.is_directed()
    if is_directed_graph:
        out_deg = dict(G.out_degree(weight=weight))
        in_deg = dict(G.in_degree(weight=weight))
        total_weight = sum(out_deg.values())
        scale = 1 / total_weight**2
    else:
        # Undirected: a single degree map serves as both in- and out-degree.
        out_deg = in_deg = dict(G.degree(weight=weight))
        degree_total = sum(out_deg.values())
        total_weight = degree_total / 2
        scale = 1 / degree_total**2

    def _contribution(members):
        """Score one community: intra-community weight minus expected weight."""
        nodes = set(members)
        # Only edges whose far endpoint is also inside the community count.
        internal_weight = sum(
            wt for _, nbr, wt in G.edges(nodes, data=weight, default=1) if nbr in nodes
        )

        out_total = sum(out_deg[node] for node in nodes)
        if is_directed_graph:
            in_total = sum(in_deg[node] for node in nodes)
        else:
            in_total = out_total

        return internal_weight / total_weight - resolution * out_total * in_total * scale

    return sum(_contribution(group) for group in community_list)
1 change: 1 addition & 0 deletions nx_arangodb/algorithms/link_analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pagerank_alg import *
Loading

0 comments on commit cf41f50

Please sign in to comment.