nxadb-to-nxcg (#2)

* wip: nxadb-to-nxcg using the adapter for now... * fix: typo * attempt fix: graph classes * fix: graph classes (again) * fix: typo * add DiGraph property not sure what's going on.. * nxadb-to-nxcg (rust) | initial commit * print statements * fix: function name * fix: `as_directed` * more print statements * cleanup: `vertex_ids_to_index` * new: `parallelism` & `batch_size` kwargs hacky for now... * Update digraph.py * new: cache coo * cleanup * new: `louvain` & `pagerank` * fix: condition * update algorithms * cleanup * fix: bad import * cleanup: convert * new: Graph `pull` method * update `digraph` * fix: missing param * copy methods to digraph temporary workaround... * new: `load_adj_dict_as_undirected`
arangodb · May 2, 2024 · cf41f50 · cf41f50
1 parent 4d6ad31
commit cf41f50
Show file tree

Hide file tree

Showing 15 changed files with 816 additions and 100 deletions.
diff --git a/_nx_arangodb/__init__.py b/_nx_arangodb/__init__.py
@@ -31,6 +31,11 @@
     "functions": {
         # BEGIN: functions
         "betweenness_centrality",
+        "louvain_communities",
+        "louvain_partitions",
+        "modularity",
+        "pagerank",
+        "to_scipy_sparse_array",
         # END: functions
     },
     "additional_docs": {
@@ -40,7 +45,21 @@
     },
     "additional_parameters": {
         # BEGIN: additional_parameters
-
+        "louvain_communities": {
+            "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
+        },
+        "louvain_partitions": {
+            "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
+        },
+        "modularity": {
+            "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
+        },
+        "pagerank": {
+            "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
+        },
+        "to_scipy_sparse_array": {
+            "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.",
+        },
         # END: additional_parameters
     },
 }
@@ -91,8 +110,7 @@ def __call__(self, *args, **kwargs):
 
     sys.modules["cupy"] = Stub()
     sys.modules["numpy"] = Stub()
-    # sys.modules["pylibcugraph"] = Stub() # TODO Anthony: re-introduce when ready
-    sys.modules["python-arango"] = Stub() # TODO Anthony: Double check
+    sys.modules["python-arango"] = Stub()
 
     from _nx_arangodb.core import main
 

diff --git a/nx_arangodb/__init__.py b/nx_arangodb/__init__.py
@@ -10,10 +10,6 @@
 from . import convert
 from .convert import *
 
-# TODO Anthony: Do we need this?
-# from . import convert_matrix
-# from .convert_matrix import *
-
 from . import algorithms
 from .algorithms import *
 

diff --git a/nx_arangodb/algorithms/__init__.py b/nx_arangodb/algorithms/__init__.py
@@ -1,2 +1,4 @@
-from . import centrality
+from . import centrality, community, link_analysis
 from .centrality import *
+from .community import *
+from .link_analysis import *
diff --git a/nx_arangodb/algorithms/centrality/betweenness.py b/nx_arangodb/algorithms/centrality/betweenness.py
@@ -1,23 +1,23 @@
 from networkx.algorithms.centrality import betweenness as nx_betweenness
 
-from nx_arangodb.convert import _to_graph as _to_nx_arangodb_graph
+from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
 from nx_arangodb.utils import networkx_algorithm
 
 try:
-    import pylibcugraph as plc
-    from nx_cugraph.convert import _to_graph as _to_nx_cugraph_graph
-    from nx_cugraph.utils import _seed_to_int
+    import nx_cugraph as nxcg
 
     GPU_ENABLED = True
+    print("ANTHONY: GPU is enabled")
 except ModuleNotFoundError:
     GPU_ENABLED = False
+    print("ANTHONY: GPU is disabled")
 
 
 __all__ = ["betweenness_centrality"]
 
-# 1. If GPU is enabled, call nx-cugraph bc() after converting to a nx_cugraph graph (in-memory graph)
-# 2. If GPU is not enabled, call networkx bc() after converting to a networkx graph (in-memory graph)
-# 3. If GPU is not enabled, call networkx bc() **without** converting to a networkx graph (remote graph)
+# 1. If GPU is enabled, call nx-cugraph bc() after converting to an nxcg graph (in-memory graph)
+# 2. If GPU is not enabled, call networkx bc() after converting to an nxadb graph (in-memory graph)
+# 3. If GPU is not enabled, call networkx bc() **without** converting to an nxadb graph (remote graph)
 
 
 @networkx_algorithm(
@@ -33,32 +33,19 @@ def betweenness_centrality(
 
     # 1.
     if GPU_ENABLED and run_on_gpu:
-        print("ANTHONY: GPU is enabled. Using nx-cugraph bc()")
-
-        if weight is not None:
-            raise NotImplementedError(
-                "Weighted implementation of betweenness centrality not currently supported"
-            )
-
-        seed = _seed_to_int(seed)
-        G = _to_nx_cugraph_graph(G, weight)
-        node_ids, values = plc.betweenness_centrality(
-            resource_handle=plc.ResourceHandle(),
-            graph=G._get_plc_graph(),
-            k=k,
-            random_state=seed,
-            normalized=normalized,
-            include_endpoints=endpoints,
-            do_expensive_check=False,
-        )
+        print("ANTHONY: to_nxcg")
+        G = _to_nxcg_graph(G, weight)
 
-        return G._nodearrays_to_dict(node_ids, values)
+        print("ANTHONY: Using nxcg bc()")
+        return nxcg.betweenness_centrality(G, k=k, normalized=normalized, weight=weight)
 
     # 2.
     else:
-        print("ANTHONY: GPU is disabled. Using nx bc()")
 
-        G = _to_nx_arangodb_graph(G)
+        print("ANTHONY: to_nxadb")
+        G = _to_nxadb_graph(G)
+
+        print("ANTHONY: Using nx bc()")
 
         betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
         if k is None:
@@ -93,5 +80,3 @@ def betweenness_centrality(
         )
 
         return betweenness
-
-    # 3. TODO
diff --git a/nx_arangodb/algorithms/community/__init__.py b/nx_arangodb/algorithms/community/__init__.py
@@ -0,0 +1 @@
+from .louvain import *
diff --git a/nx_arangodb/algorithms/community/louvain.py b/nx_arangodb/algorithms/community/louvain.py
@@ -0,0 +1,144 @@
+from collections import deque
+
+import networkx as nx
+
+from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
+from nx_arangodb.utils import _dtype_param, networkx_algorithm
+
+try:
+    import nx_cugraph as nxcg
+
+    GPU_ENABLED = True
+    print("ANTHONY: GPU is enabled")
+except ModuleNotFoundError:
+    GPU_ENABLED = False
+    print("ANTHONY: GPU is disabled")
+
+
+@networkx_algorithm(
+    extra_params={
+        **_dtype_param,
+    },
+    is_incomplete=True,  # seed not supported; self-loops not supported
+    is_different=True,  # RNG different
+    version_added="23.10",
+    _plc="louvain",
+    name="louvain_communities",
+)
+def louvain_communities(
+    G,
+    weight="weight",
+    resolution=1,
+    threshold=0.0000001,
+    max_level=None,
+    seed=None,
+    run_on_gpu=True,
+):
+    if GPU_ENABLED and run_on_gpu:
+        print("ANTHONY: to_nxcg")
+        G = _to_nxcg_graph(G, weight)
+
+        print("ANTHONY: Using nxcg louvain()")
+        return nxcg.algorithms.community.louvain._louvain_communities(
+            G,
+            weight=weight,
+            resolution=resolution,
+            threshold=threshold,
+            max_level=max_level,
+            seed=seed,
+        )
+
+    else:
+        print("ANTHONY: to_nxadb")
+        G = _to_nxadb_graph(G)
+
+        print("ANTHONY: Using nx pagerank()")
+        import random
+
+        d = louvain_partitions(G, weight, resolution, threshold, random.Random())
+        q = deque(d, maxlen=1)
+        return q.pop()
+
+
+@networkx_algorithm(
+    extra_params={
+        **_dtype_param,
+    },
+    is_incomplete=True,  # seed not supported; self-loops not supported
+    is_different=True,  # RNG different
+    version_added="23.10",
+    _plc="louvain",
+    name="louvain_partitions",
+)
+def louvain_partitions(
+    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
+):
+    partition = [{u} for u in G.nodes()]
+    if nx.is_empty(G):
+        yield partition
+        return
+    mod = modularity(G, partition, resolution=resolution, weight=weight)
+    is_directed = G.is_directed()
+    if G.is_multigraph():
+        graph = nx.community._convert_multigraph(G, weight, is_directed)
+    else:
+        graph = G.__class__()
+        graph.add_nodes_from(G)
+        graph.add_weighted_edges_from(G.edges(data=weight, default=1))
+
+    m = graph.size(weight="weight")
+    partition, inner_partition, improvement = nx.community.louvain._one_level(
+        graph, m, partition, resolution, is_directed, seed
+    )
+    improvement = True
+    while improvement:
+        # gh-5901 protect the sets in the yielded list from further manipulation here
+        yield [s.copy() for s in partition]
+        new_mod = modularity(
+            graph, inner_partition, resolution=resolution, weight="weight"
+        )
+        if new_mod - mod <= threshold:
+            return
+        mod = new_mod
+        graph = nx.community.louvain._gen_graph(graph, inner_partition)
+        partition, inner_partition, improvement = nx.community.louvain._one_level(
+            graph, m, partition, resolution, is_directed, seed
+        )
+
+
+@networkx_algorithm(
+    extra_params={
+        **_dtype_param,
+    },
+    is_incomplete=True,  # seed not supported; self-loops not supported
+    is_different=True,  # RNG different
+    version_added="23.10",
+)
+def modularity(G, communities, weight="weight", resolution=1):
+    if not isinstance(communities, list):
+        communities = list(communities)
+    # if not is_partition(G, communities):
+    #     raise NotAPartition(G, communities)
+
+    directed = G.is_directed()
+    if directed:
+        out_degree = dict(G.out_degree(weight=weight))
+        in_degree = dict(G.in_degree(weight=weight))
+        m = sum(out_degree.values())
+        norm = 1 / m**2
+    else:
+        out_degree = in_degree = dict(G.degree(weight=weight))
+        deg_sum = sum(out_degree.values())
+        m = deg_sum / 2
+        norm = 1 / deg_sum**2
+
+    def community_contribution(community):
+        comm = set(community)
+        L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
+
+        out_degree_sum = sum(out_degree[u] for u in comm)
+        in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
+
+        return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
+
+    return sum(map(community_contribution, communities))
diff --git a/nx_arangodb/algorithms/link_analysis/__init__.py b/nx_arangodb/algorithms/link_analysis/__init__.py
@@ -0,0 +1 @@
+from .pagerank_alg import *