From 7e46c5f9b25c1c0c4866b556f5e16383b8eb08e1 Mon Sep 17 00:00:00 2001
From: Anthony Mahanna
Date: Sun, 28 Apr 2024 22:17:30 -0400
Subject: [PATCH] wip: nxadb-to-nxcg using the adapter for now...

---
Notes (text between the "---" marker and the diff is ignored by git-am):
- Graph.set_graph_name() writes the private __graph_exists attribute;
  `graph_exists` is a getter-only property, so assigning to it raised
  AttributeError.
- Comment fixes only: "ncxg" -> "nxcg", "G.adb_graph_name" -> "G.graph_name".
- Line structure and indentation were restored from a whitespace-collapsed
  copy; verify context lines against the target tree before applying.

 .../algorithms/centrality/betweenness.py | 13 ++-
 nx_arangodb/classes/graph.py             | 82 +++++++++++++++++++
 nx_arangodb/convert.py                   | 69 +++++++++++++++-
 3 files changed, 154 insertions(+), 10 deletions(-)

diff --git a/nx_arangodb/algorithms/centrality/betweenness.py b/nx_arangodb/algorithms/centrality/betweenness.py
index c8bd0f1b..8b8b945b 100644
--- a/nx_arangodb/algorithms/centrality/betweenness.py
+++ b/nx_arangodb/algorithms/centrality/betweenness.py
@@ -1,11 +1,10 @@
 from networkx.algorithms.centrality import betweenness as nx_betweenness
 
-from nx_arangodb.convert import _to_graph as _to_nx_arangodb_graph
+from nx_arangodb.convert import _to_nxadb_graph, _to_nxcg_graph
 from nx_arangodb.utils import networkx_algorithm
 
 try:
     import pylibcugraph as plc
-    from nx_cugraph.convert import _to_graph as _to_nx_cugraph_graph
     from nx_cugraph.utils import _seed_to_int
 
     GPU_ENABLED = True
@@ -15,9 +14,9 @@
 __all__ = ["betweenness_centrality"]
 
 
-# 1. If GPU is enabled, call nx-cugraph bc() after converting to a nx_cugraph graph (in-memory graph)
-# 2. If GPU is not enabled, call networkx bc() after converting to a networkx graph (in-memory graph)
-# 3. If GPU is not enabled, call networkx bc() **without** converting to a networkx graph (remote graph)
+# 1. If GPU is enabled, call nx-cugraph bc() after converting to an nxcg graph (in-memory graph)
+# 2. If GPU is not enabled, call networkx bc() after converting to an nxadb graph (in-memory graph)
+# 3. If GPU is not enabled, call networkx bc() **without** converting to a nxadb graph (remote graph)
 
 
 @networkx_algorithm(
@@ -41,7 +40,7 @@ def betweenness_centrality(
             )
 
         seed = _seed_to_int(seed)
-        G = _to_nx_cugraph_graph(G, weight)
+        G = _to_nxcg_graph(G, weight)
         node_ids, values = plc.betweenness_centrality(
             resource_handle=plc.ResourceHandle(),
             graph=G._get_plc_graph(),
@@ -58,7 +57,7 @@ def betweenness_centrality(
     else:
         print("ANTHONY: GPU is disabled. Using nx bc()")
 
-        G = _to_nx_arangodb_graph(G)
+        G = _to_nxadb_graph(G)
 
         betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
         if k is None:
diff --git a/nx_arangodb/classes/graph.py b/nx_arangodb/classes/graph.py
index db495e36..5b5ebfa6 100644
--- a/nx_arangodb/classes/graph.py
+++ b/nx_arangodb/classes/graph.py
@@ -1,6 +1,9 @@
+import os
 from typing import ClassVar
 
 import networkx as nx
+from arango import ArangoClient
+from arango.database import StandardDatabase
 
 import nx_arangodb as nxadb
 
@@ -16,3 +19,82 @@ class Graph(nx.Graph):
     @classmethod
     def to_networkx_class(cls) -> type[nx.Graph]:
         return nx.Graph
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+        self.set_db()
+
+        self.__graph_exists = False
+        if self.__db is not None:
+            self.set_graph_name()
+
+    @property
+    def db(self) -> StandardDatabase:
+        if self.__db is None:
+            raise ValueError("Database not set")
+
+        return self.__db
+
+    @property
+    def graph_name(self) -> str:
+        if self.__graph_name is None:
+            raise ValueError("Graph name not set")
+
+        return self.__graph_name
+
+    @property
+    def graph_exists(self) -> bool:
+        return self.__graph_exists
+
+    def set_db(self, db: StandardDatabase | None = None):
+        if db is not None:
+            if not isinstance(db, StandardDatabase):
+                raise TypeError(
+                    "**db** must be an instance of arango.database.StandardDatabase"
+                )
+
+            self.__db = db
+            return
+
+        host = os.getenv("DATABASE_HOST")
+        username = os.getenv("DATABASE_USERNAME")
+        password = os.getenv("DATABASE_PASSWORD")
+        db_name = os.getenv("DATABASE_NAME")
+
+        # TODO: Raise a custom exception if any of the environment
+        # variables are missing. For now, we'll just set db to None.
+        if not all([host, username, password, db_name]):
+            self.__db = None
+            return
+
+        self.__db = ArangoClient(host=host, request_timeout=None).db(
+            db_name, username, password, verify=True
+        )
+
+    def set_graph_name(self, graph_name: str | None = None):
+        if self.__db is None:
+            raise ValueError("Cannot set graph name without setting the database first")
+
+        self.__graph_name = os.getenv("DATABASE_GRAPH_NAME")
+        if graph_name is not None:
+            if not isinstance(graph_name, str):
+                raise TypeError("**graph_name** must be a string")
+
+            self.__graph_name = graph_name
+
+        if self.__graph_name is None:
+            self.__graph_exists = False
+            print("DATABASE_GRAPH_NAME environment variable not set")
+
+        elif not self.db.has_graph(self.__graph_name):
+            self.__graph_exists = False
+            print(f"Graph '{self.__graph_name}' does not exist in the database")
+
+        else:
+            self.__graph_exists = True
+            print(f"Found graph '{self.__graph_name}' in the database")
diff --git a/nx_arangodb/convert.py b/nx_arangodb/convert.py
index fae54ce4..9b9664f0 100644
--- a/nx_arangodb/convert.py
+++ b/nx_arangodb/convert.py
@@ -167,7 +167,7 @@ def to_networkx(G: nxadb.Graph, *, sort_edges: bool = False) -> nx.Graph:
     return G.to_networkx_class()(incoming_graph_data=G)
 
 
-def _to_graph(
+def _to_nxadb_graph(
     G,
     edge_attr: AttrKey | None = None,
     edge_default: EdgeValue | None = 1,
@@ -188,7 +188,7 @@ def _to_graph(
     raise TypeError
 
 
-def _to_directed_graph(
+def _to_nxadb_directed_graph(
     G,
     edge_attr: AttrKey | None = None,
     edge_default: EdgeValue | None = 1,
@@ -214,7 +214,7 @@ def _to_directed_graph(
     raise TypeError
 
 
-def _to_undirected_graph(
+def _to_nxadb_undirected_graph(
     G,
     edge_attr: AttrKey | None = None,
     edge_default: EdgeValue | None = 1,
@@ -235,3 +235,66 @@ def _to_undirected_graph(
         )
     # TODO: handle cugraph.Graph
     raise TypeError
+
+
+try:
+    import nx_cugraph as nxcg
+    from adbnx_adapter import ADBNX_Adapter
+
+    def _to_nxcg_graph(
+        G,
+        edge_attr: AttrKey | None = None,
+        edge_default: EdgeValue | None = 1,
+        edge_dtype: Dtype | None = None,
+    ) -> nxcg.Graph | nxcg.DiGraph:
+        """Ensure that input type is a nx_cugraph graph, and convert if necessary.
+
+        Directed and undirected graphs are both allowed.
+        This is an internal utility function and may change or be removed.
+        """
+        if isinstance(G, nxcg.Graph):
+            return G
+        if isinstance(G, nxadb.Graph):
+            # Assumption: G.graph_name points to an existing graph in ArangoDB
+            # Therefore, the user wants us to pull the graph from ArangoDB,
+            # and convert it to an nx_cugraph graph.
+            # We currently accomplish this by using the NetworkX adapter for ArangoDB,
+            # which converts the ArangoDB graph to a NetworkX graph, and then we convert
+            # the NetworkX graph to an nx_cugraph graph.
+            # TODO: Implement a direct conversion from ArangoDB to nx_cugraph
+            if G.graph_exists:
+                adapter = ADBNX_Adapter(G.db)
+                nx_g = adapter.arangodb_graph_to_networkx(
+                    G.graph_name, G.to_networkx_class()()
+                )
+
+                return nxcg.convert.from_networkx(
+                    nx_g,
+                    {edge_attr: edge_default} if edge_attr is not None else None,
+                    edge_dtype,
+                )
+
+        # If G is a networkx graph, or is a nxadb graph that doesn't point to an "existing"
+        # ArangoDB graph, then we just treat it as a normal networkx graph &
+        # convert it to nx_cugraph.
+        # TODO: Need to revisit the "existing" ArangoDB graph condition...
+        if isinstance(G, nx.Graph):
+            return nxcg.convert.from_networkx(
+                G,
+                {edge_attr: edge_default} if edge_attr is not None else None,
+                edge_dtype,
+            )
+
+        # TODO: handle cugraph.Graph
+        raise TypeError
+
+except ModuleNotFoundError:
+
+    def _to_nxcg_graph(
+        G,
+        edge_attr: AttrKey | None = None,
+        edge_default: EdgeValue | None = 1,
+        edge_dtype: Dtype | None = None,
+    ) -> nxadb.Graph:
+        m = "nx-cugraph is not installed; cannot convert to nx-cugraph graph"
+        raise NotImplementedError(m)