diff --git a/.github/disabled-workflows/docs.yaml b/.github/workflows/docs.yaml similarity index 80% rename from .github/disabled-workflows/docs.yaml rename to .github/workflows/docs.yaml index 0b19163f..7067b613 100644 --- a/.github/disabled-workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -20,7 +20,7 @@ jobs: python-version: '3.10' - name: Install dependencies - run: pip install .[dev] && pip install -r docs/requirements.txt + run: pip install .[dev] && pip install -r doc/requirements.txt - name: Generate Sphinx HTML - run: cd docs && make html \ No newline at end of file + run: cd doc && make html \ No newline at end of file diff --git a/.github/disabled-workflows/release.yml b/.github/workflows/release.yml similarity index 100% rename from .github/disabled-workflows/release.yml rename to .github/workflows/release.yml diff --git a/.gitignore b/.gitignore index 752223e5..52f3a861 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,15 @@ wheels/ *.egg MANIFEST +# Generated while building documentation. +doc/auto_examples +doc/modules +doc/generated +doc/algorithms/generated +doc/classes/generated +doc/readwrite/generated +doc/path.to.file + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
@@ -64,7 +73,7 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +doc/_build/ # PyBuilder target/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 47b44e5a..06a9138a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,9 +11,9 @@ build: tools: python: "3.12" -# Build documentation in the "docs/" directory with Sphinx +# Build documentation in the "doc/" directory with Sphinx sphinx: - configuration: docs/conf.py + configuration: doc/conf.py fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub @@ -26,4 +26,4 @@ sphinx: # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: install: - - requirements: docs/requirements.txt \ No newline at end of file + - requirements: doc/requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index 211b1f09..14b2531d 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,23 @@
- NetworkX - ArangoDB - RAPIDS - NVIDIA + + NetworkX + + + ArangoDB + + + RAPIDS + + + NVIDIA +

-Open In Colab +Open In Colab [![CircleCI](https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main) [![CodeQL](https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml/badge.svg)](https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml) [![Docs](https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml/badge.svg)](https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml) @@ -44,7 +52,7 @@ Benefits of having ArangoDB as a backend to NetworkX include: 6. Access to efficient distribution of graph data ([ArangoDB SmartGraphs](https://docs.arangodb.com/stable/graphs/smartgraphs/)).

- +

@@ -169,7 +177,7 @@ nx.config.backends.arangodb.use_gpu = True ```

- +

diff --git a/docs/Makefile b/doc/Makefile similarity index 100% rename from docs/Makefile rename to doc/Makefile diff --git a/docs/_static/dispatch.png b/doc/_static/dispatch.png similarity index 100% rename from docs/_static/dispatch.png rename to doc/_static/dispatch.png diff --git a/docs/_static/nxadb.png b/doc/_static/nxadb.png similarity index 100% rename from docs/_static/nxadb.png rename to doc/_static/nxadb.png diff --git a/doc/algorithms/index.rst b/doc/algorithms/index.rst new file mode 100644 index 00000000..9adf6f7d --- /dev/null +++ b/doc/algorithms/index.rst @@ -0,0 +1,99 @@ +.. _algorithms: + +********** +Algorithms +********** + +As NetworkX-ArangoDB is primarily a **Storage Backend** to NetworkX, its primary focus is on persisting and reloading graphs from ArangoDB. + +However, running algorithms on the graph is also still possible. + +There are 3 ways to run algorithms on the graph: + +1. **NetworkX**: The traditional way of running algorithms on Graphs. +2. **NetworkX-cuGraph**: The GPU-accelerated way of running algorithms on Graphs. +3. **ArangoDB**: The database way of running algorithms on Graphs. + +Currently, Options 1 & 2 are supported, whereas Option 3 is a work-in-progress. + +Running algorithms with Option 2 requires ``nx-cugraph`` to be installed on a system with a compatible GPU: + +.. code-block:: + + pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com + +When running algorithms with Option 2, the graph is converted to a ``nx-cugraph`` graph, and the algorithm is run on the GPU. + +This is only possible if ``nx-cugraph`` has implemented the algorithm you want to run. + +- For a list of algorithms that are supported by ``nx-cugraph``, refer to the `nx-cugraph README `_. +- For a list of algorithms that are supported by ``networkx``, refer to the `NetworkX Documentation `_. + +``nx-arangodb`` will automatically dispatch algorithm calls to either CPU or GPU based on if ``nx-cugraph`` is installed. 
We rely on a rust-based library called `phenolrs `_ to retrieve ArangoDB Graphs as fast as possible. + +You can also force-run algorithms on CPU even if ``nx-cugraph`` is installed: + +.. code-block:: python + + import os + import networkx as nx + import nx_arangodb as nxadb + + # os.environ ... + + G = nxadb.Graph(name="MyGraph") + + nx.config.backends.arangodb.use_gpu = False + + nx.pagerank(G) + nx.betweenness_centrality(G) + # ... + + nx.config.backends.arangodb.use_gpu = True + + +.. image:: ../_static/dispatch.png + :align: center + :alt: nx-arangodb dispatching + :height: 200px + + +**Tip**: If you're running multiple CPU algorithms, it's recommended to rely on invoking ``nxadb.convert.nxadb_to_nx`` to convert the graph to a NetworkX Graph before running the algorithms. +This is because we currently load the entire graph into memory before running *each* algorithm, which can be slow for large graphs. + +.. code-block:: python + + import networkx as nx + import nx_arangodb as nxadb + + G_adb = nxadb.Graph(name="MyGraph") + + G_nx = nxadb.convert.nxadb_to_nx(G_adb) + + nx.pagerank(G_nx) + nx.betweenness_centrality(G_nx) + # ... + + +**Option 3** + +This is an experimental module seeking to provide server-side algorithms for `nx-arangodb` Graphs. +The goal is to provide a set of algorithms that can be delegated to the server for processing, +rather than having to pull all the data to the client and process it there. + +Currently, the module is in a very early stage and only provides a single algorithm: `shortest_path`. +This is simply to demonstrate the potential of the module and to provide a starting point for further development. + +.. code-block:: python + + import os + import networkx as nx + import nx_arangodb as nxadb + + # os.environ ... 
+ + G = nxadb.Graph(name="MyGraph") + + nx.pagerank(G) # Runs on the client + nx.shortest_path(G, source="A", target="B") # Runs on the DB server + nx.shortest_path.orig_func(G, source="A", target="B") # Runs on the client diff --git a/doc/classes/digraph.rst b/doc/classes/digraph.rst new file mode 100644 index 00000000..c1b03c11 --- /dev/null +++ b/doc/classes/digraph.rst @@ -0,0 +1,89 @@ +.. _digraph: + +======= +DiGraph +======= + +Overview +======== +.. currentmodule:: nx_arangodb +.. autoclass:: DiGraph + :members: query, chat + + +Methods +======= + +Adding and removing nodes and edges +----------------------------------- + +.. autosummary:: + :toctree: generated/ + + DiGraph.__init__ + DiGraph.add_node + DiGraph.add_nodes_from + DiGraph.remove_node + DiGraph.remove_nodes_from + DiGraph.add_edge + DiGraph.add_edges_from + DiGraph.add_weighted_edges_from + DiGraph.remove_edge + DiGraph.remove_edges_from + DiGraph.update + DiGraph.clear + DiGraph.clear_edges + + + +Reporting nodes edges and neighbors +----------------------------------- +.. autosummary:: + :toctree: generated/ + + DiGraph.nodes + DiGraph.__iter__ + DiGraph.has_node + DiGraph.__contains__ + DiGraph.edges + DiGraph.out_edges + DiGraph.in_edges + DiGraph.has_edge + DiGraph.get_edge_data + DiGraph.neighbors + DiGraph.adj + DiGraph.__getitem__ + DiGraph.successors + DiGraph.succ + DiGraph.predecessors + DiGraph.pred + DiGraph.adjacency + DiGraph.nbunch_iter + + +Counting nodes edges and neighbors +---------------------------------- +.. autosummary:: + :toctree: generated/ + + DiGraph.order + DiGraph.number_of_nodes + DiGraph.__len__ + DiGraph.degree + DiGraph.in_degree + DiGraph.out_degree + DiGraph.size + DiGraph.number_of_edges + + +Making copies and subgraphs +--------------------------- +.. 
autosummary:: + :toctree: generated/ + + DiGraph.copy + DiGraph.to_undirected + DiGraph.to_directed + DiGraph.subgraph + DiGraph.edge_subgraph + DiGraph.reverse diff --git a/doc/classes/graph.rst b/doc/classes/graph.rst new file mode 100644 index 00000000..870d975c --- /dev/null +++ b/doc/classes/graph.rst @@ -0,0 +1,81 @@ +.. _graph: + +===== +Graph +===== + +Overview +======== +.. currentmodule:: nx_arangodb +.. autoclass:: Graph + :members: query, chat + + +Methods +======= + +Adding and removing nodes and edges +----------------------------------- + +.. autosummary:: + :toctree: generated/ + + Graph.__init__ + Graph.add_node + Graph.add_nodes_from + Graph.remove_node + Graph.remove_nodes_from + Graph.add_edge + Graph.add_edges_from + Graph.add_weighted_edges_from + Graph.remove_edge + Graph.remove_edges_from + Graph.update + Graph.clear + Graph.clear_edges + + + +Reporting nodes edges and neighbors +----------------------------------- +.. autosummary:: + :toctree: generated/ + + Graph.nodes + Graph.__iter__ + Graph.has_node + Graph.__contains__ + Graph.edges + Graph.has_edge + Graph.get_edge_data + Graph.neighbors + Graph.adj + Graph.__getitem__ + Graph.adjacency + Graph.nbunch_iter + + + +Counting nodes edges and neighbors +---------------------------------- +.. autosummary:: + :toctree: generated/ + + Graph.order + Graph.number_of_nodes + Graph.__len__ + Graph.degree + Graph.size + Graph.number_of_edges + + +Making copies and subgraphs +--------------------------- +.. autosummary:: + :toctree: generated/ + + Graph.copy + Graph.to_undirected + Graph.to_directed + Graph.subgraph + Graph.edge_subgraph diff --git a/doc/classes/index.rst b/doc/classes/index.rst new file mode 100644 index 00000000..b5765295 --- /dev/null +++ b/doc/classes/index.rst @@ -0,0 +1,35 @@ +.. _classes: + +****** +Graphs +****** + +NetworkX provides data structures and methods for storing graphs. 
+ +All NetworkX graph classes allow (hashable) Python objects as nodes +and any Python object can be assigned as an edge attribute. + +The choice of graph class depends on the structure of the +graph you want to represent. + +**Which graph class should I use?** + ++----------------+------------+--------------------+------------------------+ +| Networkx Class | Type | Self-loops allowed | Parallel edges allowed | ++================+============+====================+========================+ +| Graph | undirected | Yes | No | ++----------------+------------+--------------------+------------------------+ +| DiGraph | directed | Yes | No | ++----------------+------------+--------------------+------------------------+ +| MultiGraph | undirected | Yes | Yes | ++----------------+------------+--------------------+------------------------+ +| MultiDiGraph | directed | Yes | Yes | ++----------------+------------+--------------------+------------------------+ + +.. toctree:: + :maxdepth: 1 + + graph + digraph + multigraph + multidigraph diff --git a/doc/classes/multidigraph.rst b/doc/classes/multidigraph.rst new file mode 100644 index 00000000..f62af3fa --- /dev/null +++ b/doc/classes/multidigraph.rst @@ -0,0 +1,90 @@ +.. _multidigraph: + + +============ +MultiDiGraph +============ + +Overview +======== +.. currentmodule:: nx_arangodb +.. autoclass:: MultiDiGraph + :members: query, chat + + +Methods +======= + +Adding and Removing Nodes and Edges +----------------------------------- + +.. 
autosummary:: + :toctree: generated/ + + MultiDiGraph.__init__ + MultiDiGraph.add_node + MultiDiGraph.add_nodes_from + MultiDiGraph.remove_node + MultiDiGraph.remove_nodes_from + MultiDiGraph.add_edge + MultiDiGraph.add_edges_from + MultiDiGraph.add_weighted_edges_from + MultiDiGraph.new_edge_key + MultiDiGraph.remove_edge + MultiDiGraph.remove_edges_from + MultiDiGraph.update + MultiDiGraph.clear + MultiDiGraph.clear_edges + + + +Reporting nodes edges and neighbors +----------------------------------- +.. autosummary:: + :toctree: generated/ + + MultiDiGraph.nodes + MultiDiGraph.__iter__ + MultiDiGraph.has_node + MultiDiGraph.__contains__ + MultiDiGraph.edges + MultiDiGraph.out_edges + MultiDiGraph.in_edges + MultiDiGraph.has_edge + MultiDiGraph.get_edge_data + MultiDiGraph.neighbors + MultiDiGraph.adj + MultiDiGraph.__getitem__ + MultiDiGraph.successors + MultiDiGraph.succ + MultiDiGraph.predecessors + MultiDiGraph.pred + MultiDiGraph.adjacency + MultiDiGraph.nbunch_iter + + +Counting nodes edges and neighbors +---------------------------------- +.. autosummary:: + :toctree: generated/ + + MultiDiGraph.order + MultiDiGraph.number_of_nodes + MultiDiGraph.__len__ + MultiDiGraph.degree + MultiDiGraph.in_degree + MultiDiGraph.out_degree + MultiDiGraph.size + MultiDiGraph.number_of_edges + +Making copies and subgraphs +--------------------------- +.. autosummary:: + :toctree: generated/ + + MultiDiGraph.copy + MultiDiGraph.to_undirected + MultiDiGraph.to_directed + MultiDiGraph.subgraph + MultiDiGraph.edge_subgraph + MultiDiGraph.reverse diff --git a/doc/classes/multigraph.rst b/doc/classes/multigraph.rst new file mode 100644 index 00000000..2088d7a6 --- /dev/null +++ b/doc/classes/multigraph.rst @@ -0,0 +1,81 @@ +.. _multigraph: + +========== +MultiGraph +========== + +Overview +======== +.. currentmodule:: nx_arangodb +.. 
autoclass:: MultiGraph + :members: query, chat + +Methods +======= + +Adding and removing nodes and edges +----------------------------------- + +.. autosummary:: + :toctree: generated/ + + MultiGraph.__init__ + MultiGraph.add_node + MultiGraph.add_nodes_from + MultiGraph.remove_node + MultiGraph.remove_nodes_from + MultiGraph.add_edge + MultiGraph.add_edges_from + MultiGraph.add_weighted_edges_from + MultiGraph.new_edge_key + MultiGraph.remove_edge + MultiGraph.remove_edges_from + MultiGraph.update + MultiGraph.clear + MultiGraph.clear_edges + + + +Reporting nodes edges and neighbors +----------------------------------- +.. autosummary:: + :toctree: generated/ + + MultiGraph.nodes + MultiGraph.__iter__ + MultiGraph.has_node + MultiGraph.__contains__ + MultiGraph.edges + MultiGraph.has_edge + MultiGraph.get_edge_data + MultiGraph.neighbors + MultiGraph.adj + MultiGraph.__getitem__ + MultiGraph.adjacency + MultiGraph.nbunch_iter + + + +Counting nodes edges and neighbors +---------------------------------- +.. autosummary:: + :toctree: generated/ + + MultiGraph.order + MultiGraph.number_of_nodes + MultiGraph.__len__ + MultiGraph.degree + MultiGraph.size + MultiGraph.number_of_edges + + +Making copies and subgraphs +--------------------------- +.. 
autosummary:: + :toctree: generated/ + + MultiGraph.copy + MultiGraph.to_undirected + MultiGraph.to_directed + MultiGraph.subgraph + MultiGraph.edge_subgraph diff --git a/docs/conf.py b/doc/conf.py similarity index 86% rename from docs/conf.py rename to doc/conf.py index 6f05d9a7..fe5250b8 100644 --- a/docs/conf.py +++ b/doc/conf.py @@ -22,6 +22,8 @@ "sphinx_rtd_theme", "sphinx.ext.autodoc", "sphinx.ext.viewcode", + "sphinx.ext.autosummary", + "sphinx.ext.inheritance_diagram", ] templates_path = ['_templates'] exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] @@ -32,4 +34,6 @@ html_theme = 'sphinx_rtd_theme' html_static_path = ['_static'] -autodoc_member_order = "bysource" \ No newline at end of file +autodoc_member_order = "bysource" +autodoc_inherit_docstrings = True +autosummary_generate = True diff --git a/doc/dict/adj.rst b/doc/dict/adj.rst new file mode 100644 index 00000000..e0735fc8 --- /dev/null +++ b/doc/dict/adj.rst @@ -0,0 +1,18 @@ +.. _adj: + +========= +Adjacency +========= + + +.. currentmodule:: nx_arangodb.classes.dict.adj +.. autoclass:: AdjListOuterDict + +.. currentmodule:: nx_arangodb.classes.dict.adj +.. autoclass:: AdjListInnerDict + +.. currentmodule:: nx_arangodb.classes.dict.adj +.. autoclass:: EdgeKeyDict + +.. currentmodule:: nx_arangodb.classes.dict.adj +.. autoclass:: EdgeAttrDict \ No newline at end of file diff --git a/doc/dict/graph.rst b/doc/dict/graph.rst new file mode 100644 index 00000000..7444012d --- /dev/null +++ b/doc/dict/graph.rst @@ -0,0 +1,12 @@ +.. _dict_graph: + +===== +Graph +===== + + +.. currentmodule:: nx_arangodb.classes.dict.graph +.. autoclass:: GraphDict + +.. currentmodule:: nx_arangodb.classes.dict.graph +.. autoclass:: GraphAttrDict \ No newline at end of file diff --git a/doc/dict/index.rst b/doc/dict/index.rst new file mode 100644 index 00000000..6172f236 --- /dev/null +++ b/doc/dict/index.rst @@ -0,0 +1,41 @@ +.. 
_dict: + +************ +Dictionaries +************ + +The ``dict`` module provides a set of ``UserDict``-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database. + +NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions: + +1. ``node_dict_factory`` +2. ``node_attr_dict_factory`` +3. ``adjlist_outer_dict_factory`` +4. ``adjlist_inner_dict_factory`` +5. ``edge_key_dict_factory`` (Only for MultiGraphs) +6. ``edge_attr_dict_factory`` +7. ``graph_attr_dict_factory`` + +These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself. + +This module contains the following classes: + +1. ``NodeDict`` +2. ``NodeAttrDict`` +3. ``AdjListOuterDict`` +4. ``AdjListInnerDict`` +5. ``EdgeKeyDict`` +6. ``EdgeAttrDict`` +7. ``GraphDict`` +8. ``GraphAttrDict`` + +Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the ``UserDict`` class. + +By using this strategy in addition to subclassing the ``nx.Graph`` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support. + +.. toctree:: + :maxdepth: 1 + + adj + node + graph diff --git a/doc/dict/node.rst b/doc/dict/node.rst new file mode 100644 index 00000000..2b1f061d --- /dev/null +++ b/doc/dict/node.rst @@ -0,0 +1,12 @@ +.. _node: + +==== +Node +==== + + +.. currentmodule:: nx_arangodb.classes.dict.node +.. autoclass:: NodeDict + +.. currentmodule:: nx_arangodb.classes.dict.node +.. autoclass:: NodeAttrDict diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 00000000..ebaff36d --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,115 @@ +nx-arangodb +============ + +.. raw:: html + +
+ + NetworkX + + + ArangoDB + + + RAPIDS + + + NVIDIA + +
+ +.. raw:: html + +
+ +.. image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/arangodb/nx-arangodb/blob/main/doc/nx_arangodb.ipynb + :alt: Open In Colab + +.. image:: https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg + :target: https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main + :alt: CircleCI + +.. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml/badge.svg + :target: https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml + :alt: CodeQL + +.. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml/badge.svg + :target: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml + :alt: Docs + +.. image:: https://img.shields.io/pypi/v/nx-arangodb?color=3775A9&style=for-the-badge&logo=pypi&logoColor=FFD43B + :target: https://pypi.org/project/nx-arangodb/ + :alt: PyPI version badge + +.. image:: https://img.shields.io/badge/3.10%2B-3776AB?style=for-the-badge&logo=python&logoColor=FFD43B&label=Python + :target: https://pypi.org/project/nx-arangodb/ + :alt: Python versions badge + +.. image:: https://img.shields.io/static/v1?style=for-the-badge&label=code%20style&message=black&color=black + :target: https://github.com/psf/black + :alt: Code style: black + +.. image:: https://img.shields.io/pepy/dt/nx-arangodb?style=for-the-badge&color=282661 + :target: https://pepy.tech/project/nx-arangodb + :alt: Downloads + +This is a `backend to NetworkX `_ that offers `ArangoDB `_ as a `Persistence Layer to NetworkX Graphs `_: + +1. Persist NetworkX Graphs to ArangoDB. +2. Reload NetworkX Graphs from ArangoDB. +3. Perform CRUD on ArangoDB Graphs via NetworkX. +4. Run algorithms (CPU & GPU) on ArangoDB Graphs via NetworkX. + +Benefits of having ArangoDB as a backend to NetworkX include: + +1. No need to re-create the graph every time you start a new session. +2. 
Access to GPU-accelerated graph analytics (`nx-cugraph `_). +3. Access to a database query language (`Arango Query Language `_). +4. Access to a visual interface for graph exploration (`ArangoDB Web UI `_). +5. Access to cross-collaboration on the same graph (`ArangoDB Cloud `_). +6. Access to efficient distribution of graph data (`ArangoDB SmartGraphs `_). + +.. image:: ./_static/nxadb.png + :align: center + :alt: nx-arangodb Diagram + :height: 200px + +Requirements +------------ +- Python 3.10+ +- NetworkX 3.0+ +- ArangoDB 3.10+ + +Installation +------------ + +Latest Release + +.. code-block:: + + pip install nx-arangodb + +Current State + +.. code-block:: + + pip install git+https://github.com/arangodb/nx-arangodb + +Contents +-------- + +The UX of NetworkX-ArangoDB is similar to that of NetworkX, but with the +added functionality of persisting graphs to ArangoDB. For an understanding +of how to use NetworkX, refer to the `NetworkX Documentation `_. + +Expect documentation to grow over time: + +.. toctree:: + :maxdepth: 2 + + quickstart + classes/index + dict/index + algorithms/index + views/index \ No newline at end of file diff --git a/docs/make.bat b/doc/make.bat similarity index 100% rename from docs/make.bat rename to doc/make.bat diff --git a/docs/nx_arangodb.ipynb b/doc/nx_arangodb.ipynb similarity index 100% rename from docs/nx_arangodb.ipynb rename to doc/nx_arangodb.ipynb diff --git a/doc/quickstart.rst b/doc/quickstart.rst new file mode 100644 index 00000000..f8bf628d --- /dev/null +++ b/doc/quickstart.rst @@ -0,0 +1,105 @@ +Quickstart +========== + +1. Set up ArangoDB +2. Set environment variables +3. Instantiate a NetworkX-ArangoDB Graph + +1. Set up ArangoDB +------------------ + +**Option A: Local Instance via Docker** + +Appears on ``localhost:8529`` with the user ``root`` & password ``openSesame``. + +More info: `arangodb.com/download-major `_. + +.. 
code-block:: bash + + docker run -e ARANGO_ROOT_PASSWORD=openSesame -p 8529:8529 arangodb/arangodb + +**Option B: ArangoDB Cloud Trial** + +`ArangoGraph `_ is ArangoDB's Cloud offering to use ArangoDB as a managed service. + +A 14-day trial is available upon sign up. + +**Option C: Temporary Cloud Instance via Python** + +A temporary cloud database can be provisioned using the `adb-cloud-connector `_ Python package. + +.. code-block:: bash + + pip install adb-cloud-connector + +.. code-block:: python + + from adb_cloud_connector import get_temp_credentials + + credentials = get_temp_credentials() + + print(credentials) + +2. Set environment variables +---------------------------- + +Connecting to ArangoDB requires the following environment variables: + +1. ``DATABASE_HOST``: The host URL of the ArangoDB instance. +2. ``DATABASE_USERNAME``: The username to connect to the ArangoDB instance. +3. ``DATABASE_PASSWORD``: The password to connect to the ArangoDB instance. +4. ``DATABASE_NAME``: The name of the database to connect to. + +For example, using Option A from above: + +.. code-block:: bash + + export DATABASE_HOST=http://localhost:8529 + export DATABASE_USERNAME=root + export DATABASE_PASSWORD=openSesame + export DATABASE_NAME=_system + +Or using Option C from above: + +.. code-block:: python + + import os + from adb_cloud_connector import get_temp_credentials + + credentials = get_temp_credentials() + + os.environ["DATABASE_HOST"] = credentials["url"] + os.environ["DATABASE_USERNAME"] = credentials["username"] + os.environ["DATABASE_PASSWORD"] = credentials["password"] + os.environ["DATABASE_NAME"] = credentials["dbName"] + +3. Instantiate a NetworkX-ArangoDB Graph +---------------------------------------- + +Instantiating a NetworkX-ArangoDB Graph is similar to instantiating a NetworkX Graph. + +Providing the ``name`` parameter will create a new graph in ArangoDB if it does not already exist. 
+ +Providing the ``incoming_graph_data`` in combination with the ``name`` parameter will create a new graph in ArangoDB +with the provided data. If the graph already exists, an error will be raised. + +.. code-block:: python + + import networkx as nx + import nx_arangodb as nxadb + + G = nxadb.Graph(name="MyGraph") # New ArangoDB Graph + G2 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph()) # Regular NetworkX Graph + G3 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph(), name="KarateGraph") # New ArangoDB Graph + +From here, you can use the conventional NetworkX API to interact with the graph. + +Assuming you already have a graph in ArangoDB named `MyGraph`, you can reload it as follows: + +.. code-block:: python + + import nx_arangodb as nxadb + + G = nxadb.Graph(name="MyGraph") + + print(G.number_of_nodes(), G.number_of_edges()) diff --git a/docs/requirements.txt b/doc/requirements.txt similarity index 100% rename from docs/requirements.txt rename to doc/requirements.txt diff --git a/doc/views/coreviews.rst b/doc/views/coreviews.rst new file mode 100644 index 00000000..0f6f06cb --- /dev/null +++ b/doc/views/coreviews.rst @@ -0,0 +1,14 @@ +.. _coreviews: + +========= +Coreviews +========= + + +.. currentmodule:: nx_arangodb.classes.coreviews +.. autoclass:: ArangoAdjacencyView + :members: + +.. currentmodule:: nx_arangodb.classes.coreviews +.. autoclass:: ArangoAtlasView + :members: diff --git a/doc/views/index.rst b/doc/views/index.rst new file mode 100644 index 00000000..74c3adb3 --- /dev/null +++ b/doc/views/index.rst @@ -0,0 +1,33 @@ +.. _views: + +************** +ArangoDB Views +************** + +Having a database as a backend to NetworkX allows us to delegate +certain operations to the database. + +This can be applied to the concept of NetworkX Views. + +Below are a set of experimental overrides of the NetworkX Views that represent the +nodes and edges of the graph. 
Overriding these classes allows us to +implement custom logic for data filtering and updating in the database. + +These classes are a work-in-progress. The main goal is to try +to delegate data processing to ArangoDB, whenever possible. + +To use these experimental views, you must set **use_arango_views=True** +when creating a new graph object: + +.. code-block:: python + + import nx_arangodb as nxadb + + G = nxadb.Graph(name="MyGraph", use_arango_views=True) + + +.. toctree:: + :maxdepth: 1 + + coreviews + reportviews \ No newline at end of file diff --git a/doc/views/reportviews.rst b/doc/views/reportviews.rst new file mode 100644 index 00000000..bf506539 --- /dev/null +++ b/doc/views/reportviews.rst @@ -0,0 +1,22 @@ +.. _reportviews: + +=========== +Reportviews +=========== + + +.. currentmodule:: nx_arangodb.classes.reportviews +.. autoclass:: ArangoNodeView + :members: + +.. currentmodule:: nx_arangodb.classes.reportviews +.. autoclass:: ArangoNodeDataView + :members: + +.. currentmodule:: nx_arangodb.classes.reportviews +.. autoclass:: ArangoEdgeView + :members: + +.. currentmodule:: nx_arangodb.classes.reportviews +.. autoclass:: ArangoEdgeDataView + :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 5e1c309d..00000000 --- a/docs/index.rst +++ /dev/null @@ -1 +0,0 @@ -Hello World \ No newline at end of file diff --git a/nx_arangodb/algorithms/README.md b/nx_arangodb/algorithms/README.md new file mode 100644 index 00000000..02d78c15 --- /dev/null +++ b/nx_arangodb/algorithms/README.md @@ -0,0 +1,21 @@ +# algorithms + +This is an experimental module seeking to provide server-side algorithms for `nx-arangodb` Graphs. The goal is to provide a set of algorithms that can be delegated to the server for processing, rather than having to pull all the data to the client and process it there. + +Currently, the module is in a very early stage and only provides a single algorithm: `shortest_path`. 
This is simply to demonstrate the potential of the module and to provide a starting point for further development. + +```python +import os +import networkx as nx +import nx_arangodb as nxadb + +# os.environ ... + +G = nxadb.Graph(name="MyGraph") + +nx.pagerank(G) # Runs on the client +nx.shortest_path(G, source="A", target="B") # Runs on the DB server +nx.shortest_path.orig_func(G, source="A", target="B") # Runs on the client +``` + +As ArangoDB continues to grow its Graph Analytics capabilities, this module will be updated to take advantage of those features. Stay tuned! \ No newline at end of file diff --git a/nx_arangodb/algorithms/shortest_paths/generic.py b/nx_arangodb/algorithms/shortest_paths/generic.py index 7328b257..0cec3ea2 100644 --- a/nx_arangodb/algorithms/shortest_paths/generic.py +++ b/nx_arangodb/algorithms/shortest_paths/generic.py @@ -4,7 +4,6 @@ import networkx as nx import nx_arangodb as nxadb -from nx_arangodb.exceptions import ShortestPathError from nx_arangodb.utils import _dtype_param, networkx_algorithm __all__ = ["shortest_path"] @@ -22,19 +21,66 @@ def shortest_path( *, dtype=None, ): - """limited version of nx.shortest_path""" + """A server-side implementation of the nx.shortest_path algorithm. + + This algorithm will invoke the original NetworkX algorithm if one + of the following conditions is met: + - The graph is not stored in the database. + - The method is not 'dijkstra'. + - The target or source is not specified. + + Parameters + ---------- + G : NetworkX graph + + source : node, optional + Starting node for path. If not specified, compute shortest + paths for each possible starting node. + + target : node, optional + Ending node for path. If not specified, compute shortest + paths to all possible nodes. + + weight : None, string or function, optional (default = None) + If None, every edge has weight/distance/cost 1. + If a string, use this edge attribute as the edge weight. 
+ Any edge attribute not present defaults to 1. + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly + three positional arguments: the two endpoints of an edge and + the dictionary of edge attributes for that edge. + The function must return a number. + + method : string, optional (default = 'dijkstra') + The algorithm to use to compute the path. + Supported options: 'dijkstra', 'bellman-ford'. + Other inputs produce a ValueError. + If `weight` is None, unweighted graph methods are used, and this + suggestion is ignored. + + Returns + ------- + path : list + List of nodes in a shortest path. + + Raises + ------ + NodeNotFound + If `source` is not in `G`. + + ValueError + If `method` is not among the supported options. + """ + + graph_does_not_exist = not G.graph_exists_in_db + target_or_source_not_specified = target is None or source is None + method_not_dijkstra = method != "dijkstra" + + if any([graph_does_not_exist, target_or_source_not_specified, method_not_dijkstra]): return nx.shortest_path.orig_func( G, source=source, target=target, weight=weight, method=method ) - if target is None or source is None: - raise NotImplementedError("Both source and target must be specified for now") - - if method != "dijkstra": - raise NotImplementedError("Only dijkstra method is supported") - if isinstance(source, int): source = G.nodes[source]["_id"] diff --git a/nx_arangodb/classes/coreviews.py b/nx_arangodb/classes/coreviews.py index 794a648e..0df35fcc 100644 --- a/nx_arangodb/classes/coreviews.py +++ b/nx_arangodb/classes/coreviews.py @@ -1,16 +1,73 @@ +"""Experimental overrides of the NetworkX Views that represent the +core data structures such as nested Mappings (e.g. dict-of-dicts). + +Overriding these classes allows us to implement custom logic for +data filtering and updating in the database, instead of in Python. + +These classes are a work-in-progress. 
The main goal is to try +to delegate data processing to ArangoDB, whenever possible. + +To use these experimental views, you must set **use_arango_views=True** +when creating a new graph object: +>>> G = nxadb.Graph(name="MyGraph", use_arango_views=True) +""" + import networkx as nx -class CustomAdjacencyView(nx.classes.coreviews.AdjacencyView): +class ArangoAdjacencyView(nx.classes.coreviews.AdjacencyView): + """The ArangoAdjacencyView class is an experimental subclass of + the AdjacencyView class. + + Contrary to the original AdjacencyView class, the ArangoAdjacencyView + is writable to allow for bulk updates to the graph in the DB. + """ def update(self, data): + """Update a set of edges within the graph. + + The benefit of this method is that it allows for bulk API updates, + as opposed to `G.add_edges_from`, which currently makes + one API request per edge. + + Example + ------- + >>> G = nxadb.Graph(name="MyGraph") + >>> G.adj.update( + { + 'node/1': { + 'node/2': {"node_to_node/1", "foo": "bar"}, + 'node/3': {"node_to_node/2", "foo": "baz"}, + ... + }, + ... + }) + """ return self._atlas.update(data) def __getitem__(self, name): - return CustomAtlasView(self._atlas[name]) + return ArangoAtlasView(self._atlas[name]) -class CustomAtlasView(nx.classes.coreviews.AtlasView): +class ArangoAtlasView(nx.classes.coreviews.AtlasView): + """The ArangoAtlasView class is an experimental subclass of the + AtlasView class. + + Contrary to the original AtlasView class, the ArangoAtlasView is + writable to allow for bulk updates to the graph in the DB. + """ def update(self, data): + """Update a set of edges within the graph for a specific node. + + Example + ------- + >>> G = nxadb.Graph(name="MyGraph") + >>> G.adj['node/1'].update( + { + 'node/2': {"node_to_node/1", "foo": "bar"}, + 'node/3': {"node_to_node/2", "foo": "baz"}, + ... 
+ }) + """ return self._atlas.update(data) diff --git a/nx_arangodb/classes/dict/README.md b/nx_arangodb/classes/dict/README.md new file mode 100644 index 00000000..289509fd --- /dev/null +++ b/nx_arangodb/classes/dict/README.md @@ -0,0 +1,30 @@ +# dict + +The `dict` module provides a set of `UserDict`-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database. + +NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions: + +1. `node_dict_factory` +2. `node_attr_dict_factory` +3. `adjlist_outer_dict_factory` +4. `adjlist_inner_dict_factory` +5. `edge_key_dict_factory` (Only for MultiGraphs) +6. `edge_attr_dict_factory` +7. `graph_attr_dict_factory` + +These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself. + +This module contains the following classes: + +1. `NodeDict` +2. `NodeAttrDict` +3. `AdjListOuterDict` +4. `AdjListInnerDict` +5. `EdgeKeyDict` +6. `EdgeAttrDict` +7. `GraphDict` +8. `GraphAttrDict` + +Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the `UserDict` class. + +By using this strategy in addition to subclassing the `nx.Graph` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support. 
\ No newline at end of file diff --git a/nx_arangodb/classes/dict/adj.py b/nx_arangodb/classes/dict/adj.py index a97791c9..71268975 100644 --- a/nx_arangodb/classes/dict/adj.py +++ b/nx_arangodb/classes/dict/adj.py @@ -4,7 +4,7 @@ from collections import UserDict from collections.abc import Iterator from itertools import islice -from typing import Any, Callable, Dict, List +from typing import Any, Callable, Dict, List, Union from arango.database import StandardDatabase from arango.exceptions import DocumentDeleteError @@ -32,7 +32,7 @@ aql_edge_get, aql_edge_id, aql_fetch_data_edge, - check_list_for_errors, + check_update_list_for_errors, doc_insert, doc_update, edge_get, @@ -52,6 +52,8 @@ upsert_collection_edges, ) +AdjDict = Union[GraphAdjDict, DiGraphAdjDict, MultiGraphAdjDict, MultiDiGraphAdjDict] + ############# # Factories # ############# @@ -60,6 +62,7 @@ def edge_attr_dict_factory( db: StandardDatabase, graph: Graph ) -> Callable[..., EdgeAttrDict]: + """Factory function for creating an EdgeAttrDict.""" return lambda: EdgeAttrDict(db, graph) @@ -71,6 +74,7 @@ def edge_key_dict_factory( is_directed: bool, adjlist_inner_dict: AdjListInnerDict | None = None, ) -> Callable[..., EdgeKeyDict]: + """Factory function for creating an EdgeKeyDict.""" return lambda: EdgeKeyDict( db, graph, edge_type_key, edge_type_func, is_directed, adjlist_inner_dict ) @@ -85,6 +89,7 @@ def adjlist_inner_dict_factory( graph_type: str, adjlist_outer_dict: AdjListOuterDict | None = None, ) -> Callable[..., AdjListInnerDict]: + """Factory function for creating an AdjListInnerDict.""" return lambda: AdjListInnerDict( db, graph, @@ -105,6 +110,7 @@ def adjlist_outer_dict_factory( graph_type: str, symmetrize_edges_if_directed: bool, ) -> Callable[..., AdjListOuterDict]: + """Factory function for creating an AdjListOuterDict.""" return lambda: AdjListOuterDict( db, graph, @@ -129,10 +135,17 @@ def build_edge_attr_dict_data( It's possible that **value** is a nested dict, so we need to 
recursively build a EdgeAttrDict for each nested dict. - :param parent: The parent EdgeAttrDict. - :type parent: EdgeAttrDict - :param data: The data to build the EdgeAttrDict from. - :type data: dict[str, Any] + Parameters + ---------- + parent : EdgeAttrDict + The parent EdgeAttrDict. + data : dict[str, Any] + The data to build the EdgeAttrDict from. + + Returns + ------- + dict[str, Any | EdgeAttrDict] + The data for the new EdgeAttrDict. """ edge_attr_dict_data = {} for key, value in data.items(): @@ -143,6 +156,25 @@ def build_edge_attr_dict_data( def process_edge_attr_dict_value(parent: EdgeAttrDict, key: str, value: Any) -> Any: + """Process the value of a particular key in an EdgeAttrDict. + + If the value is a dict, then we need to recursively build an EdgeAttrDict. + Otherwise, we return the value as is. + + Parameters + ---------- + parent : EdgeAttrDict + The parent EdgeAttrDict. + key : str + The key of the value. + value : Any + The value to process. + + Returns + ------- + Any + The processed value. + """ if not isinstance(value, dict): return value @@ -161,10 +193,20 @@ class EdgeAttrDict(UserDict[str, Any]): EdgeAttrDict is keyed by the edge attribute key. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. 
+ + Examples + -------- + >>> g = nxadb.Graph(name="MyGraph") + >>> g.add_edge("node/1", "node/2", foo="bar") + >>> g["node/1"]["node/2"] + EdgeAttrDict({'foo': 'bar', '_key': ..., '_id': ...}) """ def __init__( @@ -179,7 +221,7 @@ def __init__( self.db = db self.graph = graph - self.edge_id: str | None = None + self.edge_id: str | None = None # established in __setitem__ # EdgeAttrDict may be a child of another EdgeAttrDict # e.g G._adj['node/1']['node/2']['object']['foo'] = 'bar' @@ -191,7 +233,10 @@ def clear(self) -> None: raise NotImplementedError("Cannot clear EdgeAttrDict") def copy(self) -> Any: - return self.data.copy() + return { + key: value.copy() if hasattr(value, "copy") else value + for key, value in self.data.items() + } @key_is_string def __contains__(self, key: str) -> bool: @@ -275,10 +320,33 @@ class EdgeKeyDict(UserDict[str, EdgeAttrDict]): - keys must be ArangoDB Edge IDs - key-to-edge mapping is 1-to-1 - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. + + edge_type_key : str + The key used to store the edge type in the edge attribute dictionary. + + edge_type_func : Callable[[str, str], str] + The function to generate the edge type from the source and + destination node types. + + is_directed : bool + Whether the graph is directed or not. + + adjlist_inner_dict : AdjListInnerDict | None + The parent AdjListInnerDict. 
+ + Examples + -------- + >>> g = nxadb.MultiGraph(name="MyGraph") + >>> edge_id = g.add_edge("node/1", "node/2", foo="bar") + >>> g["node/1"]["node/2"][edge_id] + EdgeAttrDict({'foo': 'bar', '_key': ..., '_id': ...}) """ def __init__( @@ -352,14 +420,14 @@ def __process_int_edge_key(self, key: int) -> str: return next(islice(self.data.keys(), key, key + 1)) def __is_valid_edge_outbound(self, edge: dict[str, Any]) -> bool: - return bool( - edge["_from"] == self.src_node_id and edge["_to"] == self.dst_node_id - ) + a = edge["_from"] == self.src_node_id + b = edge["_to"] == self.dst_node_id + return bool(a and b) def __is_valid_edge_inbound(self, edge: dict[str, Any]) -> bool: - return bool( - edge["_from"] == self.dst_node_id and edge["_to"] == self.src_node_id - ) + a = edge["_from"] == self.dst_node_id + b = edge["_to"] == self.src_node_id + return bool(a and b) def __is_valid_edge_any(self, edge: dict[str, Any]) -> bool: return self.__is_valid_edge_outbound(edge) or self.__is_valid_edge_inbound(edge) @@ -381,10 +449,15 @@ def __get_mirrored_edge_attr(self, edge_id: str) -> EdgeAttrDict | None: - The "mirror" is the "reverse" adjlist_outer_dict because the adjacency list is different in both directions (i.e _pred and _succ) - :param dst_node_id: The destination node ID. - :type dst_node_id: str - :return: The edge attribute dictionary if it exists. - :rtype: EdgeAttrDict | None + Parameters + ---------- + edge_id : str + The edge ID. + + Returns + ------- + EdgeAttrDict | None + The edge attribute dictionary if it exists. 
""" if self.adjlist_inner_dict is None: return None @@ -426,8 +499,10 @@ def __str__(self) -> str: @key_is_adb_id_or_int def __contains__(self, key: str | int) -> bool: """ - 'edge/1' in G._adj['node/1']['node/2'] - 0 in G._adj['node/1']['node/2'] + Examples + -------- + >>> 'edge/1' in G._adj['node/1']['node/2'] + >>> 0 in G._adj['node/1']['node/2'] """ # HACK: This is a workaround for the fact that # nxadb.MultiGraph does not yet support custom edge keys @@ -459,11 +534,17 @@ def __contains__(self, key: str | int) -> bool: # the entire edge from the database to check if it is valid. edge_attr_dict = self._create_edge_attr_dict(edge) self.data[key] = edge_attr_dict + return True @key_is_adb_id_or_int def __getitem__(self, key: str | int) -> EdgeAttrDict: - """G._adj['node/1']['node/2']['edge/1']""" + """ + Examples + -------- + >>> G._adj['node/1']['node/2']['edge/1'] + >>> G._adj['node/1']['node/2'][0] + """ # HACK: This is a workaround for the fact that # nxadb.MultiGraph does not yet support custom edge keys if key == "-1": @@ -553,11 +634,18 @@ def __setitem__(self, key: int, edge_attr_dict: EdgeAttrDict) -> None: # type: # for any nested EdgeAttrDicts within edge_attr_dict edge_id = edge["_id"] edge_attr_dict = self._create_edge_attr_dict(edge_data) + self.data[edge_id] = edge_attr_dict + del self.data[str(key)] def __delitem__(self, key: str) -> None: - """del G._adj['node/1']['node/2']['edge/1']""" + """ + Examples + -------- + >>> del G._adj['node/1']['node/2']['edge/1'] + >>> del G._adj['node/1']['node/2'][0] + """ if isinstance(key, int): key = self.__process_int_edge_key(key) @@ -704,14 +792,36 @@ class AdjListInnerDict(UserDict[str, EdgeAttrDict | EdgeKeyDict]): AdjListInnerDict is keyed by the node ID of the destination node. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph - :param default_node_type: The default node type. 
- :type default_node_type: str - :param edge_type_func: The function to generate the edge type. - :type edge_type_func: Callable[[str, str], str] + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. + + default_node_type : str + The default node type. + + edge_type_key : str + The key used to store the edge type in the edge attribute dictionary. + + edge_type_func : Callable[[str, str], str] + The function to generate the edge type from the source and + destination node types. + + graph_type : str + The type of graph (e.g. 'Graph', 'DiGraph', 'MultiGraph', 'MultiDiGraph'). + + adjlist_outer_dict : AdjListOuterDict | None + The parent AdjListOuterDict. + + Examples + -------- + >>> g = nxadb.Graph(name="MyGraph") + >>> g.add_edge("node/1", "node/2", foo="bar") + >>> g['node/1'] + AdjListInnerDict('node/1') """ def __init__( @@ -824,10 +934,15 @@ def __get_mirrored_edge_attr_or_key_dict( - The "mirror" is the "reverse" adjlist_outer_dict because the adjacency list is different in both directions (i.e _pred and _succ) - :param dst_node_id: The destination node ID. - :type dst_node_id: str - :return: The edge attribute dictionary if it exists. - :rtype: EdgeAttrDict | None + Parameters + ---------- + dst_node_id : str + The destination node ID. + + Returns + ------- + EdgeAttrDict | EdgeKeyDict | None + The edge attribute dictionary or key dictionary if it exists. 
""" if self.adjlist_outer_dict is None: return None @@ -1205,7 +1320,7 @@ def update(self, edges: dict[str, dict[str, Any]]) -> None: # perform write to ArangoDB result = upsert_collection_edges(self.db, to_upsert) - all_good = check_list_for_errors(result) + all_good = check_update_list_for_errors(result) if all_good: # Means no single operation failed, in this case we update the local cache self.__set_adj_elements(edges) @@ -1315,14 +1430,36 @@ class AdjListOuterDict(UserDict[str, AdjListInnerDict]): AdjListOuterDict is keyed by the node ID of the source node. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph - :param default_node_type: The default node type. - :type default_node_type: str - :param edge_type_func: The function to generate the edge type. - :type edge_type_func: Callable[[str, str], str] + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. + + default_node_type : str + The default node type. + + edge_type_key : str + The key used to store the edge type in the edge attribute dictionary. + + edge_type_func : Callable[[str, str], str] + The function to generate the edge type from the source and + destination node types. + + graph_type : str + The type of graph (e.g. 'Graph', 'DiGraph', 'MultiGraph', 'MultiDiGraph'). + + symmetrize_edges_if_directed : bool + Whether to add the reverse edge if the graph is directed. 
+ + Example + ------- + >>> g = nxadb.Graph(name="MyGraph") + >>> g.add_edge("node/1", "node/2", foo="bar") + >>> g._adj + AdjListOuterDict('MyGraph') """ def __init__( @@ -1539,7 +1676,7 @@ def update(self, edges: Any) -> None: ) result = upsert_collection_edges(self.db, separated_by_edge_collection) - all_good = check_list_for_errors(result) + all_good = check_update_list_for_errors(result) if all_good: # Means no single operation failed, in this case we update the local cache self.__set_adj_elements(edges) @@ -1578,11 +1715,7 @@ def items(self, data: str | None = None, default: Any | None = None) -> Any: yield from aql_fetch_data_edge(self.db, e_cols, data, default) def __set_adj_elements( - self, - adj_dict: ( - GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict - ), - node_dict: NodeDict | None = None, + self, adj_dict: AdjDict, node_dict: NodeDict | None = None ) -> None: def set_edge_graph( src_node_id: str, dst_node_id: str, edge: dict[str, Any] diff --git a/nx_arangodb/classes/dict/graph.py b/nx_arangodb/classes/dict/graph.py index c5cf0786..249cafca 100644 --- a/nx_arangodb/classes/dict/graph.py +++ b/nx_arangodb/classes/dict/graph.py @@ -24,12 +24,14 @@ def graph_dict_factory(db: StandardDatabase, graph: Graph) -> Callable[..., GraphDict]: + """Factory function for creating a GraphDict.""" return lambda: GraphDict(db, graph) def graph_attr_dict_factory( db: StandardDatabase, graph: Graph, graph_id: str ) -> Callable[..., GraphAttrDict]: + """Factory function for creating a GraphAttrDict.""" return lambda: GraphAttrDict(db, graph, graph_id) @@ -41,12 +43,22 @@ def graph_attr_dict_factory( def build_graph_attr_dict_data( parent: GraphAttrDict, data: dict[str, Any] ) -> dict[str, Any | GraphAttrDict]: - """Recursively build a GraphAttrDict from a dict. + """Recursively build an GraphAttrDict from a dict. It's possible that **value** is a nested dict, so we need to recursively build a GraphAttrDict for each nested dict. 
- Returns the parent GraphAttrDict. + Parameters + ---------- + parent : GraphAttrDict + The parent GraphAttrDict. + data : dict[str, Any] + The data to build the GraphAttrDict from. + + Returns + ------- + dict[str, Any | GraphAttrDict] + The data for the new GraphAttrDict. """ graph_attr_dict_data = {} for key, value in data.items(): @@ -57,6 +69,25 @@ def build_graph_attr_dict_data( def process_graph_attr_dict_value(parent: GraphAttrDict, key: str, value: Any) -> Any: + """Process the value of a particular key in a GraphAttrDict. + + If the value is a dict, then we need to recursively build a GraphAttrDict. + Otherwise, we return the value as is. + + Parameters + ---------- + parent : GraphAttrDict + The parent GraphAttrDict. + key : str + The key of the value. + value : Any + The value to process. + + Returns + ------- + Any + The processed value. + """ if not isinstance(value, dict): return value @@ -73,10 +104,23 @@ class GraphDict(UserDict[str, Any]): Given that ArangoDB does not have a concept of graph attributes, this class stores the attributes in a collection with the graph name as the document key. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph_name: The graph name. - :type graph_name: str + For now, the collection is called 'nxadb_graphs'. + + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. + + Example + ------- + >>> G = nxadb.Graph(name='MyGraph', foo='bar') + >>> G.graph['foo'] + 'bar' + >>> G.graph['foo'] = 'baz' + >>> del G.graph['foo'] """ def __init__(self, db: StandardDatabase, graph: Graph, *args: Any, **kwargs: Any): @@ -178,12 +222,23 @@ class GraphAttrDict(UserDict[str, Any]): Only used if the value associated with a GraphDict key is a dict. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph - :param graph_id: The ArangoDB graph ID. 
- :type graph_id: str + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph. + + graph_id : str + The ArangoDB document ID of the graph. + + Example + ------- + >>> G = nxadb.Graph(name='MyGraph', foo={'bar': 'baz'}) + >>> G.graph['foo']['bar'] + 'baz' + >>> G.graph['foo']['bar'] = 'qux' """ def __init__( diff --git a/nx_arangodb/classes/dict/node.py b/nx_arangodb/classes/dict/node.py index e55c5171..0ef179d7 100644 --- a/nx_arangodb/classes/dict/node.py +++ b/nx_arangodb/classes/dict/node.py @@ -15,7 +15,7 @@ aql_doc_get_key, aql_doc_has_key, aql_fetch_data, - check_list_for_errors, + check_update_list_for_errors, doc_delete, doc_insert, doc_update, @@ -42,12 +42,14 @@ def node_dict_factory( db: StandardDatabase, graph: Graph, default_node_type: str ) -> Callable[..., NodeDict]: + """Factory function for creating a NodeDict.""" return lambda: NodeDict(db, graph, default_node_type) def node_attr_dict_factory( db: StandardDatabase, graph: Graph ) -> Callable[..., NodeAttrDict]: + """Factory function for creating a NodeAttrDict.""" return lambda: NodeAttrDict(db, graph) @@ -64,7 +66,17 @@ def build_node_attr_dict_data( It's possible that **value** is a nested dict, so we need to recursively build a NodeAttrDict for each nested dict. - Returns the parent NodeAttrDict. + Parameters + ---------- + parent : NodeAttrDict + The parent NodeAttrDict. + data : dict[str, Any] + The data to build the NodeAttrDict from. + + Returns + ------- + dict[str, Any | NodeAttrDict] + The data for the new NodeAttrDict. """ node_attr_dict_data = {} for key, value in data.items(): @@ -75,6 +87,25 @@ def build_node_attr_dict_data( def process_node_attr_dict_value(parent: NodeAttrDict, key: str, value: Any) -> Any: + """Process the value of a particular key in a NodeAttrDict. + + If the value is a dict, then we need to recursively build a NodeAttrDict. + Otherwise, we return the value as is. 
+ + Parameters + ---------- + parent : NodeAttrDict + The parent NodeAttrDict. + key : str + The key of the value. + value : Any + The value to process. + + Returns + ------- + Any + The processed value. + """ if not isinstance(value, dict): return value @@ -91,10 +122,20 @@ class NodeAttrDict(UserDict[str, Any]): """The inner-level of the dict of dict structure representing the nodes (vertices) of a graph. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph object. + + Example + ------- + >>> G = nxadb.Graph("MyGraph") + >>> G.add_node('node/1', foo='bar') + >>> G.nodes['node/1']['foo'] + 'bar' """ def __init__(self, db: StandardDatabase, graph: Graph, *args: Any, **kwargs: Any): @@ -197,13 +238,22 @@ class NodeDict(UserDict[str, NodeAttrDict]): The outer dict is keyed by ArangoDB Vertex IDs and the inner dict is keyed by Vertex attributes. - :param db: The ArangoDB database. - :type db: StandardDatabase - :param graph: The ArangoDB graph. - :type graph: Graph - :param default_node_type: The default node type. Used if the node ID - is not formatted as 'type/id'. - :type default_node_type: str + Parameters + ---------- + db : arango.database.StandardDatabase + The ArangoDB database. + + graph : arango.graph.Graph + The ArangoDB graph object. + + default_node_type : str + The default node type for the graph. 
+ + Example + ------- + >>> G = nxadb.Graph("MyGraph") + >>> G.add_node('node/1', foo='bar') + >>> G.nodes """ def __init__( @@ -370,7 +420,7 @@ def update(self, nodes: Any) -> None: result = upsert_collection_documents(self.db, separated_by_collection) - all_good = check_list_for_errors(result) + all_good = check_update_list_for_errors(result) if all_good: # Means no single operation failed, in this case we update the local cache self.__update_local_nodes(nodes) diff --git a/nx_arangodb/classes/digraph.py b/nx_arangodb/classes/digraph.py index ccf7d65f..9477c60c 100644 --- a/nx_arangodb/classes/digraph.py +++ b/nx_arangodb/classes/digraph.py @@ -17,6 +17,121 @@ class DiGraph(Graph, nx.DiGraph): + """ + Base class for directed graphs. + + Subclasses ``nxadb.Graph`` and ``nx.DiGraph``. + + In order to connect to an ArangoDB instance, the following environment + variables must be set: + + 1. ``DATABASE_HOST`` + 2. ``DATABASE_USERNAME`` + 3. ``DATABASE_PASSWORD`` + 4. ``DATABASE_NAME`` + + Furthermore, the ``name`` parameter is required to create a new graph + or to connect to an existing graph in the database. + + Example + ------- + >>> import os + >>> import networkx as nx + >>> import nx_arangodb as nxadb + >>> + >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" + >>> os.environ["DATABASE_USERNAME"] = "root" + >>> os.environ["DATABASE_PASSWORD"] = "openSesame" + >>> os.environ["DATABASE_NAME"] = "_system" + >>> + >>> G = nxadb.DiGraph(name="MyGraph") + >>> ... + + + Parameters + ---------- + incoming_graph_data : input graph (optional, default: None) + Data to initialize graph. If None (default) an empty + graph is created. Must be used in conjunction with **name** if + the user wants to persist the graph in ArangoDB. NOTE: It is + recommended for incoming_graph_data to be a NetworkX graph due + to faster loading times. + + name : str (optional, default: None) + Name of the graph in the database. 
If the graph already exists, + the user can pass the name of the graph to connect to it. If + the graph does not exist, the user can create a new graph by + passing the name. NOTE: Must be used in conjunction with + **incoming_graph_data** if the user wants to persist the graph + in ArangoDB. + + default_node_type : str (optional, default: None) + Default node type for the graph. In ArangoDB terms, this is the + default vertex collection. If the graph already exists, the user can + omit this parameter and the default node type will be set to the + first vertex collection in the graph. If the graph does not exist, + the user can pass the default node type to create the default vertex + collection. + + edge_type_key : str (optional, default: "_edge_type") + Key used to store the edge type when inserting edges into the graph. + Useful for working with Heterogeneous Graphs. + + edge_type_func : Callable[[str, str], str] (optional, default: None) + Function to determine the edge type between two nodes. If the graph + already exists, the user can omit this parameter and the edge type + function will be set based on the existing edge definitions. If the + graph does not exist, the user can pass a function that determines + the edge type between two nodes. + + edge_collections_attributes : set[str] (optional, default: None) + Set of edge attributes to fetch when executing a NetworkX algorithm. + Useful if the user has edge weights or other edge attributes that + they want to use in a NetworkX algorithm. + + db : arango.database.StandardDatabase (optional, default: None) + ArangoDB database object. If the user has an existing python-arango + connection to the database, they can pass the database object to the graph. + If not provided, a database object will be created using the environment + variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and + DATABASE_NAME. 
+ + read_parallelism : int (optional, default: 10) + Number of parallel threads to use when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + read_batch_size : int (optional, default: 100000) + Number of documents to fetch in a single batch when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + write_batch_size : int (optional, default: 50000) + Number of documents to insert in a single batch when writing data to ArangoDB. + Used for inserting node and edge data into the database if and only if + **incoming_graph_data** is a NetworkX graph. + + write_async : bool (optional, default: True) + Whether to insert data into ArangoDB asynchronously. Used for inserting + node and edge data into the database if and only if **incoming_graph_data** + is a NetworkX graph. + + symmetrize_edges : bool (optional, default: False) + Whether to symmetrize the edges in the graph when fetched from the database. + Only applies to directed graphs, thereby converting them to undirected graphs. + + use_arango_views : bool (optional, default: False) + Whether to use experimental work-in-progress ArangoDB Views for the + nodes, adjacency list, and edges. These views are designed to improve + data processing performance by delegating CRUD operations to the database + whenever possible. NOTE: This feature is experimental and may not work + as expected. + + args: positional arguments for nx.Graph + Additional arguments passed to nx.Graph. + + kwargs: keyword arguments for nx.Graph + Additional arguments passed to nx.Graph. 
+ """ + __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 @@ -38,7 +153,7 @@ def __init__( write_batch_size: int = 50000, write_async: bool = True, symmetrize_edges: bool = False, - use_experimental_views: bool = False, + use_arango_views: bool = False, *args: Any, **kwargs: Any, ): @@ -55,7 +170,7 @@ def __init__( write_batch_size, write_async, symmetrize_edges, - use_experimental_views, + use_arango_views, *args, **kwargs, ) diff --git a/nx_arangodb/classes/function.py b/nx_arangodb/classes/function.py index c9b73822..993db901 100644 --- a/nx_arangodb/classes/function.py +++ b/nx_arangodb/classes/function.py @@ -1,12 +1,12 @@ """ -A collection of CRUD functions for the ArangoDB graph database. -Used by the nx_arangodb Graph, DiGraph, MultiGraph, and MultiDiGraph classes. +A collection of CRUD functions for ArangoDB Graphs. + +Used across the nx_arangodb package to interact with ArangoDB. """ from __future__ import annotations -from collections import UserDict -from typing import Any, Callable, Generator, Optional, Tuple +from typing import Any, Callable, Generator, Tuple import networkx as nx from arango import ArangoError, DocumentInsertError @@ -33,12 +33,7 @@ from ..exceptions import AQLMultipleResultsFound, InvalidTraversalDirection from .enum import GraphType - -def do_load_all_edge_attributes(attributes: set[str]) -> bool: - if len(attributes) == 0: - return True - - return False +RESERVED_KEYS = {"_id", "_key", "_rev", "_from", "_to"} def get_arangodb_graph( @@ -61,15 +56,74 @@ def get_arangodb_graph( ArangoIDtoIndex, EdgeValuesDict, ]: - """Pulls the graph from the database, assuming the graph exists. - - Returns the following representations: - - Node dictionary (nx.Graph) - - Adjacency dictionary (nx.Graph) - - Source Indices (COO) - - Destination Indices (COO) - - Node-ID-to-index mapping (COO) + """Pulls ArangoDB Graph Data from the database using + `phenolrs.networkx.NetworkXLoader`. 
+ + Parameters + ---------- + adb_graph : Graph + The ArangoDB Graph object from python-arango. + + load_node_dict : bool + Whether to load the Node dictionary representation. + + load_adj_dict : bool + Whether to load the Adjacency dictionary representation. + + load_coo : bool + Whether to load the COO representation. + + edge_collections_attributes : set[str] + The set of edge attributes to load. Can be empty. + + load_all_vertex_attributes : bool + Whether to load all vertex attributes. + + load_all_edge_attributes : bool + Whether to load all edge attributes. Cannot be True if + **edge_collections_attributes** is not empty. + + is_directed : bool + Whether to load the graph as directed or undirected. + + is_multigraph : bool + Whether to load the graph as a MultiGraph or Graph. + + symmetrize_edges_if_directed : bool + Whether to duplicate edges in the adjacency dictionary if the graph is directed. + + Returns + ------- + Tuple[ + NodeDict, + GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict, + SrcIndices, + DstIndices, + EdgeIndices, + ArangoIDtoIndex, + EdgeValuesDict + ] + A tuple containing the different representations of the graph. + + Raises + ------ + ValueError + If **load_all_edge_attributes** is True and + **edge_collections_attributes** is not empty. + + ValueError + If none of the load flags are True. + + PhenolrsError + If an error occurs while loading the graph. """ + if len(edge_collections_attributes) != 0 and load_all_edge_attributes: + raise ValueError( + "You have specified to load at least one specific edge attribute" + " and at the same time set the parameter `load_all_edge_attributes`" + " to true. This combination is not allowed." 
+ ) + v_cols = adb_graph.vertex_collections() edge_definitions = adb_graph.edge_definitions() e_cols = {c["edge_collection"] for c in edge_definitions} @@ -94,21 +148,6 @@ def get_arangodb_graph( assert config.username assert config.password - res_do_load_all_edge_attributes = do_load_all_edge_attributes( - edge_collections_attributes - ) - - if res_do_load_all_edge_attributes is not load_all_edge_attributes: - if len(edge_collections_attributes) > 0: - raise ValueError( - "You have specified to load at least one specific edge attribute" - " and at the same time set the parameter `load_all_vertex_attributes`" - " to true. This combination is not allowed." - ) - else: - # We need this case as the user wants by purpose to not load any edge data - res_do_load_all_edge_attributes = load_all_edge_attributes - ( node_dict, adj_dict, @@ -126,7 +165,7 @@ def get_arangodb_graph( load_adj_dict=load_adj_dict, load_coo=load_coo, load_all_vertex_attributes=load_all_vertex_attributes, - load_all_edge_attributes=res_do_load_all_edge_attributes, + load_all_edge_attributes=load_all_edge_attributes, is_directed=is_directed, is_multigraph=is_multigraph, symmetrize_edges_if_directed=symmetrize_edges_if_directed, @@ -146,6 +185,10 @@ def get_arangodb_graph( def json_serializable(cls): + """Decorator to make a class JSON serializable. Only used for + the NodeAttrDict, EdgeAttrDict, and GraphAttrDict classes. + """ + def to_dict(self): return { key: dict(value) if isinstance(value, cls) else value @@ -157,10 +200,11 @@ def to_dict(self): def key_is_string(func: Callable[..., Any]) -> Any: - """Decorator to check if the key is a string.""" + """Decorator to check if the key is a string. + Will attempt to cast the key to a string if it is not. 
+ """ def wrapper(self: Any, key: Any, *args: Any, **kwargs: Any) -> Any: - """""" if key is None: raise ValueError("Key cannot be None.") @@ -210,7 +254,9 @@ def wrapper(self: Any, key: Any, *args: Any, **kwargs: Any) -> Any: def keys_are_strings(func: Callable[..., Any]) -> Any: - """Decorator to check if the keys are strings.""" + """Decorator to check if the keys are strings. + Will attempt to cast the keys to strings if they are not. + """ def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any: data_dict = {} @@ -237,9 +283,6 @@ def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any: return wrapper -RESERVED_KEYS = {"_id", "_key", "_rev", "_from", "_to"} - - def key_is_not_reserved(func: Callable[..., Any]) -> Any: """Decorator to check if the key is not reserved.""" @@ -255,9 +298,7 @@ def wrapper(self: Any, key: str, *args: Any, **kwargs: Any) -> Any: def keys_are_not_reserved(func: Any) -> Any: """Decorator to check if the keys are not reserved.""" - def wrapper( - self: Any, data: dict[Any, Any] | zip[Any], *args: Any, **kwargs: Any - ) -> Any: + def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any: keys: Any if isinstance(data, dict): keys = data.keys() @@ -304,6 +345,7 @@ def aql_single( ) -> Any | None: """Executes an AQL query and returns the first result.""" result = aql_as_list(db, query, bind_vars) + if len(result) == 0: return None @@ -374,6 +416,7 @@ def aql_edge_exists( graph_name: str, direction: str, ) -> bool | None: + """Checks if an edge exists between two nodes.""" return aql_edge( db, src_node_id, @@ -394,6 +437,7 @@ def aql_edge_get( direction: str, can_return_multiple: bool = False, ) -> Any | None: + """Gets an edge between two nodes.""" return_clause = "UNSET(e, '_rev')" if direction == "ANY": return_clause = f"DISTINCT {return_clause}" @@ -418,6 +462,7 @@ def aql_edge_id( direction: str, can_return_multiple: bool = False, ) -> Any | None: + """Gets the edge ID between two nodes.""" 
return_clause = "DISTINCT e._id" if direction == "ANY" else "e._id" return aql_edge( db, @@ -437,6 +482,7 @@ def aql_edge_count_src( graph_name: str, direction: str, ) -> int: + """Counts the number of edges from a source node.""" query = f""" FOR v, e IN 1..1 {direction} @src_node_id GRAPH @graph_name COLLECT id = e._id @@ -461,6 +507,7 @@ def aql_edge_count_src_dst( graph_name: str, direction: str, ) -> int: + """Counts the number of edges between two nodes.""" filter_clause = aql_edge_direction_filter(direction) query = f""" @@ -483,6 +530,7 @@ def aql_edge_count_src_dst( def aql_edge_direction_filter(direction: str) -> str: + """Returns the AQL filter clause for the edge direction.""" if direction == "INBOUND": return "e._from == @dst_node_id" if direction == "OUTBOUND": @@ -505,6 +553,7 @@ def aql_edge( limit_one: bool, can_return_multiple: bool, ) -> Any | None: + """Fetches an edge between two nodes.""" if limit_one and can_return_multiple: raise ValueError("Cannot return multiple results limit_one=True.") @@ -537,6 +586,7 @@ def aql_fetch_data( data: str, default: Any, ) -> Generator[dict[str, Any], None, None]: + """Fetches data from a collection (assumed to be vertex).""" bind_vars = {"data": data, "default": default} query = """ FOR doc IN @@collection @@ -554,6 +604,7 @@ def aql_fetch_data_edge( data: str, default: Any, ) -> Generator[tuple[str, str, Any], None, None]: + """Fetches data from an edge collection.""" bind_vars = {"data": data, "default": default} query = """ FOR doc IN @@collection @@ -581,6 +632,7 @@ def doc_delete(db: StandardDatabase, id: str, **kwargs: Any) -> None: def edges_delete( db: StandardDatabase, graph: Graph, src_node_id: str, **kwargs: Any ) -> None: + """Deletes all edges from a source node.""" remove_statements = "\n".join( f"REMOVE e IN `{edge_def['edge_collection']}` OPTIONS {{ignoreErrors: true}}" # noqa for edge_def in graph.edge_definitions() @@ -656,11 +708,12 @@ def edge_link( def is_arangodb_id(key): + """Checks if 
the key is an ArangoDB ID.""" return "/" in key def get_node_type(key: str, default_node_type: str) -> str: - """Gets the node type.""" + """Gets the collection of a node.""" return key.split("/")[0] if is_arangodb_id(key) else default_node_type @@ -670,7 +723,7 @@ def get_node_id(key: str, default_node_type: str) -> str: def get_node_type_and_id(key: str, default_node_type: str) -> tuple[str, str]: - """Gets the node type and ID.""" + """Gets the node collection (i.e type) and ID.""" return ( (key.split("/")[0], key) if is_arangodb_id(key) @@ -690,6 +743,9 @@ def get_node_type_and_key(key: str, default_node_type: str) -> tuple[str, str]: def get_update_dict( parent_keys: list[str], update_dict: dict[str, Any] ) -> dict[str, Any]: + """Builds the update dictionary for nested documents. + Useful for updating nested documents in ArangoDB. + """ if parent_keys: for key in reversed(parent_keys): update_dict = {key: update_dict} @@ -698,6 +754,8 @@ def get_update_dict( class ArangoDBBatchError(ArangoError): + """Custom exception for batch errors.""" + def __init__(self, errors): self.errors = errors super().__init__(self._format_errors()) @@ -706,11 +764,11 @@ def _format_errors(self): return "\n".join(str(error) for error in self.errors) -def check_list_for_errors(lst): +def check_update_list_for_errors(lst): + """Checks if a list contains any errors.""" for element in lst: - if element is type(bool): - if element is False: - return False + if element is False: + return False elif isinstance(element, list): for sub_element in element: @@ -722,18 +780,12 @@ def check_list_for_errors(lst): def separate_nodes_by_collections( nodes: dict[str, Any], default_collection: str -) -> Any: - """ - Separate the dictionary into collections based on whether keys contain '/'. - :param nodes: - The input dictionary with keys that may or may not contain '/'. - :param default_collection: - The name of the default collection for keys without '/'. 
- :return: A dictionary where the keys are collection names and the - values are dictionaries of key-value pairs belonging to those - collections. +) -> dict[str, dict[str, Any]]: + """Separate the dictionary into collections based on whether IDs contain '/'. + Returns dictionary where the keys are collection names and the values are + dictionaries of key-value pairs belonging to those collections. """ - separated: Any = {} + separated: dict[str, dict[str, Any]] = {} for key, value in nodes.items(): collection, doc_key = get_node_type_and_key(key, default_collection) @@ -746,15 +798,14 @@ def separate_nodes_by_collections( return separated -def transform_local_documents_for_adb(original_documents): +def transform_local_documents_for_adb( + original_documents: dict[str, Any] +) -> list[dict[str, Any]]: + """Transform original documents into a format suitable for UPSERT + operations in ArangoDB. Returns a list of documents with '_key' attribute + and additional attributes. """ - Transform original documents into a format suitable for UPSERT - operations in ArangoDB. - :param original_documents: Original documents in the format - {'key': {'any-attr-key': 'any-attr-value'}}. - :return: List of documents with '_key' attribute and additional attributes. - """ - transformed_documents = [] + transformed_documents: list[dict[str, Any]] = [] for key, values in original_documents.items(): transformed_doc = {"_key": key} @@ -764,18 +815,13 @@ def transform_local_documents_for_adb(original_documents): return transformed_documents -def upsert_collection_documents(db: StandardDatabase, separated: Any) -> Any: - """ - Process each collection in the separated dictionary. - :param db: The ArangoDB database object. - :param separated: A dictionary where the keys are collection names and the - values are dictionaries - of key-value pairs belonging to those collections. - :return: A list of results from the insert_many operation. 
- If inserting a document fails, the exception is not raised but - returned as an object in the result list. +def upsert_collection_documents( + db: StandardDatabase, separated: dict[str, dict[str, Any]] +) -> list[Any]: + """Process each collection in the separated dictionary. + If inserting a document fails, the exception is not raised but + returned as an object in the result list. """ - results = [] for collection_name, documents in separated.items(): @@ -790,15 +836,14 @@ def upsert_collection_documents(db: StandardDatabase, separated: Any) -> Any: return results -def separate_edges_by_collections_graph(edges: Any, default_node_type: str) -> Any: +def separate_edges_by_collections_graph( + edges: GraphAdjDict, default_node_type: str +) -> dict[str, list[dict[str, Any]]]: + """Separate the dictionary into collections for Graph and DiGraph types. + Returns a dictionary where the keys are collection names and the + values are dictionaries of key-value pairs belonging to those collections. """ - Separate the dictionary into collections for Graph and DiGraph types. - :param edges: The input dictionary with keys that must contain the real doc id. - :param default_node_type: The name of the default collection for keys without '/'. - :return: A dictionary where the keys are collection names and the - values are dictionaries of key-value pairs belonging to those collections. - """ - separated: Any = {} + separated: dict[str, list[dict[str, Any]]] = {} for from_doc_id, target_dict in edges.items(): for to_doc_id, edge_doc in target_dict.items(): @@ -818,15 +863,15 @@ def separate_edges_by_collections_graph(edges: Any, default_node_type: str) -> A return separated -def separate_edges_by_collections_multigraph(edges: Any, default_node_type: str) -> Any: +def separate_edges_by_collections_multigraph( + edges: MultiGraphAdjDict, default_node_type: str +) -> Any: """ Separate the dictionary into collections for MultiGraph and MultiDiGraph types. 
- :param edges: The input dictionary with keys that must contain the real doc id. - :param default_node_type: The name of the default collection for keys without '/'. - :return: A dictionary where the keys are collection names and the - values are dictionaries of key-value pairs belonging to those collections. + Returns a dictionary where the keys are collection names and the + values are dictionaries of key-value pairs belonging to those collections. """ - separated: Any = {} + separated: dict[str, list[dict[str, Any]]] = {} for from_doc_id, target_dict in edges.items(): for to_doc_id, edge_doc in target_dict.items(): @@ -849,15 +894,12 @@ def separate_edges_by_collections_multigraph(edges: Any, default_node_type: str) def separate_edges_by_collections( - edges: Any, graph_type: str, default_node_type: str + edges: GraphAdjDict | MultiGraphAdjDict, graph_type: str, default_node_type: str ) -> Any: """ Wrapper function to separate the dictionary into collections based on graph type. - :param edges: The input dictionary with keys that must contain the real doc id. - :param graph_type: The type of graph to create. - :param default_node_type: The name of the default collection for keys without '/'. - :return: A dictionary where the keys are collection names and the - values are dictionaries of key-value pairs belonging to those collections. + Returns a dictionary where the keys are collection names and the + values are dictionaries of key-value pairs belonging to those collections. """ if graph_type in [GraphType.Graph.name, GraphType.DiGraph.name]: return separate_edges_by_collections_graph(edges, default_node_type) @@ -867,16 +909,13 @@ def separate_edges_by_collections( raise ValueError(f"Unsupported graph type: {graph_type}") -def upsert_collection_edges(db: StandardDatabase, separated: Any) -> Any: - """ - Process each collection in the separated dictionary. - :param db: The ArangoDB database object. 
- :param separated: A dictionary where the keys are collection names and the - values are dictionaries - of key-value pairs belonging to those collections. - :return: A list of results from the insert_many operation. - If inserting a document fails, the exception is not raised but - returned as an object in the result list. +def upsert_collection_edges( + db: StandardDatabase, separated: dict[str, list[dict[str, Any]]] +) -> Any: + """Process each collection in the separated dictionary. + Returns a list of results from the insert_many operation. + If inserting a document fails, the exception is not raised but + returned as an object in the result list. """ results = [] diff --git a/nx_arangodb/classes/graph.py b/nx_arangodb/classes/graph.py index 2bb23831..9cadb069 100644 --- a/nx_arangodb/classes/graph.py +++ b/nx_arangodb/classes/graph.py @@ -18,7 +18,7 @@ ) from nx_arangodb.logger import logger -from .coreviews import CustomAdjacencyView +from .coreviews import ArangoAdjacencyView from .dict import ( adjlist_inner_dict_factory, adjlist_outer_dict_factory, @@ -28,7 +28,7 @@ node_dict_factory, ) from .function import get_node_id -from .reportviews import CustomEdgeView, CustomNodeView +from .reportviews import ArangoEdgeView, ArangoNodeView networkx_api = nxadb.utils.decorators.networkx_class(nx.Graph) # type: ignore @@ -49,6 +49,121 @@ class BaseLanguageModel: # type: ignore[no-redef] class Graph(nx.Graph): + """ + Base class for undirected graphs. Designed to work with ArangoDB graphs. + + Subclasses ``nx.Graph``. + + In order to connect to an ArangoDB instance, the following environment + variables must be set: + + 1. ``DATABASE_HOST`` + 2. ``DATABASE_USERNAME`` + 3. ``DATABASE_PASSWORD`` + 4. ``DATABASE_NAME`` + + Furthermore, the ``name`` parameter is required to create a new graph + or to connect to an existing graph in the database. 
+ + Example + ------- + >>> import os + >>> import networkx as nx + >>> import nx_arangodb as nxadb + >>> + >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" + >>> os.environ["DATABASE_USERNAME"] = "root" + >>> os.environ["DATABASE_PASSWORD"] = "openSesame" + >>> os.environ["DATABASE_NAME"] = "_system" + >>> + >>> G = nxadb.Graph(name="MyGraph") + >>> ... + + + Parameters + ---------- + incoming_graph_data : input graph (optional, default: None) + Data to initialize graph. If None (default) an empty + graph is created. Must be used in conjunction with **name** if + the user wants to persist the graph in ArangoDB. NOTE: It is + recommended for incoming_graph_data to be a NetworkX graph due + to faster loading times. + + name : str (optional, default: None) + Name of the graph in the database. If the graph already exists, + the user can pass the name of the graph to connect to it. If + the graph does not exist, the user can create a new graph by + passing the name. NOTE: Must be used in conjunction with + **incoming_graph_data** if the user wants to persist the graph + in ArangoDB. + + default_node_type : str (optional, default: None) + Default node type for the graph. In ArangoDB terms, this is the + default vertex collection. If the graph already exists, the user can + omit this parameter and the default node type will be set to the + first vertex collection in the graph. If the graph does not exist, + the user can pass the default node type to create the default vertex + collection. + + edge_type_key : str (optional, default: "_edge_type") + Key used to store the edge type when inserting edges into the graph. + Useful for working with Heterogeneous Graphs. + + edge_type_func : Callable[[str, str], str] (optional, default: None) + Function to determine the edge type between two nodes. If the graph + already exists, the user can omit this parameter and the edge type + function will be set based on the existing edge definitions. 
If the + graph does not exist, the user can pass a function that determines + the edge type between two nodes. + + edge_collections_attributes : set[str] (optional, default: None) + Set of edge attributes to fetch when executing a NetworkX algorithm. + Useful if the user has edge weights or other edge attributes that + they want to use in a NetworkX algorithm. + + db : arango.database.StandardDatabase (optional, default: None) + ArangoDB database object. If the user has an existing python-arango + connection to the database, they can pass the database object to the graph. + If not provided, a database object will be created using the environment + variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and + DATABASE_NAME. + + read_parallelism : int (optional, default: 10) + Number of parallel threads to use when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + read_batch_size : int (optional, default: 100000) + Number of documents to fetch in a single batch when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + write_batch_size : int (optional, default: 50000) + Number of documents to insert in a single batch when writing data to ArangoDB. + Used for inserting node and edge data into the database if and only if + **incoming_graph_data** is a NetworkX graph. + + write_async : bool (optional, default: True) + Whether to insert data into ArangoDB asynchronously. Used for inserting + node and edge data into the database if and only if **incoming_graph_data** + is a NetworkX graph. + + symmetrize_edges : bool (optional, default: False) + Whether to symmetrize the edges in the graph when fetched from the database. + Only applies to directed graphs, thereby converting them to undirected graphs. + + use_arango_views : bool (optional, default: False) + Whether to use experimental work-in-progress ArangoDB Views for the + nodes, adjacency list, and edges. 
These views are designed to improve + data processing performance by delegating CRUD operations to the database + whenever possible. NOTE: This feature is experimental and may not work + as expected. + + args: positional arguments for nx.Graph + Additional arguments passed to nx.Graph. + + kwargs: keyword arguments for nx.Graph + Additional arguments passed to nx.Graph. + """ + __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 @@ -70,13 +185,13 @@ def __init__( write_batch_size: int = 50000, write_async: bool = True, symmetrize_edges: bool = False, - use_experimental_views: bool = False, + use_arango_views: bool = False, *args: Any, **kwargs: Any, ): self.__db = None self.__name = None - self.__use_experimental_views = use_experimental_views + self.__use_arango_views = use_arango_views self.__graph_exists_in_db = False self.__set_db(db) @@ -313,7 +428,7 @@ def __set_graph_name(self, name: Any = None) -> None: m = "Cannot set graph name without setting the database first" raise DatabaseNotSet(m) - if name is None: + if not name: self.__graph_exists_in_db = False logger.warning(f"**name** not set for {self.__class__.__name__}") return @@ -378,6 +493,27 @@ def clear_nxcg_cache(self): def query( self, query: str, bind_vars: dict[str, Any] = {}, **kwargs: Any ) -> Cursor: + """Execute an AQL query on the graph. + + Read more about AQL here: + https://www.arangodb.com/docs/stable/aql/ + + Parameters + ---------- + query : str + AQL query to execute. + + bind_vars : dict[str, Any] (optional, default: {}) + Bind variables to pass to the query. + + kwargs : dict[str, Any] + Additional keyword arguments to pass to the query. + + Returns + ------- + arango.cursor.Cursor + Cursor object containing the results of the query. 
+ """ return nxadb.classes.function.aql(self.db, query, bind_vars, **kwargs) # def pull(self) -> None: @@ -389,6 +525,26 @@ def query( def chat( self, prompt: str, verbose: bool = False, llm: BaseLanguageModel | None = None ) -> str: + """Chat with the graph using an LLM. Use at your own risk. + + Parameters + ---------- + prompt : str + Prompt to chat with the graph. + + verbose : bool (optional, default: False) + Whether to print the intermediate steps of the conversation. + + llm : langchain_core.language_models.BaseLanguageModel (optional, default: None) + Language model to use for the conversation. If None, the default + language model is ChatOpenAI with the GPT-4 model, which expects the + OpenAI API key to be set in the environment variable OPENAI_API_KEY. + + Returns + ------- + str + Response from the Language Model. + """ if not LLM_AVAILABLE: m = "LLM dependencies not installed. Install with **pip install nx-arangodb[llm]**" # noqa: E501 raise ModuleNotFoundError(m) @@ -416,33 +572,33 @@ def chat( @cached_property def nodes(self): - if self.__use_experimental_views and self.graph_exists_in_db: - logger.warning("nxadb.CustomNodeView is currently EXPERIMENTAL") - return CustomNodeView(self) + if self.__use_arango_views and self.graph_exists_in_db: + logger.warning("nxadb.ArangoNodeView is currently EXPERIMENTAL") + return ArangoNodeView(self) return super().nodes @cached_property def adj(self): - if self.__use_experimental_views and self.graph_exists_in_db: - logger.warning("nxadb.CustomAdjacencyView is currently EXPERIMENTAL") - return CustomAdjacencyView(self._adj) + if self.__use_arango_views and self.graph_exists_in_db: + logger.warning("nxadb.ArangoAdjacencyView is currently EXPERIMENTAL") + return ArangoAdjacencyView(self._adj) return super().adj @cached_property def edges(self): - if self.__use_experimental_views and self.graph_exists_in_db: + if self.__use_arango_views and self.graph_exists_in_db: if self.is_directed(): - 
logger.warning("ArangoEdgeView for DiGraphs not yet implemented") return super().edges if self.is_multigraph(): - logger.warning("CustomEdgeView for MultiGraphs not yet implemented") + logger.warning("ArangoEdgeView for MultiGraphs not yet implemented") return super().edges - logger.warning("nxadb.CustomEdgeView is currently EXPERIMENTAL") - return CustomEdgeView(self) + logger.warning("nxadb.ArangoEdgeView is currently EXPERIMENTAL") + return ArangoEdgeView(self) return super().edges diff --git a/nx_arangodb/classes/multidigraph.py b/nx_arangodb/classes/multidigraph.py index fe25eb93..dc05e592 100644 --- a/nx_arangodb/classes/multidigraph.py +++ b/nx_arangodb/classes/multidigraph.py @@ -14,6 +14,134 @@ class MultiDiGraph(MultiGraph, DiGraph, nx.MultiDiGraph): + """ + A directed graph class that can store multiedges. + + Subclasses ``nxadb.MultiGraph``, ``nxadb.DiGraph``, and ``nx.MultiDiGraph``. + + In order to connect to an ArangoDB instance, the following environment + variables must be set: + + 1. ``DATABASE_HOST`` + 2. ``DATABASE_USERNAME`` + 3. ``DATABASE_PASSWORD`` + 4. ``DATABASE_NAME`` + + Furthermore, the ``name`` parameter is required to create a new graph + or to connect to an existing graph in the database. + + Example + ------- + >>> import os + >>> import networkx as nx + >>> import nx_arangodb as nxadb + >>> + >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" + >>> os.environ["DATABASE_USERNAME"] = "root" + >>> os.environ["DATABASE_PASSWORD"] = "openSesame" + >>> os.environ["DATABASE_NAME"] = "_system" + >>> + >>> G = nxadb.MultiDiGraph(name="MyGraph") + >>> ... + + + Parameters + ---------- + incoming_graph_data : input graph (optional, default: None) + Data to initialize graph. If None (default) an empty + graph is created. Must be used in conjunction with **name** if + the user wants to persist the graph in ArangoDB.
NOTE: It is + recommended for incoming_graph_data to be a NetworkX graph due + to faster loading times. + + multigraph_input : bool or None (default None) + Note: Only used when `incoming_graph_data` is a dict. + If True, `incoming_graph_data` is assumed to be a + dict-of-dict-of-dict-of-dict structure keyed by + node to neighbor to edge keys to edge data for multi-edges. + A NetworkXError is raised if this is not the case. + If False, :func:`to_networkx_graph` is used to try to determine + the dict's graph data structure as either a dict-of-dict-of-dict + keyed by node to neighbor to edge data, or a dict-of-iterable + keyed by node to neighbors. + If None, the treatment for True is tried, but if it fails, + the treatment for False is tried. + + name : str (optional, default: None) + Name of the graph in the database. If the graph already exists, + the user can pass the name of the graph to connect to it. If + the graph does not exist, the user can create a new graph by + passing the name. NOTE: Must be used in conjunction with + **incoming_graph_data** if the user wants to persist the graph + in ArangoDB. + + default_node_type : str (optional, default: None) + Default node type for the graph. In ArangoDB terms, this is the + default vertex collection. If the graph already exists, the user can + omit this parameter and the default node type will be set to the + first vertex collection in the graph. If the graph does not exist, + the user can pass the default node type to create the default vertex + collection. + + edge_type_key : str (optional, default: "_edge_type") + Key used to store the edge type when inserting edges into the graph. + Useful for working with Heterogeneous Graphs. + + edge_type_func : Callable[[str, str], str] (optional, default: None) + Function to determine the edge type between two nodes. If the graph + already exists, the user can omit this parameter and the edge type + function will be set based on the existing edge definitions. 
If the + graph does not exist, the user can pass a function that determines + the edge type between two nodes. + + edge_collections_attributes : set[str] (optional, default: None) + Set of edge attributes to fetch when executing a NetworkX algorithm. + Useful if the user has edge weights or other edge attributes that + they want to use in a NetworkX algorithm. + + db : arango.database.StandardDatabase (optional, default: None) + ArangoDB database object. If the user has an existing python-arango + connection to the database, they can pass the database object to the graph. + If not provided, a database object will be created using the environment + variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and + DATABASE_NAME. + + read_parallelism : int (optional, default: 10) + Number of parallel threads to use when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + read_batch_size : int (optional, default: 100000) + Number of documents to fetch in a single batch when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + write_batch_size : int (optional, default: 50000) + Number of documents to insert in a single batch when writing data to ArangoDB. + Used for inserting node and edge data into the database if and only if + **incoming_graph_data** is a NetworkX graph. + + write_async : bool (optional, default: True) + Whether to insert data into ArangoDB asynchronously. Used for inserting + node and edge data into the database if and only if **incoming_graph_data** + is a NetworkX graph. + + symmetrize_edges : bool (optional, default: False) + Whether to symmetrize the edges in the graph when fetched from the database. + Only applies to directed graphs, thereby converting them to undirected graphs. + + use_arango_views : bool (optional, default: False) + Whether to use experimental work-in-progress ArangoDB Views for the + nodes, adjacency list, and edges. 
These views are designed to improve + data processing performance by delegating CRUD operations to the database + whenever possible. NOTE: This feature is experimental and may not work + as expected. + + args: positional arguments for nx.Graph + Additional arguments passed to nx.Graph. + + kwargs: keyword arguments for nx.Graph + Additional arguments passed to nx.Graph. + """ + __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 @@ -36,7 +164,7 @@ def __init__( write_batch_size: int = 50000, write_async: bool = True, symmetrize_edges: bool = False, - use_experimental_views: bool = False, + use_arango_views: bool = False, *args: Any, **kwargs: Any, ): @@ -54,7 +182,7 @@ def __init__( write_batch_size, write_async, symmetrize_edges, - use_experimental_views, + use_arango_views, *args, **kwargs, ) diff --git a/nx_arangodb/classes/multigraph.py b/nx_arangodb/classes/multigraph.py index 4f6d6b79..07c30b7f 100644 --- a/nx_arangodb/classes/multigraph.py +++ b/nx_arangodb/classes/multigraph.py @@ -15,6 +15,134 @@ class MultiGraph(Graph, nx.MultiGraph): + """ + An undirected graph class that can store multiedges. + + Subclasses ``nxadb.Graph`` and ``nx.MultiGraph``. + + In order to connect to an ArangoDB instance, the following environment + variables must be set: + + 1. ``DATABASE_HOST`` + 2. ``DATABASE_USERNAME`` + 3. ``DATABASE_PASSWORD`` + 4. ``DATABASE_NAME`` + + Furthermore, the ``name`` parameter is required to create a new graph + or to connect to an existing graph in the database. + + Example + ------- + >>> import os + >>> import networkx as nx + >>> import nx_arangodb as nxadb + >>> + >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" + >>> os.environ["DATABASE_USERNAME"] = "root" + >>> os.environ["DATABASE_PASSWORD"] = "openSesame" + >>> os.environ["DATABASE_NAME"] = "_system" + >>> + >>> G = nxadb.MultiGraph(name="MyGraph") + >>> ...
+ + + Parameters + ---------- + incoming_graph_data : input graph (optional, default: None) + Data to initialize graph. If None (default) an empty + graph is created. Must be used in conjunction with **name** if + the user wants to persist the graph in ArangoDB. NOTE: It is + recommended for incoming_graph_data to be a NetworkX graph due + to faster loading times. + + multigraph_input : bool or None (default None) + Note: Only used when `incoming_graph_data` is a dict. + If True, `incoming_graph_data` is assumed to be a + dict-of-dict-of-dict-of-dict structure keyed by + node to neighbor to edge keys to edge data for multi-edges. + A NetworkXError is raised if this is not the case. + If False, :func:`to_networkx_graph` is used to try to determine + the dict's graph data structure as either a dict-of-dict-of-dict + keyed by node to neighbor to edge data, or a dict-of-iterable + keyed by node to neighbors. + If None, the treatment for True is tried, but if it fails, + the treatment for False is tried. + + name : str (optional, default: None) + Name of the graph in the database. If the graph already exists, + the user can pass the name of the graph to connect to it. If + the graph does not exist, the user can create a new graph by + passing the name. NOTE: Must be used in conjunction with + **incoming_graph_data** if the user wants to persist the graph + in ArangoDB. + + default_node_type : str (optional, default: None) + Default node type for the graph. In ArangoDB terms, this is the + default vertex collection. If the graph already exists, the user can + omit this parameter and the default node type will be set to the + first vertex collection in the graph. If the graph does not exist, + the user can pass the default node type to create the default vertex + collection. + + edge_type_key : str (optional, default: "_edge_type") + Key used to store the edge type when inserting edges into the graph. + Useful for working with Heterogeneous Graphs. 
+ + edge_type_func : Callable[[str, str], str] (optional, default: None) + Function to determine the edge type between two nodes. If the graph + already exists, the user can omit this parameter and the edge type + function will be set based on the existing edge definitions. If the + graph does not exist, the user can pass a function that determines + the edge type between two nodes. + + edge_collections_attributes : set[str] (optional, default: None) + Set of edge attributes to fetch when executing a NetworkX algorithm. + Useful if the user has edge weights or other edge attributes that + they want to use in a NetworkX algorithm. + + db : arango.database.StandardDatabase (optional, default: None) + ArangoDB database object. If the user has an existing python-arango + connection to the database, they can pass the database object to the graph. + If not provided, a database object will be created using the environment + variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and + DATABASE_NAME. + + read_parallelism : int (optional, default: 10) + Number of parallel threads to use when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + read_batch_size : int (optional, default: 100000) + Number of documents to fetch in a single batch when reading data from ArangoDB. + Used for fetching node and edge data from the database. + + write_batch_size : int (optional, default: 50000) + Number of documents to insert in a single batch when writing data to ArangoDB. + Used for inserting node and edge data into the database if and only if + **incoming_graph_data** is a NetworkX graph. + + write_async : bool (optional, default: True) + Whether to insert data into ArangoDB asynchronously. Used for inserting + node and edge data into the database if and only if **incoming_graph_data** + is a NetworkX graph. + + symmetrize_edges : bool (optional, default: False) + Whether to symmetrize the edges in the graph when fetched from the database. 
+ Only applies to directed graphs, thereby converting them to undirected graphs. + + use_arango_views : bool (optional, default: False) + Whether to use experimental work-in-progress ArangoDB Views for the + nodes, adjacency list, and edges. These views are designed to improve + data processing performance by delegating CRUD operations to the database + whenever possible. NOTE: This feature is experimental and may not work + as expected. + + args: positional arguments for nx.Graph + Additional arguments passed to nx.Graph. + + kwargs: keyword arguments for nx.Graph + Additional arguments passed to nx.Graph. + """ + __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 @@ -37,7 +165,7 @@ def __init__( write_batch_size: int = 50000, write_async: bool = True, symmetrize_edges: bool = False, - use_experimental_views: bool = False, + use_arango_views: bool = False, *args: Any, **kwargs: Any, ): @@ -54,7 +182,7 @@ def __init__( write_batch_size, write_async, symmetrize_edges, - use_experimental_views, + use_arango_views, *args, **kwargs, ) diff --git a/nx_arangodb/classes/reportviews.py b/nx_arangodb/classes/reportviews.py index 9741272b..ae8bff1d 100644 --- a/nx_arangodb/classes/reportviews.py +++ b/nx_arangodb/classes/reportviews.py @@ -1,6 +1,15 @@ -""" -An override of the NodeView, NodeDataView, EdgeView, and EdgeDataView classes -to allow for custom data filtering in the database instead of in Python. +"""Experimental overrides of the NetworkX Views that represent the +nodes and edges of the graph. + +Overriding these classes allows us to implement custom logic for +data filtering and updating in the database, instead of in Python. + +These classes are a work-in-progress. The main goal is to try +to delegate data processing to ArangoDB, whenever possible. 
+ +To use these experimental views, you must set **use_arango_views=True** +when creating a new graph object: +>>> G = nxadb.Graph(name="MyGraph", use_arango_views=True) """ from __future__ import annotations @@ -9,25 +18,86 @@ import nx_arangodb as nxadb -from .function import get_node_id +class ArangoNodeView(nx.classes.reportviews.NodeView): + """The ArangoNodeView class is an experimental subclass of the + NodeView class. -class CustomNodeView(nx.classes.reportviews.NodeView): + Contrary to the original NodeView class, the ArangoNodeView is + writable to allow for bulk updates to the graph in the DB. + """ + + # DataView method def __call__(self, data=False, default=None): if data is False: return self - return CustomNodeDataView(self._nodes, data, default) + return ArangoNodeDataView(self._nodes, data, default) def data(self, data=True, default=None): + """Return a read-only view of node data. + + Parameters + ---------- + data : bool or node data key, default=True + If ``data=True`` (the default), return a `NodeDataView` object that + maps each node to *all* of its attributes. `data` may also be an + arbitrary key, in which case the `NodeDataView` maps each node to + the value for the keyed attribute. In this case, if a node does + not have the `data` attribute, the `default` value is used. + default : object, default=None + The value used when a node does not have a specific attribute. + + Returns + ------- + NodeDataView + The layout of the returned NodeDataView depends on the value of the + `data` parameter. + + Notes + ----- + If ``data=False``, returns a `NodeView` object without data. + + See Also + -------- + NodeDataView + """ if data is False: return self - return CustomNodeDataView(self._nodes, data, default) + return ArangoNodeDataView(self._nodes, data, default) def update(self, data): + """Update a set of nodes within the graph. 
+ + The benefit of this method is that it allows for bulk API updates, + as opposed to `G.add_nodes_from`, which currently makes + one API request per node. + + Example + ------- + >>> G = nxadb.Graph(name="MyGraph") + >>> G.nodes.update( + { + 'node/1': {"foo": "bar"}, + 'node/2': {"foo": "baz"}, + ... + }) + """ return self._nodes.update(data) -class CustomNodeDataView(nx.classes.reportviews.NodeDataView): +class ArangoNodeDataView(nx.classes.reportviews.NodeDataView): + """The ArangoNodeDataView class is an experimental subclass of the + NodeDataView class. + + The main use for this class is to iterate through node-data pairs. + The data can be the entire data-dictionary for each node, or it + can be a specific attribute (with default) for each node. + + In the event that the data is a specific attribute, the data is + filtered server-side, instead of in Python. This is done by using + the ArangoDB Query Language (AQL) to filter the data. + """ + def __iter__(self): data = self._data if data is False: @@ -54,13 +124,23 @@ def __iter__(self): ########################### -class CustomEdgeDataView(nx.classes.reportviews.EdgeDataView): +class ArangoEdgeDataView(nx.classes.reportviews.EdgeDataView): + """The ArangoEdgeDataView class is an experimental subclass of the + EdgeDataView class. - ###################### - # NOTE: Monkey Patch # - ###################### + This view is primarily used to iterate over the edges reporting + edges as node-tuples with edge data optionally reported. + + In the event that the data is a specific attribute, the data is + filtered server-side, instead of in Python. This is done by using + the ArangoDB Query Language (AQL) to filter the data. 
+ """ def __iter__(self): + ###################### + # NOTE: Monkey Patch # + ###################### + if self._nbunch is None and self._data not in [None, True, False]: # Reason: We can utilize AQL to filter the data we # want to return, instead of filtering it in Python @@ -70,14 +150,21 @@ def __iter__(self): # is the AdjListOuterDict object that has a custom # items() method that can filter data with AQL. - # Filter for self._data server-side yield from self._adjdict.items(data=self._data, default=self._default) else: yield from super().__iter__() -class CustomEdgeView(nx.classes.reportviews.EdgeView): - dataview = CustomEdgeDataView +class ArangoEdgeView(nx.classes.reportviews.EdgeView): + """The ArangoEdgeView class is an experimental subclass of the + EdgeView class. + + The __len__ method is overridden to count the number of edges + in the graph by querying the database, instead of iterating + through the edges in Python. + """ + + dataview = ArangoEdgeDataView def __len__(self): diff --git a/nx_arangodb/convert.py b/nx_arangodb/convert.py index 17458b90..09cfa973 100644 --- a/nx_arangodb/convert.py +++ b/nx_arangodb/convert.py @@ -1,3 +1,21 @@ +"""Functions to convert between NetworkX, NetworkX-ArangoDB, +and NetworkX-cuGraph. 
+ +Examples +-------- +>>> import networkx as nx +>>> import nx_arangodb as nxadb +>>> import nx_cugraph as nxcg +>>> +>>> G = nx.Graph() +>>> G.add_edge(1, 2, weight=3.0) +>>> G.add_edge(2, 3, weight=7.5) +>>> +>>> G_ADB = nxadb.convert._to_nxadb_graph(G) +>>> G_CG = nxadb.convert._to_nxcg_graph(G_ADB) +>>> G_NX = nxadb.convert._to_nx_graph(G_ADB) +""" + from __future__ import annotations import time @@ -8,7 +26,6 @@ import nx_arangodb as nxadb from nx_arangodb.classes.dict.adj import AdjListOuterDict from nx_arangodb.classes.dict.node import NodeDict -from nx_arangodb.classes.function import do_load_all_edge_attributes from nx_arangodb.logger import logger try: @@ -29,6 +46,22 @@ def _to_nx_graph(G: Any, *args: Any, **kwargs: Any) -> nx.Graph: + """Convert a graph to a NetworkX graph. + + Parameters + ---------- + G : Any + The graph to convert. + + Currently supported types: + - nx.Graph + - nxadb.Graph + + Returns + ------- + nx.Graph + The converted graph. + """ logger.debug(f"_to_nx_graph for {G.__class__.__name__}") if isinstance(G, nxadb.Graph): @@ -41,11 +74,28 @@ def _to_nx_graph(G: Any, *args: Any, **kwargs: Any) -> nx.Graph: def _to_nxadb_graph( - G: Any, - *args: Any, - as_directed: bool = False, - **kwargs: Any, + G: Any, *args: Any, as_directed: bool = False, **kwargs: Any ) -> nxadb.Graph: + """Convert a graph to a NetworkX-ArangoDB graph. + + Parameters + ---------- + G : Any + The graph to convert. + + Currently supported types: + - nx.Graph + - nxadb.Graph + + as_directed : bool, optional + Whether to convert the graph to a directed graph. + Default is False. + + Returns + ------- + nxadb.Graph + The converted graph. + """ logger.debug(f"_to_nxadb_graph for {G.__class__.__name__}") if isinstance(G, nxadb.Graph): @@ -60,6 +110,28 @@ def _to_nxadb_graph( if GPU_AVAILABLE: def _to_nxcg_graph(G: Any, as_directed: bool = False) -> nxcg.Graph: + """Convert a graph to a NetworkX-cuGraph graph. + + NOTE: Only supported if NetworkX-cuGraph is installed. 
+ + Parameters + ---------- + G : Any + The graph to convert. + + Currently supported types: + - nxadb.Graph + - nxcg.Graph + + as_directed : bool, optional + Whether to convert the graph to a directed graph. + Default is False. + + Returns + ------- + nxcg.Graph + The converted graph. + """ logger.debug(f"_to_nxcg_graph for {G.__class__.__name__}") if isinstance(G, nxcg.Graph): @@ -87,8 +159,23 @@ def nx_to_nxadb( *args: Any, as_directed: bool = False, **kwargs: Any, - # name: str | None = None, ) -> nxadb.Graph: + """Convert a NetworkX graph to a NetworkX-ArangoDB graph. + + Parameters + ---------- + graph : nx.Graph + The NetworkX graph to convert. + + as_directed : bool, optional + Whether to convert the graph to a directed graph. + Default is False. + + Returns + ------- + nxadb.Graph + The converted graph. + """ logger.debug(f"from_networkx for {graph.__class__.__name__}") klass: type[nxadb.Graph] @@ -104,11 +191,48 @@ def nx_to_nxadb( else: klass = nxadb.Graph - # name=kwargs.get("name") ? return klass(incoming_graph_data=graph) def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph: + """Convert a NetworkX-ArangoDB graph to a NetworkX graph. + + This function will pull the graph from the database if it does + not exist in the cache. A new NetworkX graph will be created + using the node and adjacency dictionaries that are fetched. + + NOTE: The current downside of this approach is that we are not + able to take advantage of the custom Dictionary classes that we + have implemented in nx_arangodb.classes.dict. This is because + the node and adjacency dictionaries are fetched as regular + Python dictionaries. Furthermore, we don't cache the dictionaries + themselves, so we have to fetch them every time we convert the + graph, which is currently being invoked on *every* algorithm + call. See the note below for a potential solution. 
As a temporary + workaround, users can do the following: + + ``` + import networkx as nx + import nx_arangodb as nxadb + + G_ADB = nxadb.Graph(name="MyGraph") # Connect to the graph + G_NX = nxadb.convert._to_nx_graph(G_ADB) # Pull the graph + + nx.pagerank(G_NX) + nx.betweenness_centrality(G_NX) + ... + ``` + + Parameters + ---------- + G : nxadb.Graph + The NetworkX-ArangoDB graph to convert. + + Returns + ------- + nx.Graph + The converted graph. + """ if not G.graph_exists_in_db: # Since nxadb.Graph is a subclass of nx.Graph, we can return it as is. # This only applies if the graph does not exist in the database. @@ -128,7 +252,7 @@ def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph: load_coo=False, edge_collections_attributes=G.edge_attributes, load_all_vertex_attributes=False, - load_all_edge_attributes=do_load_all_edge_attributes(G.edge_attributes), + load_all_edge_attributes=len(G.edge_attributes) == 0, is_directed=G.is_directed(), is_multigraph=G.is_multigraph(), symmetrize_edges_if_directed=G.symmetrize_edges if G.is_directed() else False, @@ -163,6 +287,27 @@ def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph: if GPU_AVAILABLE: def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph: + """Convert a NetworkX-ArangoDB graph to a NetworkX-cuGraph graph. + + This function will pull the graph from the database if it does + not exist in the cache. A new NetworkX-cuGraph graph will be + created using the COO format that is fetched. The created graph + will be cached in the nxadb.Graph object for future use. + + Parameters + ---------- + G : nxadb.Graph + The NetworkX-ArangoDB graph to convert. + + as_directed : bool, optional + Whether to convert the graph to a directed graph. + Default is False. + + Returns + ------- + nxcg.Graph + The converted graph. + """ if G.use_nxcg_cache and G.nxcg_graph is not None: m = "**use_nxcg_cache** is enabled. using cached NXCG Graph. no pull required." 
# noqa logger.debug(m) @@ -186,7 +331,7 @@ def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph: load_coo=True, edge_collections_attributes=G.edge_attributes, load_all_vertex_attributes=False, # not used - load_all_edge_attributes=do_load_all_edge_attributes(G.edge_attributes), + load_all_edge_attributes=len(G.edge_attributes) == 0, is_directed=G.is_directed(), is_multigraph=G.is_multigraph(), symmetrize_edges_if_directed=( diff --git a/nx_arangodb/exceptions.py b/nx_arangodb/exceptions.py index 4c72402e..35e538e4 100644 --- a/nx_arangodb/exceptions.py +++ b/nx_arangodb/exceptions.py @@ -30,10 +30,6 @@ class ArangoDBAlgorithmError(NetworkXArangoDBException): pass -class ShortestPathError(ArangoDBAlgorithmError): - pass - - class MultipleEdgesFound(NetworkXArangoDBException): pass diff --git a/tests/test.py b/tests/test.py index 58ea73f8..6a191434 100644 --- a/tests/test.py +++ b/tests/test.py @@ -397,7 +397,7 @@ def test_node_dict_update_existing_single_collection( ) -> None: # This tests uses the existing nodes and updates each # of them using the update method using a single collection - G_1 = nxadb.Graph(name="KarateGraph", foo="bar", use_experimental_views=True) + G_1 = nxadb.Graph(name="KarateGraph", foo="bar", use_arango_views=True) nodes_ids_list = G_1.nodes local_nodes_dict = {} @@ -447,9 +447,7 @@ def test_node_dict_update_multiple_collections( assert db.collection(e_1_name).count() == 0 assert db.collection(e_2_name).count() == 0 - G_1 = graph_cls( - name=graph_name, default_node_type=v_1_name, use_experimental_views=True - ) + G_1 = graph_cls(name=graph_name, default_node_type=v_1_name, use_arango_views=True) assert len(G_1.nodes) == 0 assert len(G_1.edges) == 0 @@ -489,7 +487,7 @@ def test_node_dict_update_multiple_collections( def test_edge_adj_dict_update_existing_single_collection_graph_and_digraph( load_karate_graph: Any, graph_cls: type[nxadb.Graph] ) -> None: - G_1 = graph_cls(name="KarateGraph", foo="bar", 
use_experimental_views=True) + G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True) local_adj = G_1.adj local_edges_dict: Union[GraphAdjDict | DiGraphAdjDict] = {} @@ -563,7 +561,7 @@ def test_edge_adj_dict_update_existing_single_collection_graph_and_digraph( def test_edge_adj_dict_update_existing_single_collection_MultiGraph_and_MultiDiGraph( load_karate_graph: Any, graph_cls: type[nxadb.Graph] ) -> None: - G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True) + G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True) local_adj = G_1.adj local_edges_dict: Union[MultiGraphAdjDict | MultiDiGraphAdjDict] = {} @@ -647,7 +645,7 @@ def test_edge_dict_update_multiple_collections(load_two_relation_graph: Any) -> assert db.collection(e_2_name).count() == 0 G_1 = nxadb.Graph( - name=graph_name, default_node_type=v_1_name, use_experimental_views=True + name=graph_name, default_node_type=v_1_name, use_arango_views=True ) assert len(G_1.nodes) == 0 assert len(G_1.edges) == 0 @@ -704,7 +702,7 @@ def test_edge_dict_update_multiple_collections(load_two_relation_graph: Any) -> def test_edge_adj_inner_dict_update_existing_single_collection( load_karate_graph: Any, graph_cls: type[nxadb.Graph] ) -> None: - G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True) + G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True) local_adj = G_1.adj local_inner_edges_dict: GraphAdjDict = {} @@ -749,7 +747,7 @@ def test_edge_adj_inner_dict_update_existing_single_collection( def test_edge_adj_inner_dict_update_existing_single_collection_multi_graphs( load_karate_graph: Any, graph_cls: type[nxadb.Graph] ) -> None: - G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True) + G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True) local_adj = G_1.adj local_inner_edges_dict: GraphAdjDict = {}