From e97863693ceadc4f6776fd95d12572634545ebeb Mon Sep 17 00:00:00 2001
From: Anthony Mahanna <43019056+aMahanna@users.noreply.github.com>
Date: Tue, 3 Sep 2024 11:15:10 -0400
Subject: [PATCH] RTD Prep (#55)
* docs | wip
* fix: `nx_to_nxadb`
* fix: doc
* checkpoint
* checkpoint 2
* fix: docstrings
* checkpoint 3
* fix: hyperlinks
* mv: workflows
---
.../docs.yaml | 4 +-
.../release.yml | 0
.gitignore | 11 +-
.readthedocs.yaml | 6 +-
README.md | 22 +-
{docs => doc}/Makefile | 0
{docs => doc}/_static/dispatch.png | Bin
{docs => doc}/_static/nxadb.png | Bin
doc/algorithms/index.rst | 99 +++++++
doc/classes/digraph.rst | 89 ++++++
doc/classes/graph.rst | 81 ++++++
doc/classes/index.rst | 35 +++
doc/classes/multidigraph.rst | 90 ++++++
doc/classes/multigraph.rst | 81 ++++++
{docs => doc}/conf.py | 6 +-
doc/dict/adj.rst | 18 ++
doc/dict/graph.rst | 12 +
doc/dict/index.rst | 41 +++
doc/dict/node.rst | 12 +
doc/index.rst | 115 ++++++++
{docs => doc}/make.bat | 0
{docs => doc}/nx_arangodb.ipynb | 0
doc/quickstart.rst | 105 +++++++
{docs => doc}/requirements.txt | 0
doc/views/coreviews.rst | 14 +
doc/views/index.rst | 33 +++
doc/views/reportviews.rst | 22 ++
docs/index.rst | 1 -
nx_arangodb/algorithms/README.md | 21 ++
.../algorithms/shortest_paths/generic.py | 64 ++++-
nx_arangodb/classes/coreviews.py | 63 ++++-
nx_arangodb/classes/dict/README.md | 30 ++
nx_arangodb/classes/dict/adj.py | 247 +++++++++++++----
nx_arangodb/classes/dict/graph.py | 79 +++++-
nx_arangodb/classes/dict/node.py | 78 +++++-
nx_arangodb/classes/digraph.py | 119 +++++++-
nx_arangodb/classes/function.py | 257 ++++++++++--------
nx_arangodb/classes/graph.py | 188 +++++++++++--
nx_arangodb/classes/multidigraph.py | 132 ++++++++-
nx_arangodb/classes/multigraph.py | 132 ++++++++-
nx_arangodb/classes/reportviews.py | 117 +++++++-
nx_arangodb/convert.py | 163 ++++++++++-
nx_arangodb/exceptions.py | 4 -
tests/test.py | 16 +-
44 files changed, 2329 insertions(+), 278 deletions(-)
rename .github/{disabled-workflows => workflows}/docs.yaml (80%)
rename .github/{disabled-workflows => workflows}/release.yml (100%)
rename {docs => doc}/Makefile (100%)
rename {docs => doc}/_static/dispatch.png (100%)
rename {docs => doc}/_static/nxadb.png (100%)
create mode 100644 doc/algorithms/index.rst
create mode 100644 doc/classes/digraph.rst
create mode 100644 doc/classes/graph.rst
create mode 100644 doc/classes/index.rst
create mode 100644 doc/classes/multidigraph.rst
create mode 100644 doc/classes/multigraph.rst
rename {docs => doc}/conf.py (86%)
create mode 100644 doc/dict/adj.rst
create mode 100644 doc/dict/graph.rst
create mode 100644 doc/dict/index.rst
create mode 100644 doc/dict/node.rst
create mode 100644 doc/index.rst
rename {docs => doc}/make.bat (100%)
rename {docs => doc}/nx_arangodb.ipynb (100%)
create mode 100644 doc/quickstart.rst
rename {docs => doc}/requirements.txt (100%)
create mode 100644 doc/views/coreviews.rst
create mode 100644 doc/views/index.rst
create mode 100644 doc/views/reportviews.rst
delete mode 100644 docs/index.rst
create mode 100644 nx_arangodb/algorithms/README.md
create mode 100644 nx_arangodb/classes/dict/README.md
diff --git a/.github/disabled-workflows/docs.yaml b/.github/workflows/docs.yaml
similarity index 80%
rename from .github/disabled-workflows/docs.yaml
rename to .github/workflows/docs.yaml
index 0b19163f..7067b613 100644
--- a/.github/disabled-workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -20,7 +20,7 @@ jobs:
python-version: '3.10'
- name: Install dependencies
- run: pip install .[dev] && pip install -r docs/requirements.txt
+ run: pip install .[dev] && pip install -r doc/requirements.txt
- name: Generate Sphinx HTML
- run: cd docs && make html
\ No newline at end of file
+ run: cd doc && make html
\ No newline at end of file
diff --git a/.github/disabled-workflows/release.yml b/.github/workflows/release.yml
similarity index 100%
rename from .github/disabled-workflows/release.yml
rename to .github/workflows/release.yml
diff --git a/.gitignore b/.gitignore
index 752223e5..52f3a861 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,15 @@ wheels/
*.egg
MANIFEST
+# Generated while building documentation.
+doc/auto_examples
+doc/modules
+doc/generated
+doc/algorithms/generated
+doc/classes/generated
+doc/readwrite/generated
+doc/path.to.file
+
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -64,7 +73,7 @@ instance/
.scrapy
# Sphinx documentation
-docs/_build/
+doc/_build/
# PyBuilder
target/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 47b44e5a..06a9138a 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -11,9 +11,9 @@ build:
tools:
python: "3.12"
-# Build documentation in the "docs/" directory with Sphinx
+# Build documentation in the "doc/" directory with Sphinx
sphinx:
- configuration: docs/conf.py
+ configuration: doc/conf.py
fail_on_warning: true
# Optionally build your docs in additional formats such as PDF and ePub
@@ -26,4 +26,4 @@ sphinx:
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- - requirements: docs/requirements.txt
\ No newline at end of file
+ - requirements: doc/requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index 211b1f09..14b2531d 100644
--- a/README.md
+++ b/README.md
@@ -2,15 +2,23 @@
-
+
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main)
[![CodeQL](https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml/badge.svg)](https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml)
[![Docs](https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml/badge.svg)](https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml)
@@ -44,7 +52,7 @@ Benefits of having ArangoDB as a backend to NetworkX include:
6. Access to efficient distribution of graph data ([ArangoDB SmartGraphs](https://docs.arangodb.com/stable/graphs/smartgraphs/)).
-
+
@@ -169,7 +177,7 @@ nx.config.backends.arangodb.use_gpu = True
```
-
+
diff --git a/docs/Makefile b/doc/Makefile
similarity index 100%
rename from docs/Makefile
rename to doc/Makefile
diff --git a/docs/_static/dispatch.png b/doc/_static/dispatch.png
similarity index 100%
rename from docs/_static/dispatch.png
rename to doc/_static/dispatch.png
diff --git a/docs/_static/nxadb.png b/doc/_static/nxadb.png
similarity index 100%
rename from docs/_static/nxadb.png
rename to doc/_static/nxadb.png
diff --git a/doc/algorithms/index.rst b/doc/algorithms/index.rst
new file mode 100644
index 00000000..9adf6f7d
--- /dev/null
+++ b/doc/algorithms/index.rst
@@ -0,0 +1,99 @@
+.. _algorithms:
+
+**********
+Algorithms
+**********
+
+As NetworkX-ArangoDB is primarily a **Storage Backend** to NetworkX, its primary focus is on persisting and reloading graphs from ArangoDB.
+
+However, running algorithms on the graph is also still possible.
+
+There are 3 ways to run algorithms on the graph:
+
+1. **NetworkX**: The traditional way of running algorithms on Graphs.
+2. **NetworkX-cuGraph**: The GPU-accelerated way of running algorithms on Graphs.
+3. **ArangoDB**: The database way of running algorithms on Graphs.
+
+Currently, Options 1 & 2 are supported, whereas Option 3 is a work-in-progress.
+
+Running algorithms with Option 2 requires ``nx-cugraph`` to be installed on a system with a compatible GPU:
+
+.. code-block::
+
+ pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com
+
+When running algorithms with Option 2, the graph is converted to a ``nx-cugraph`` graph, and the algorithm is run on the GPU.
+
+This is only possible if ``nx-cugraph`` has implemented the algorithm you want to run.
+
+- For a list of algorithms that are supported by ``nx-cugraph``, refer to the `nx-cugraph README `_.
+- For a list of algorithms that are supported by ``networkx``, refer to the `NetworkX Documentation `_.
+
+``nx-arangodb`` will automatically dispatch algorithm calls to either CPU or GPU based on whether ``nx-cugraph`` is installed. We rely on a Rust-based library called `phenolrs `_ to retrieve ArangoDB Graphs as fast as possible.
+
+You can also force-run algorithms on CPU even if ``nx-cugraph`` is installed:
+
+.. code-block:: python
+
+ import os
+ import networkx as nx
+ import nx_arangodb as nxadb
+
+ # os.environ ...
+
+ G = nxadb.Graph(name="MyGraph")
+
+ nx.config.backends.arangodb.use_gpu = False
+
+ nx.pagerank(G)
+ nx.betweenness_centrality(G)
+ # ...
+
+ nx.config.backends.arangodb.use_gpu = True
+
+
+.. image:: ../_static/dispatch.png
+ :align: center
+ :alt: nx-arangodb dispatching
+ :height: 200px
+
+
+**Tip**: If you're running multiple CPU algorithms, it's recommended to rely on invoking ``nxadb.convert.nxadb_to_nx`` to convert the graph to a NetworkX Graph before running the algorithms.
+This is because we currently load the entire graph into memory before running *each* algorithm, which can be slow for large graphs.
+
+.. code-block:: python
+
+ import networkx as nx
+ import nx_arangodb as nxadb
+
+ G_adb = nxadb.Graph(name="MyGraph")
+
+ G_nx = nxadb.convert.nxadb_to_nx(G_adb)
+
+ nx.pagerank(G_nx)
+ nx.betweenness_centrality(G_nx)
+ # ...
+
+
+**Option 3**
+
+This is an experimental module seeking to provide server-side algorithms for ``nx-arangodb`` Graphs.
+The goal is to provide a set of algorithms that can be delegated to the server for processing,
+rather than having to pull all the data to the client and process it there.
+
+Currently, the module is in a very early stage and only provides a single algorithm: ``shortest_path``.
+This is simply to demonstrate the potential of the module and to provide a starting point for further development.
+
+.. code-block:: python
+
+ import os
+ import networkx as nx
+ import nx_arangodb as nxadb
+
+ # os.environ ...
+
+ G = nxadb.Graph(name="MyGraph")
+
+ nx.pagerank(G) # Runs on the client
+ nx.shortest_path(G, source="A", target="B") # Runs on the DB server
+ nx.shortest_path.orig_func(G, source="A", target="B") # Runs on the client
diff --git a/doc/classes/digraph.rst b/doc/classes/digraph.rst
new file mode 100644
index 00000000..c1b03c11
--- /dev/null
+++ b/doc/classes/digraph.rst
@@ -0,0 +1,89 @@
+.. _digraph:
+
+=======
+DiGraph
+=======
+
+Overview
+========
+.. currentmodule:: nx_arangodb
+.. autoclass:: DiGraph
+ :members: query, chat
+
+
+Methods
+=======
+
+Adding and removing nodes and edges
+-----------------------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ DiGraph.__init__
+ DiGraph.add_node
+ DiGraph.add_nodes_from
+ DiGraph.remove_node
+ DiGraph.remove_nodes_from
+ DiGraph.add_edge
+ DiGraph.add_edges_from
+ DiGraph.add_weighted_edges_from
+ DiGraph.remove_edge
+ DiGraph.remove_edges_from
+ DiGraph.update
+ DiGraph.clear
+ DiGraph.clear_edges
+
+
+
+Reporting nodes edges and neighbors
+-----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ DiGraph.nodes
+ DiGraph.__iter__
+ DiGraph.has_node
+ DiGraph.__contains__
+ DiGraph.edges
+ DiGraph.out_edges
+ DiGraph.in_edges
+ DiGraph.has_edge
+ DiGraph.get_edge_data
+ DiGraph.neighbors
+ DiGraph.adj
+ DiGraph.__getitem__
+ DiGraph.successors
+ DiGraph.succ
+ DiGraph.predecessors
+ DiGraph.pred
+ DiGraph.adjacency
+ DiGraph.nbunch_iter
+
+
+Counting nodes edges and neighbors
+----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ DiGraph.order
+ DiGraph.number_of_nodes
+ DiGraph.__len__
+ DiGraph.degree
+ DiGraph.in_degree
+ DiGraph.out_degree
+ DiGraph.size
+ DiGraph.number_of_edges
+
+
+Making copies and subgraphs
+---------------------------
+.. autosummary::
+ :toctree: generated/
+
+ DiGraph.copy
+ DiGraph.to_undirected
+ DiGraph.to_directed
+ DiGraph.subgraph
+ DiGraph.edge_subgraph
+ DiGraph.reverse
diff --git a/doc/classes/graph.rst b/doc/classes/graph.rst
new file mode 100644
index 00000000..870d975c
--- /dev/null
+++ b/doc/classes/graph.rst
@@ -0,0 +1,81 @@
+.. _graph:
+
+=====
+Graph
+=====
+
+Overview
+========
+.. currentmodule:: nx_arangodb
+.. autoclass:: Graph
+ :members: query, chat
+
+
+Methods
+=======
+
+Adding and removing nodes and edges
+-----------------------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ Graph.__init__
+ Graph.add_node
+ Graph.add_nodes_from
+ Graph.remove_node
+ Graph.remove_nodes_from
+ Graph.add_edge
+ Graph.add_edges_from
+ Graph.add_weighted_edges_from
+ Graph.remove_edge
+ Graph.remove_edges_from
+ Graph.update
+ Graph.clear
+ Graph.clear_edges
+
+
+
+Reporting nodes edges and neighbors
+-----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ Graph.nodes
+ Graph.__iter__
+ Graph.has_node
+ Graph.__contains__
+ Graph.edges
+ Graph.has_edge
+ Graph.get_edge_data
+ Graph.neighbors
+ Graph.adj
+ Graph.__getitem__
+ Graph.adjacency
+ Graph.nbunch_iter
+
+
+
+Counting nodes edges and neighbors
+----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ Graph.order
+ Graph.number_of_nodes
+ Graph.__len__
+ Graph.degree
+ Graph.size
+ Graph.number_of_edges
+
+
+Making copies and subgraphs
+---------------------------
+.. autosummary::
+ :toctree: generated/
+
+ Graph.copy
+ Graph.to_undirected
+ Graph.to_directed
+ Graph.subgraph
+ Graph.edge_subgraph
diff --git a/doc/classes/index.rst b/doc/classes/index.rst
new file mode 100644
index 00000000..b5765295
--- /dev/null
+++ b/doc/classes/index.rst
@@ -0,0 +1,35 @@
+.. _classes:
+
+******
+Graphs
+******
+
+NetworkX provides data structures and methods for storing graphs.
+
+All NetworkX graph classes allow (hashable) Python objects as nodes
+and any Python object can be assigned as an edge attribute.
+
+The choice of graph class depends on the structure of the
+graph you want to represent.
+
+**Which graph class should I use?**
+
++----------------+------------+--------------------+------------------------+
+| NetworkX Class | Type | Self-loops allowed | Parallel edges allowed |
++================+============+====================+========================+
+| Graph | undirected | Yes | No |
++----------------+------------+--------------------+------------------------+
+| DiGraph | directed | Yes | No |
++----------------+------------+--------------------+------------------------+
+| MultiGraph | undirected | Yes | Yes |
++----------------+------------+--------------------+------------------------+
+| MultiDiGraph | directed | Yes | Yes |
++----------------+------------+--------------------+------------------------+
+
+.. toctree::
+ :maxdepth: 1
+
+ graph
+ digraph
+ multigraph
+ multidigraph
diff --git a/doc/classes/multidigraph.rst b/doc/classes/multidigraph.rst
new file mode 100644
index 00000000..f62af3fa
--- /dev/null
+++ b/doc/classes/multidigraph.rst
@@ -0,0 +1,90 @@
+.. _multidigraph:
+
+
+============
+MultiDiGraph
+============
+
+Overview
+========
+.. currentmodule:: nx_arangodb
+.. autoclass:: MultiDiGraph
+ :members: query, chat
+
+
+Methods
+=======
+
+Adding and removing nodes and edges
+-----------------------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ MultiDiGraph.__init__
+ MultiDiGraph.add_node
+ MultiDiGraph.add_nodes_from
+ MultiDiGraph.remove_node
+ MultiDiGraph.remove_nodes_from
+ MultiDiGraph.add_edge
+ MultiDiGraph.add_edges_from
+ MultiDiGraph.add_weighted_edges_from
+ MultiDiGraph.new_edge_key
+ MultiDiGraph.remove_edge
+ MultiDiGraph.remove_edges_from
+ MultiDiGraph.update
+ MultiDiGraph.clear
+ MultiDiGraph.clear_edges
+
+
+
+Reporting nodes edges and neighbors
+-----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiDiGraph.nodes
+ MultiDiGraph.__iter__
+ MultiDiGraph.has_node
+ MultiDiGraph.__contains__
+ MultiDiGraph.edges
+ MultiDiGraph.out_edges
+ MultiDiGraph.in_edges
+ MultiDiGraph.has_edge
+ MultiDiGraph.get_edge_data
+ MultiDiGraph.neighbors
+ MultiDiGraph.adj
+ MultiDiGraph.__getitem__
+ MultiDiGraph.successors
+ MultiDiGraph.succ
+ MultiDiGraph.predecessors
+ MultiDiGraph.pred
+ MultiDiGraph.adjacency
+ MultiDiGraph.nbunch_iter
+
+
+Counting nodes edges and neighbors
+----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiDiGraph.order
+ MultiDiGraph.number_of_nodes
+ MultiDiGraph.__len__
+ MultiDiGraph.degree
+ MultiDiGraph.in_degree
+ MultiDiGraph.out_degree
+ MultiDiGraph.size
+ MultiDiGraph.number_of_edges
+
+Making copies and subgraphs
+---------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiDiGraph.copy
+ MultiDiGraph.to_undirected
+ MultiDiGraph.to_directed
+ MultiDiGraph.subgraph
+ MultiDiGraph.edge_subgraph
+ MultiDiGraph.reverse
diff --git a/doc/classes/multigraph.rst b/doc/classes/multigraph.rst
new file mode 100644
index 00000000..2088d7a6
--- /dev/null
+++ b/doc/classes/multigraph.rst
@@ -0,0 +1,81 @@
+.. _multigraph:
+
+==========
+MultiGraph
+==========
+
+Overview
+========
+.. currentmodule:: nx_arangodb
+.. autoclass:: MultiGraph
+ :members: query, chat
+
+Methods
+=======
+
+Adding and removing nodes and edges
+-----------------------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ MultiGraph.__init__
+ MultiGraph.add_node
+ MultiGraph.add_nodes_from
+ MultiGraph.remove_node
+ MultiGraph.remove_nodes_from
+ MultiGraph.add_edge
+ MultiGraph.add_edges_from
+ MultiGraph.add_weighted_edges_from
+ MultiGraph.new_edge_key
+ MultiGraph.remove_edge
+ MultiGraph.remove_edges_from
+ MultiGraph.update
+ MultiGraph.clear
+ MultiGraph.clear_edges
+
+
+
+Reporting nodes edges and neighbors
+-----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiGraph.nodes
+ MultiGraph.__iter__
+ MultiGraph.has_node
+ MultiGraph.__contains__
+ MultiGraph.edges
+ MultiGraph.has_edge
+ MultiGraph.get_edge_data
+ MultiGraph.neighbors
+ MultiGraph.adj
+ MultiGraph.__getitem__
+ MultiGraph.adjacency
+ MultiGraph.nbunch_iter
+
+
+
+Counting nodes edges and neighbors
+----------------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiGraph.order
+ MultiGraph.number_of_nodes
+ MultiGraph.__len__
+ MultiGraph.degree
+ MultiGraph.size
+ MultiGraph.number_of_edges
+
+
+Making copies and subgraphs
+---------------------------
+.. autosummary::
+ :toctree: generated/
+
+ MultiGraph.copy
+ MultiGraph.to_undirected
+ MultiGraph.to_directed
+ MultiGraph.subgraph
+ MultiGraph.edge_subgraph
diff --git a/docs/conf.py b/doc/conf.py
similarity index 86%
rename from docs/conf.py
rename to doc/conf.py
index 6f05d9a7..fe5250b8 100644
--- a/docs/conf.py
+++ b/doc/conf.py
@@ -22,6 +22,8 @@
"sphinx_rtd_theme",
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.inheritance_diagram",
]
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
@@ -32,4 +34,6 @@
html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
-autodoc_member_order = "bysource"
\ No newline at end of file
+autodoc_member_order = "bysource"
+autodoc_inherit_docstrings = True
+autosummary_generate = True
diff --git a/doc/dict/adj.rst b/doc/dict/adj.rst
new file mode 100644
index 00000000..e0735fc8
--- /dev/null
+++ b/doc/dict/adj.rst
@@ -0,0 +1,18 @@
+.. _adj:
+
+=========
+Adjacency
+=========
+
+
+.. currentmodule:: nx_arangodb.classes.dict.adj
+.. autoclass:: AdjListOuterDict
+
+.. currentmodule:: nx_arangodb.classes.dict.adj
+.. autoclass:: AdjListInnerDict
+
+.. currentmodule:: nx_arangodb.classes.dict.adj
+.. autoclass:: EdgeKeyDict
+
+.. currentmodule:: nx_arangodb.classes.dict.adj
+.. autoclass:: EdgeAttrDict
\ No newline at end of file
diff --git a/doc/dict/graph.rst b/doc/dict/graph.rst
new file mode 100644
index 00000000..7444012d
--- /dev/null
+++ b/doc/dict/graph.rst
@@ -0,0 +1,12 @@
+.. _dict_graph:
+
+=====
+Graph
+=====
+
+
+.. currentmodule:: nx_arangodb.classes.dict.graph
+.. autoclass:: GraphDict
+
+.. currentmodule:: nx_arangodb.classes.dict.graph
+.. autoclass:: GraphAttrDict
\ No newline at end of file
diff --git a/doc/dict/index.rst b/doc/dict/index.rst
new file mode 100644
index 00000000..6172f236
--- /dev/null
+++ b/doc/dict/index.rst
@@ -0,0 +1,41 @@
+.. _dict:
+
+************
+Dictionaries
+************
+
+The ``dict`` module provides a set of ``UserDict``-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database.
+
+NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions:
+
+1. ``node_dict_factory``
+2. ``node_attr_dict_factory``
+3. ``adjlist_outer_dict_factory``
+4. ``adjlist_inner_dict_factory``
+5. ``edge_key_dict_factory`` (Only for MultiGraphs)
+6. ``edge_attr_dict_factory``
+7. ``graph_attr_dict_factory``
+
+These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself.
+
+This module contains the following classes:
+
+1. ``NodeDict``
+2. ``NodeAttrDict``
+3. ``AdjListOuterDict``
+4. ``AdjListInnerDict``
+5. ``EdgeKeyDict``
+6. ``EdgeAttrDict``
+7. ``GraphDict``
+8. ``GraphAttrDict``
+
+Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the ``UserDict`` class.
+
+By using this strategy in addition to subclassing the ``nx.Graph`` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support.
+
+.. toctree::
+ :maxdepth: 1
+
+ adj
+ node
+ graph
diff --git a/doc/dict/node.rst b/doc/dict/node.rst
new file mode 100644
index 00000000..2b1f061d
--- /dev/null
+++ b/doc/dict/node.rst
@@ -0,0 +1,12 @@
+.. _node:
+
+====
+Node
+====
+
+
+.. currentmodule:: nx_arangodb.classes.dict.node
+.. autoclass:: NodeDict
+
+.. currentmodule:: nx_arangodb.classes.dict.node
+.. autoclass:: NodeAttrDict
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 00000000..ebaff36d
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,115 @@
+nx-arangodb
+============
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+.. image:: https://colab.research.google.com/assets/colab-badge.svg
+ :target: https://colab.research.google.com/github/arangodb/nx-arangodb/blob/main/doc/nx_arangodb.ipynb
+ :alt: Open In Colab
+
+.. image:: https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg
+ :target: https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main
+ :alt: CircleCI
+
+.. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml/badge.svg
+ :target: https://github.com/arangodb/nx-arangodb/actions/workflows/analyzee.yaml
+ :alt: CodeQL
+
+.. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml/badge.svg
+ :target: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml
+ :alt: Docs
+
+.. image:: https://img.shields.io/pypi/v/nx-arangodb?color=3775A9&style=for-the-badge&logo=pypi&logoColor=FFD43B
+ :target: https://pypi.org/project/nx-arangodb/
+ :alt: PyPI version badge
+
+.. image:: https://img.shields.io/badge/3.10%2B-3776AB?style=for-the-badge&logo=python&logoColor=FFD43B&label=Python
+ :target: https://pypi.org/project/nx-arangodb/
+ :alt: Python versions badge
+
+.. image:: https://img.shields.io/static/v1?style=for-the-badge&label=code%20style&message=black&color=black
+ :target: https://github.com/psf/black
+ :alt: Code style: black
+
+.. image:: https://img.shields.io/pepy/dt/nx-arangodb?style=for-the-badge&color=282661
+ :target: https://pepy.tech/project/nx-arangodb
+ :alt: Downloads
+
+This is a `backend to NetworkX `_ that offers `ArangoDB `_ as a `Persistence Layer to NetworkX Graphs `_:
+
+1. Persist NetworkX Graphs to ArangoDB.
+2. Reload NetworkX Graphs from ArangoDB.
+3. Perform CRUD on ArangoDB Graphs via NetworkX.
+4. Run algorithms (CPU & GPU) on ArangoDB Graphs via NetworkX.
+
+Benefits of having ArangoDB as a backend to NetworkX include:
+
+1. No need to re-create the graph every time you start a new session.
+2. Access to GPU-accelerated graph analytics (`nx-cugraph `_).
+3. Access to a database query language (`Arango Query Language `_).
+4. Access to a visual interface for graph exploration (`ArangoDB Web UI `_).
+5. Access to cross-collaboration on the same graph (`ArangoDB Cloud `_).
+6. Access to efficient distribution of graph data (`ArangoDB SmartGraphs `_).
+
+.. image:: ./_static/nxadb.png
+ :align: center
+ :alt: nx-arangodb Diagram
+ :height: 200px
+
+Requirements
+------------
+- Python 3.10+
+- NetworkX 3.0+
+- ArangoDB 3.10+
+
+Installation
+------------
+
+Latest Release
+
+.. code-block::
+
+ pip install nx-arangodb
+
+Current State
+
+.. code-block::
+
+ pip install git+https://github.com/arangodb/nx-arangodb
+
+Contents
+--------
+
+The UX of NetworkX-ArangoDB is similar to that of NetworkX, but with the
+added functionality of persisting graphs to ArangoDB. For an understanding
+of how to use NetworkX, refer to the `NetworkX Documentation `_.
+
+Expect documentation to grow over time:
+
+.. toctree::
+ :maxdepth: 2
+
+ quickstart
+ classes/index
+ dict/index
+ algorithms/index
+ views/index
\ No newline at end of file
diff --git a/docs/make.bat b/doc/make.bat
similarity index 100%
rename from docs/make.bat
rename to doc/make.bat
diff --git a/docs/nx_arangodb.ipynb b/doc/nx_arangodb.ipynb
similarity index 100%
rename from docs/nx_arangodb.ipynb
rename to doc/nx_arangodb.ipynb
diff --git a/doc/quickstart.rst b/doc/quickstart.rst
new file mode 100644
index 00000000..f8bf628d
--- /dev/null
+++ b/doc/quickstart.rst
@@ -0,0 +1,105 @@
+Quickstart
+==========
+
+1. Set up ArangoDB
+2. Set environment variables
+3. Instantiate a NetworkX-ArangoDB Graph
+
+1. Set up ArangoDB
+------------------
+
+**Option A: Local Instance via Docker**
+
+Appears on ``localhost:8529`` with the user ``root`` & password ``openSesame``.
+
+More info: `arangodb.com/download-major `_.
+
+.. code-block:: bash
+
+ docker run -e ARANGO_ROOT_PASSWORD=openSesame -p 8529:8529 arangodb/arangodb
+
+**Option B: ArangoDB Cloud Trial**
+
+`ArangoGraph `_ is ArangoDB's Cloud offering to use ArangoDB as a managed service.
+
+A 14-day trial is available upon sign up.
+
+**Option C: Temporary Cloud Instance via Python**
+
+A temporary cloud database can be provisioned using the `adb-cloud-connector `_ Python package.
+
+.. code-block:: bash
+
+ pip install adb-cloud-connector
+
+.. code-block:: python
+
+ from adb_cloud_connector import get_temp_credentials
+
+ credentials = get_temp_credentials()
+
+ print(credentials)
+
+2. Set environment variables
+----------------------------
+
+Connecting to ArangoDB requires the following environment variables:
+
+1. ``DATABASE_HOST``: The host URL of the ArangoDB instance.
+2. ``DATABASE_USERNAME``: The username to connect to the ArangoDB instance.
+3. ``DATABASE_PASSWORD``: The password to connect to the ArangoDB instance.
+4. ``DATABASE_NAME``: The name of the database to connect to.
+
+For example, using Option A from above:
+
+.. code-block:: bash
+
+ export DATABASE_HOST=http://localhost:8529
+ export DATABASE_USERNAME=root
+ export DATABASE_PASSWORD=openSesame
+ export DATABASE_NAME=_system
+
+Or using Option C from above:
+
+.. code-block:: python
+
+ import os
+ from adb_cloud_connector import get_temp_credentials
+
+ credentials = get_temp_credentials()
+
+ os.environ["DATABASE_HOST"] = credentials["url"]
+ os.environ["DATABASE_USERNAME"] = credentials["username"]
+ os.environ["DATABASE_PASSWORD"] = credentials["password"]
+ os.environ["DATABASE_NAME"] = credentials["dbName"]
+
+3. Instantiate a NetworkX-ArangoDB Graph
+----------------------------------------
+
+Instantiating a NetworkX-ArangoDB Graph is similar to instantiating a NetworkX Graph.
+
+Providing the ``name`` parameter will create a new graph in ArangoDB if it does not already exist.
+
+Providing the ``incoming_graph_data`` in combination with the ``name`` parameter will create a new graph in ArangoDB
+with the provided data. If the graph already exists, an error will be raised.
+
+.. code-block:: python
+
+ import networkx as nx
+ import nx_arangodb as nxadb
+
+ G = nxadb.Graph(name="MyGraph") # New ArangoDB Graph
+ G2 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph()) # Regular NetworkX Graph
+ G3 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph(), name="KarateGraph") # New ArangoDB Graph
+
+From here, you can use the conventional NetworkX API to interact with the graph.
+
+Assuming you already have a graph in ArangoDB named ``MyGraph``, you can reload it as follows:
+
+.. code-block:: python
+
+ import nx_arangodb as nxadb
+
+ G = nxadb.Graph(name="MyGraph")
+
+ print(G.number_of_nodes(), G.number_of_edges())
diff --git a/docs/requirements.txt b/doc/requirements.txt
similarity index 100%
rename from docs/requirements.txt
rename to doc/requirements.txt
diff --git a/doc/views/coreviews.rst b/doc/views/coreviews.rst
new file mode 100644
index 00000000..0f6f06cb
--- /dev/null
+++ b/doc/views/coreviews.rst
@@ -0,0 +1,14 @@
+.. _coreviews:
+
+=========
+Coreviews
+=========
+
+
+.. currentmodule:: nx_arangodb.classes.coreviews
+.. autoclass:: ArangoAdjacencyView
+ :members:
+
+.. currentmodule:: nx_arangodb.classes.coreviews
+.. autoclass:: ArangoAtlasView
+ :members:
diff --git a/doc/views/index.rst b/doc/views/index.rst
new file mode 100644
index 00000000..74c3adb3
--- /dev/null
+++ b/doc/views/index.rst
@@ -0,0 +1,33 @@
+.. _views:
+
+**************
+ArangoDB Views
+**************
+
+Having a database as a backend to NetworkX allows us to delegate
+certain operations to the database.
+
+This can be applied to the concept of NetworkX Views.
+
+Below is a set of experimental overrides of the NetworkX Views that represent the
+nodes and edges of the graph. Overriding these classes allows us to
+implement custom logic for data filtering and updating in the database.
+
+These classes are a work-in-progress. The main goal is to try
+to delegate data processing to ArangoDB, whenever possible.
+
+To use these experimental views, you must set **use_arango_views=True**
+when creating a new graph object:
+
+.. code-block:: python
+
+ import nx_arangodb as nxadb
+
+ G = nxadb.Graph(name="MyGraph", use_arango_views=True)
+
+
+.. toctree::
+ :maxdepth: 1
+
+ coreviews
+ reportviews
\ No newline at end of file
diff --git a/doc/views/reportviews.rst b/doc/views/reportviews.rst
new file mode 100644
index 00000000..bf506539
--- /dev/null
+++ b/doc/views/reportviews.rst
@@ -0,0 +1,22 @@
+.. _reportviews:
+
+===========
+Reportviews
+===========
+
+
+.. currentmodule:: nx_arangodb.classes.reportviews
+.. autoclass:: ArangoNodeView
+ :members:
+
+.. currentmodule:: nx_arangodb.classes.reportviews
+.. autoclass:: ArangoNodeDataView
+ :members:
+
+.. currentmodule:: nx_arangodb.classes.reportviews
+.. autoclass:: ArangoEdgeView
+ :members:
+
+.. currentmodule:: nx_arangodb.classes.reportviews
+.. autoclass:: ArangoEdgeDataView
+ :members:
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 5e1c309d..00000000
--- a/docs/index.rst
+++ /dev/null
@@ -1 +0,0 @@
-Hello World
\ No newline at end of file
diff --git a/nx_arangodb/algorithms/README.md b/nx_arangodb/algorithms/README.md
new file mode 100644
index 00000000..02d78c15
--- /dev/null
+++ b/nx_arangodb/algorithms/README.md
@@ -0,0 +1,21 @@
+# algorithms
+
+This is an experimental module seeking to provide server-side algorithms for `nx-arangodb` Graphs. The goal is to provide a set of algorithms that can be delegated to the server for processing, rather than having to pull all the data to the client and process it there.
+
+Currently, the module is in a very early stage and only provides a single algorithm: `shortest_path`. This is simply to demonstrate the potential of the module and to provide a starting point for further development.
+
+```python
+import os
+import networkx as nx
+import nx_arangodb as nxadb
+
+# os.environ ...
+
+G = nxadb.Graph(name="MyGraph")
+
+nx.pagerank(G) # Runs on the client
+nx.shortest_path(G, source="A", target="B") # Runs on the DB server
+nx.shortest_path.orig_func(G, source="A", target="B") # Runs on the client
+```
+
+As ArangoDB continues to grow its Graph Analytics capabilities, this module will be updated to take advantage of those features. Stay tuned!
\ No newline at end of file
diff --git a/nx_arangodb/algorithms/shortest_paths/generic.py b/nx_arangodb/algorithms/shortest_paths/generic.py
index 7328b257..0cec3ea2 100644
--- a/nx_arangodb/algorithms/shortest_paths/generic.py
+++ b/nx_arangodb/algorithms/shortest_paths/generic.py
@@ -4,7 +4,6 @@
import networkx as nx
import nx_arangodb as nxadb
-from nx_arangodb.exceptions import ShortestPathError
from nx_arangodb.utils import _dtype_param, networkx_algorithm
__all__ = ["shortest_path"]
@@ -22,19 +21,66 @@ def shortest_path(
*,
dtype=None,
):
- """limited version of nx.shortest_path"""
+ """A server-side implementation of the nx.shortest_path algorithm.
- if not G.graph_exists_in_db:
+ This algorithm will invoke the original NetworkX algorithm if one
+ of the following conditions is met:
+ - The graph is not stored in the database.
+ - The method is not 'dijkstra'.
+ - The target or source is not specified.
+
+ Parameters
+ ----------
+ G : NetworkX graph
+
+ source : node, optional
+ Starting node for path. If not specified, compute shortest
+ paths for each possible starting node.
+
+ target : node, optional
+ Ending node for path. If not specified, compute shortest
+ paths to all possible nodes.
+
+ weight : None, string or function, optional (default = None)
+ If None, every edge has weight/distance/cost 1.
+ If a string, use this edge attribute as the edge weight.
+ Any edge attribute not present defaults to 1.
+ If this is a function, the weight of an edge is the value
+ returned by the function. The function must accept exactly
+ three positional arguments: the two endpoints of an edge and
+ the dictionary of edge attributes for that edge.
+ The function must return a number.
+
+ method : string, optional (default = 'dijkstra')
+ The algorithm to use to compute the path.
+ Supported options: 'dijkstra', 'bellman-ford'.
+ Other inputs produce a ValueError.
+ If `weight` is None, unweighted graph methods are used, and this
+ suggestion is ignored.
+
+ Returns
+ -------
+ path : list
+ List of nodes in a shortest path.
+
+ Raises
+ ------
+ NodeNotFound
+ If `source` is not in `G`.
+
+ ValueError
+ If `method` is not among the supported options.
+ """
+
+ graph_does_not_exist = not G.graph_exists_in_db
+ target_or_source_not_specified = target is None or source is None
+ method_not_dijkstra = method != "dijkstra"
+
+ if any([graph_does_not_exist, target_or_source_not_specified, method_not_dijkstra]):
return nx.shortest_path.orig_func(
G, source=source, target=target, weight=weight, method=method
)
- if target is None or source is None:
- raise NotImplementedError("Both source and target must be specified for now")
-
- if method != "dijkstra":
- raise NotImplementedError("Only dijkstra method is supported")
-
if isinstance(source, int):
source = G.nodes[source]["_id"]
diff --git a/nx_arangodb/classes/coreviews.py b/nx_arangodb/classes/coreviews.py
index 794a648e..0df35fcc 100644
--- a/nx_arangodb/classes/coreviews.py
+++ b/nx_arangodb/classes/coreviews.py
@@ -1,16 +1,73 @@
+"""Experimental overrides of the NetworkX Views that represent the
+core data structures such as nested Mappings (e.g. dict-of-dicts).
+
+Overriding these classes allows us to implement custom logic for
+data filtering and updating in the database, instead of in Python.
+
+These classes are a work-in-progress. The main goal is to try
+to delegate data processing to ArangoDB, whenever possible.
+
+To use these experimental views, you must set **use_arango_views=True**
+when creating a new graph object:
+>>> G = nxadb.Graph(name="MyGraph", use_arango_views=True)
+"""
+
import networkx as nx
-class CustomAdjacencyView(nx.classes.coreviews.AdjacencyView):
+class ArangoAdjacencyView(nx.classes.coreviews.AdjacencyView):
+ """The ArangoAdjacencyView class is an experimental subclass of
+ the AdjacencyView class.
+
+ Contrary to the original AdjacencyView class, the ArangoAdjacencyView
+ is writable to allow for bulk updates to the graph in the DB.
+ """
def update(self, data):
+ """Update a set of edges within the graph.
+
+ The benefit of this method is that it allows for bulk API updates,
+ as opposed to `G.add_edges_from`, which currently makes
+ one API request per edge.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> G.adj.update(
+ {
+ 'node/1': {
+ 'node/2': {"_id": "node_to_node/1", "foo": "bar"},
+ 'node/3': {"_id": "node_to_node/2", "foo": "baz"},
+ ...
+ },
+ ...
+ })
+ """
return self._atlas.update(data)
def __getitem__(self, name):
- return CustomAtlasView(self._atlas[name])
+ return ArangoAtlasView(self._atlas[name])
-class CustomAtlasView(nx.classes.coreviews.AtlasView):
+class ArangoAtlasView(nx.classes.coreviews.AtlasView):
+ """The ArangoAtlasView class is an experimental subclass of the
+ AtlasView class.
+
+ Contrary to the original AtlasView class, the ArangoAtlasView is
+ writable to allow for bulk updates to the graph in the DB.
+ """
def update(self, data):
+ """Update a set of edges within the graph for a specific node.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> G.adj['node/1'].update(
+ {
+ 'node/2': {"_id": "node_to_node/1", "foo": "bar"},
+ 'node/3': {"_id": "node_to_node/2", "foo": "baz"},
+ ...
+ })
+ """
return self._atlas.update(data)
diff --git a/nx_arangodb/classes/dict/README.md b/nx_arangodb/classes/dict/README.md
new file mode 100644
index 00000000..289509fd
--- /dev/null
+++ b/nx_arangodb/classes/dict/README.md
@@ -0,0 +1,30 @@
+# dict
+
+The `dict` module provides a set of `UserDict`-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database.
+
+NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions:
+
+1. `node_dict_factory`
+2. `node_attr_dict_factory`
+3. `adjlist_outer_dict_factory`
+4. `adjlist_inner_dict_factory`
+5. `edge_key_dict_factory` (Only for MultiGraphs)
+6. `edge_attr_dict_factory`
+7. `graph_attr_dict_factory`
+
+These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself.
+
+This module contains the following classes:
+
+1. `NodeDict`
+2. `NodeAttrDict`
+3. `AdjListOuterDict`
+4. `AdjListInnerDict`
+5. `EdgeKeyDict`
+6. `EdgeAttrDict`
+7. `GraphDict`
+8. `GraphAttrDict`
+
+Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the `UserDict` class.
+
+By using this strategy in addition to subclassing the `nx.Graph` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support.
\ No newline at end of file
diff --git a/nx_arangodb/classes/dict/adj.py b/nx_arangodb/classes/dict/adj.py
index a97791c9..71268975 100644
--- a/nx_arangodb/classes/dict/adj.py
+++ b/nx_arangodb/classes/dict/adj.py
@@ -4,7 +4,7 @@
from collections import UserDict
from collections.abc import Iterator
from itertools import islice
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict, List, Union
from arango.database import StandardDatabase
from arango.exceptions import DocumentDeleteError
@@ -32,7 +32,7 @@
aql_edge_get,
aql_edge_id,
aql_fetch_data_edge,
- check_list_for_errors,
+ check_update_list_for_errors,
doc_insert,
doc_update,
edge_get,
@@ -52,6 +52,8 @@
upsert_collection_edges,
)
+AdjDict = Union[GraphAdjDict, DiGraphAdjDict, MultiGraphAdjDict, MultiDiGraphAdjDict]
+
#############
# Factories #
#############
@@ -60,6 +62,7 @@
def edge_attr_dict_factory(
db: StandardDatabase, graph: Graph
) -> Callable[..., EdgeAttrDict]:
+ """Factory function for creating an EdgeAttrDict."""
return lambda: EdgeAttrDict(db, graph)
@@ -71,6 +74,7 @@ def edge_key_dict_factory(
is_directed: bool,
adjlist_inner_dict: AdjListInnerDict | None = None,
) -> Callable[..., EdgeKeyDict]:
+ """Factory function for creating an EdgeKeyDict."""
return lambda: EdgeKeyDict(
db, graph, edge_type_key, edge_type_func, is_directed, adjlist_inner_dict
)
@@ -85,6 +89,7 @@ def adjlist_inner_dict_factory(
graph_type: str,
adjlist_outer_dict: AdjListOuterDict | None = None,
) -> Callable[..., AdjListInnerDict]:
+ """Factory function for creating an AdjListInnerDict."""
return lambda: AdjListInnerDict(
db,
graph,
@@ -105,6 +110,7 @@ def adjlist_outer_dict_factory(
graph_type: str,
symmetrize_edges_if_directed: bool,
) -> Callable[..., AdjListOuterDict]:
+ """Factory function for creating an AdjListOuterDict."""
return lambda: AdjListOuterDict(
db,
graph,
@@ -129,10 +135,17 @@ def build_edge_attr_dict_data(
It's possible that **value** is a nested dict, so we need to
recursively build a EdgeAttrDict for each nested dict.
- :param parent: The parent EdgeAttrDict.
- :type parent: EdgeAttrDict
- :param data: The data to build the EdgeAttrDict from.
- :type data: dict[str, Any]
+ Parameters
+ ----------
+ parent : EdgeAttrDict
+ The parent EdgeAttrDict.
+ data : dict[str, Any]
+ The data to build the EdgeAttrDict from.
+
+ Returns
+ -------
+ dict[str, Any | EdgeAttrDict]
+ The data for the new EdgeAttrDict.
"""
edge_attr_dict_data = {}
for key, value in data.items():
@@ -143,6 +156,25 @@ def build_edge_attr_dict_data(
def process_edge_attr_dict_value(parent: EdgeAttrDict, key: str, value: Any) -> Any:
+ """Process the value of a particular key in an EdgeAttrDict.
+
+ If the value is a dict, then we need to recursively build an EdgeAttrDict.
+ Otherwise, we return the value as is.
+
+ Parameters
+ ----------
+ parent : EdgeAttrDict
+ The parent EdgeAttrDict.
+ key : str
+ The key of the value.
+ value : Any
+ The value to process.
+
+ Returns
+ -------
+ Any
+ The processed value.
+ """
if not isinstance(value, dict):
return value
@@ -161,10 +193,20 @@ class EdgeAttrDict(UserDict[str, Any]):
EdgeAttrDict is keyed by the edge attribute key.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ Examples
+ --------
+ >>> g = nxadb.Graph(name="MyGraph")
+ >>> g.add_edge("node/1", "node/2", foo="bar")
+ >>> g["node/1"]["node/2"]
+ EdgeAttrDict({'foo': 'bar', '_key': ..., '_id': ...})
"""
def __init__(
@@ -179,7 +221,7 @@ def __init__(
self.db = db
self.graph = graph
- self.edge_id: str | None = None
+ self.edge_id: str | None = None # established in __setitem__
# EdgeAttrDict may be a child of another EdgeAttrDict
# e.g G._adj['node/1']['node/2']['object']['foo'] = 'bar'
@@ -191,7 +233,10 @@ def clear(self) -> None:
raise NotImplementedError("Cannot clear EdgeAttrDict")
def copy(self) -> Any:
- return self.data.copy()
+ return {
+ key: value.copy() if hasattr(value, "copy") else value
+ for key, value in self.data.items()
+ }
@key_is_string
def __contains__(self, key: str) -> bool:
@@ -275,10 +320,33 @@ class EdgeKeyDict(UserDict[str, EdgeAttrDict]):
- keys must be ArangoDB Edge IDs
- key-to-edge mapping is 1-to-1
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ edge_type_key : str
+ The key used to store the edge type in the edge attribute dictionary.
+
+ edge_type_func : Callable[[str, str], str]
+ The function to generate the edge type from the source and
+ destination node types.
+
+ is_directed : bool
+ Whether the graph is directed or not.
+
+ adjlist_inner_dict : AdjListInnerDict | None
+ The parent AdjListInnerDict.
+
+ Examples
+ --------
+ >>> g = nxadb.MultiGraph(name="MyGraph")
+ >>> edge_id = g.add_edge("node/1", "node/2", foo="bar")
+ >>> g["node/1"]["node/2"][edge_id]
+ EdgeAttrDict({'foo': 'bar', '_key': ..., '_id': ...})
"""
def __init__(
@@ -352,14 +420,14 @@ def __process_int_edge_key(self, key: int) -> str:
return next(islice(self.data.keys(), key, key + 1))
def __is_valid_edge_outbound(self, edge: dict[str, Any]) -> bool:
- return bool(
- edge["_from"] == self.src_node_id and edge["_to"] == self.dst_node_id
- )
+ a = edge["_from"] == self.src_node_id
+ b = edge["_to"] == self.dst_node_id
+ return bool(a and b)
def __is_valid_edge_inbound(self, edge: dict[str, Any]) -> bool:
- return bool(
- edge["_from"] == self.dst_node_id and edge["_to"] == self.src_node_id
- )
+ a = edge["_from"] == self.dst_node_id
+ b = edge["_to"] == self.src_node_id
+ return bool(a and b)
def __is_valid_edge_any(self, edge: dict[str, Any]) -> bool:
return self.__is_valid_edge_outbound(edge) or self.__is_valid_edge_inbound(edge)
@@ -381,10 +449,15 @@ def __get_mirrored_edge_attr(self, edge_id: str) -> EdgeAttrDict | None:
- The "mirror" is the "reverse" adjlist_outer_dict because
the adjacency list is different in both directions (i.e _pred and _succ)
- :param dst_node_id: The destination node ID.
- :type dst_node_id: str
- :return: The edge attribute dictionary if it exists.
- :rtype: EdgeAttrDict | None
+ Parameters
+ ----------
+ edge_id : str
+ The edge ID.
+
+ Returns
+ -------
+ EdgeAttrDict | None
+ The edge attribute dictionary if it exists.
"""
if self.adjlist_inner_dict is None:
return None
@@ -426,8 +499,10 @@ def __str__(self) -> str:
@key_is_adb_id_or_int
def __contains__(self, key: str | int) -> bool:
"""
- 'edge/1' in G._adj['node/1']['node/2']
- 0 in G._adj['node/1']['node/2']
+ Examples
+ --------
+ >>> 'edge/1' in G._adj['node/1']['node/2']
+ >>> 0 in G._adj['node/1']['node/2']
"""
# HACK: This is a workaround for the fact that
# nxadb.MultiGraph does not yet support custom edge keys
@@ -459,11 +534,17 @@ def __contains__(self, key: str | int) -> bool:
# the entire edge from the database to check if it is valid.
edge_attr_dict = self._create_edge_attr_dict(edge)
self.data[key] = edge_attr_dict
+
return True
@key_is_adb_id_or_int
def __getitem__(self, key: str | int) -> EdgeAttrDict:
- """G._adj['node/1']['node/2']['edge/1']"""
+ """
+ Examples
+ --------
+ >>> G._adj['node/1']['node/2']['edge/1']
+ >>> G._adj['node/1']['node/2'][0]
+ """
# HACK: This is a workaround for the fact that
# nxadb.MultiGraph does not yet support custom edge keys
if key == "-1":
@@ -553,11 +634,18 @@ def __setitem__(self, key: int, edge_attr_dict: EdgeAttrDict) -> None: # type:
# for any nested EdgeAttrDicts within edge_attr_dict
edge_id = edge["_id"]
edge_attr_dict = self._create_edge_attr_dict(edge_data)
+
self.data[edge_id] = edge_attr_dict
+
del self.data[str(key)]
def __delitem__(self, key: str) -> None:
- """del G._adj['node/1']['node/2']['edge/1']"""
+ """
+ Examples
+ --------
+ >>> del G._adj['node/1']['node/2']['edge/1']
+ >>> del G._adj['node/1']['node/2'][0]
+ """
if isinstance(key, int):
key = self.__process_int_edge_key(key)
@@ -704,14 +792,36 @@ class AdjListInnerDict(UserDict[str, EdgeAttrDict | EdgeKeyDict]):
AdjListInnerDict is keyed by the node ID of the destination node.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
- :param default_node_type: The default node type.
- :type default_node_type: str
- :param edge_type_func: The function to generate the edge type.
- :type edge_type_func: Callable[[str, str], str]
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ default_node_type : str
+ The default node type.
+
+ edge_type_key : str
+ The key used to store the edge type in the edge attribute dictionary.
+
+ edge_type_func : Callable[[str, str], str]
+ The function to generate the edge type from the source and
+ destination node types.
+
+ graph_type : str
+ The type of graph (e.g. 'Graph', 'DiGraph', 'MultiGraph', 'MultiDiGraph').
+
+ adjlist_outer_dict : AdjListOuterDict | None
+ The parent AdjListOuterDict.
+
+ Examples
+ --------
+ >>> g = nxadb.Graph(name="MyGraph")
+ >>> g.add_edge("node/1", "node/2", foo="bar")
+ >>> g['node/1']
+ AdjListInnerDict('node/1')
"""
def __init__(
@@ -824,10 +934,15 @@ def __get_mirrored_edge_attr_or_key_dict(
- The "mirror" is the "reverse" adjlist_outer_dict because
the adjacency list is different in both directions (i.e _pred and _succ)
- :param dst_node_id: The destination node ID.
- :type dst_node_id: str
- :return: The edge attribute dictionary if it exists.
- :rtype: EdgeAttrDict | None
+ Parameters
+ ----------
+ dst_node_id : str
+ The destination node ID.
+
+ Returns
+ -------
+ EdgeAttrDict | EdgeKeyDict | None
+ The edge attribute dictionary or key dictionary if it exists.
"""
if self.adjlist_outer_dict is None:
return None
@@ -1205,7 +1320,7 @@ def update(self, edges: dict[str, dict[str, Any]]) -> None:
# perform write to ArangoDB
result = upsert_collection_edges(self.db, to_upsert)
- all_good = check_list_for_errors(result)
+ all_good = check_update_list_for_errors(result)
if all_good:
# Means no single operation failed, in this case we update the local cache
self.__set_adj_elements(edges)
@@ -1315,14 +1430,36 @@ class AdjListOuterDict(UserDict[str, AdjListInnerDict]):
AdjListOuterDict is keyed by the node ID of the source node.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
- :param default_node_type: The default node type.
- :type default_node_type: str
- :param edge_type_func: The function to generate the edge type.
- :type edge_type_func: Callable[[str, str], str]
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ default_node_type : str
+ The default node type.
+
+ edge_type_key : str
+ The key used to store the edge type in the edge attribute dictionary.
+
+ edge_type_func : Callable[[str, str], str]
+ The function to generate the edge type from the source and
+ destination node types.
+
+ graph_type : str
+ The type of graph (e.g. 'Graph', 'DiGraph', 'MultiGraph', 'MultiDiGraph').
+
+ symmetrize_edges_if_directed : bool
+ Whether to add the reverse edge if the graph is directed.
+
+ Example
+ -------
+ >>> g = nxadb.Graph(name="MyGraph")
+ >>> g.add_edge("node/1", "node/2", foo="bar")
+ >>> g._adj
+ AdjListOuterDict('MyGraph')
"""
def __init__(
@@ -1539,7 +1676,7 @@ def update(self, edges: Any) -> None:
)
result = upsert_collection_edges(self.db, separated_by_edge_collection)
- all_good = check_list_for_errors(result)
+ all_good = check_update_list_for_errors(result)
if all_good:
# Means no single operation failed, in this case we update the local cache
self.__set_adj_elements(edges)
@@ -1578,11 +1715,7 @@ def items(self, data: str | None = None, default: Any | None = None) -> Any:
yield from aql_fetch_data_edge(self.db, e_cols, data, default)
def __set_adj_elements(
- self,
- adj_dict: (
- GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict
- ),
- node_dict: NodeDict | None = None,
+ self, adj_dict: AdjDict, node_dict: NodeDict | None = None
) -> None:
def set_edge_graph(
src_node_id: str, dst_node_id: str, edge: dict[str, Any]
diff --git a/nx_arangodb/classes/dict/graph.py b/nx_arangodb/classes/dict/graph.py
index c5cf0786..249cafca 100644
--- a/nx_arangodb/classes/dict/graph.py
+++ b/nx_arangodb/classes/dict/graph.py
@@ -24,12 +24,14 @@
def graph_dict_factory(db: StandardDatabase, graph: Graph) -> Callable[..., GraphDict]:
+ """Factory function for creating a GraphDict."""
return lambda: GraphDict(db, graph)
def graph_attr_dict_factory(
db: StandardDatabase, graph: Graph, graph_id: str
) -> Callable[..., GraphAttrDict]:
+ """Factory function for creating a GraphAttrDict."""
return lambda: GraphAttrDict(db, graph, graph_id)
@@ -41,12 +43,22 @@ def graph_attr_dict_factory(
def build_graph_attr_dict_data(
parent: GraphAttrDict, data: dict[str, Any]
) -> dict[str, Any | GraphAttrDict]:
- """Recursively build a GraphAttrDict from a dict.
+ """Recursively build a GraphAttrDict from a dict.
It's possible that **value** is a nested dict, so we need to
recursively build a GraphAttrDict for each nested dict.
- Returns the parent GraphAttrDict.
+ Parameters
+ ----------
+ parent : GraphAttrDict
+ The parent GraphAttrDict.
+ data : dict[str, Any]
+ The data to build the GraphAttrDict from.
+
+ Returns
+ -------
+ dict[str, Any | GraphAttrDict]
+ The data for the new GraphAttrDict.
"""
graph_attr_dict_data = {}
for key, value in data.items():
@@ -57,6 +69,25 @@ def build_graph_attr_dict_data(
def process_graph_attr_dict_value(parent: GraphAttrDict, key: str, value: Any) -> Any:
+ """Process the value of a particular key in a GraphAttrDict.
+
+ If the value is a dict, then we need to recursively build a GraphAttrDict.
+ Otherwise, we return the value as is.
+
+ Parameters
+ ----------
+ parent : GraphAttrDict
+ The parent GraphAttrDict.
+ key : str
+ The key of the value.
+ value : Any
+ The value to process.
+
+ Returns
+ -------
+ Any
+ The processed value.
+ """
if not isinstance(value, dict):
return value
@@ -73,10 +104,23 @@ class GraphDict(UserDict[str, Any]):
Given that ArangoDB does not have a concept of graph attributes, this class
stores the attributes in a collection with the graph name as the document key.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph_name: The graph name.
- :type graph_name: str
+ For now, the collection is called 'nxadb_graphs'.
+
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name='MyGraph', foo='bar')
+ >>> G.graph['foo']
+ 'bar'
+ >>> G.graph['foo'] = 'baz'
+ >>> del G.graph['foo']
"""
def __init__(self, db: StandardDatabase, graph: Graph, *args: Any, **kwargs: Any):
@@ -178,12 +222,23 @@ class GraphAttrDict(UserDict[str, Any]):
Only used if the value associated with a GraphDict key is a dict.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
- :param graph_id: The ArangoDB graph ID.
- :type graph_id: str
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph.
+
+ graph_id : str
+ The ArangoDB document ID of the graph.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name='MyGraph', foo={'bar': 'baz'})
+ >>> G.graph['foo']['bar']
+ 'baz'
+ >>> G.graph['foo']['bar'] = 'qux'
"""
def __init__(
diff --git a/nx_arangodb/classes/dict/node.py b/nx_arangodb/classes/dict/node.py
index e55c5171..0ef179d7 100644
--- a/nx_arangodb/classes/dict/node.py
+++ b/nx_arangodb/classes/dict/node.py
@@ -15,7 +15,7 @@
aql_doc_get_key,
aql_doc_has_key,
aql_fetch_data,
- check_list_for_errors,
+ check_update_list_for_errors,
doc_delete,
doc_insert,
doc_update,
@@ -42,12 +42,14 @@
def node_dict_factory(
db: StandardDatabase, graph: Graph, default_node_type: str
) -> Callable[..., NodeDict]:
+ """Factory function for creating a NodeDict."""
return lambda: NodeDict(db, graph, default_node_type)
def node_attr_dict_factory(
db: StandardDatabase, graph: Graph
) -> Callable[..., NodeAttrDict]:
+ """Factory function for creating a NodeAttrDict."""
return lambda: NodeAttrDict(db, graph)
@@ -64,7 +66,17 @@ def build_node_attr_dict_data(
It's possible that **value** is a nested dict, so we need to
recursively build a NodeAttrDict for each nested dict.
- Returns the parent NodeAttrDict.
+ Parameters
+ ----------
+ parent : NodeAttrDict
+ The parent NodeAttrDict.
+ data : dict[str, Any]
+ The data to build the NodeAttrDict from.
+
+ Returns
+ -------
+ dict[str, Any | NodeAttrDict]
+ The data for the new NodeAttrDict.
"""
node_attr_dict_data = {}
for key, value in data.items():
@@ -75,6 +87,25 @@ def build_node_attr_dict_data(
def process_node_attr_dict_value(parent: NodeAttrDict, key: str, value: Any) -> Any:
+ """Process the value of a particular key in a NodeAttrDict.
+
+ If the value is a dict, then we need to recursively build a NodeAttrDict.
+ Otherwise, we return the value as is.
+
+ Parameters
+ ----------
+ parent : NodeAttrDict
+ The parent NodeAttrDict.
+ key : str
+ The key of the value.
+ value : Any
+ The value to process.
+
+ Returns
+ -------
+ Any
+ The processed value.
+ """
if not isinstance(value, dict):
return value
@@ -91,10 +122,20 @@ class NodeAttrDict(UserDict[str, Any]):
"""The inner-level of the dict of dict structure
representing the nodes (vertices) of a graph.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph object.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> G.add_node('node/1', foo='bar')
+ >>> G.nodes['node/1']['foo']
+ 'bar'
"""
def __init__(self, db: StandardDatabase, graph: Graph, *args: Any, **kwargs: Any):
@@ -197,13 +238,22 @@ class NodeDict(UserDict[str, NodeAttrDict]):
The outer dict is keyed by ArangoDB Vertex IDs and the inner dict
is keyed by Vertex attributes.
- :param db: The ArangoDB database.
- :type db: StandardDatabase
- :param graph: The ArangoDB graph.
- :type graph: Graph
- :param default_node_type: The default node type. Used if the node ID
- is not formatted as 'type/id'.
- :type default_node_type: str
+ Parameters
+ ----------
+ db : arango.database.StandardDatabase
+ The ArangoDB database.
+
+ graph : arango.graph.Graph
+ The ArangoDB graph object.
+
+ default_node_type : str
+ The default node type for the graph.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> G.add_node('node/1', foo='bar')
+ >>> G.nodes
"""
def __init__(
@@ -370,7 +420,7 @@ def update(self, nodes: Any) -> None:
result = upsert_collection_documents(self.db, separated_by_collection)
- all_good = check_list_for_errors(result)
+ all_good = check_update_list_for_errors(result)
if all_good:
# Means no single operation failed, in this case we update the local cache
self.__update_local_nodes(nodes)
diff --git a/nx_arangodb/classes/digraph.py b/nx_arangodb/classes/digraph.py
index ccf7d65f..9477c60c 100644
--- a/nx_arangodb/classes/digraph.py
+++ b/nx_arangodb/classes/digraph.py
@@ -17,6 +17,121 @@
class DiGraph(Graph, nx.DiGraph):
+ """
+ Base class for directed graphs.
+
+ Subclasses ``nxadb.Graph`` and ``nx.DiGraph``.
+
+ In order to connect to an ArangoDB instance, the following environment
+ variables must be set:
+
+ 1. ``DATABASE_HOST``
+ 2. ``DATABASE_USERNAME``
+ 3. ``DATABASE_PASSWORD``
+ 4. ``DATABASE_NAME``
+
+ Furthermore, the ``name`` parameter is required to create a new graph
+ or to connect to an existing graph in the database.
+
+ Example
+ -------
+ >>> import os
+ >>> import networkx as nx
+ >>> import nx_arangodb as nxadb
+ >>>
+ >>> os.environ["DATABASE_HOST"] = "http://localhost:8529"
+ >>> os.environ["DATABASE_USERNAME"] = "root"
+ >>> os.environ["DATABASE_PASSWORD"] = "openSesame"
+ >>> os.environ["DATABASE_NAME"] = "_system"
+ >>>
+ >>> G = nxadb.DiGraph(name="MyGraph")
+ >>> ...
+
+
+ Parameters
+ ----------
+ incoming_graph_data : input graph (optional, default: None)
+ Data to initialize graph. If None (default) an empty
+ graph is created. Must be used in conjunction with **name** if
+ the user wants to persist the graph in ArangoDB. NOTE: It is
+ recommended for incoming_graph_data to be a NetworkX graph due
+ to faster loading times.
+
+ name : str (optional, default: None)
+ Name of the graph in the database. If the graph already exists,
+ the user can pass the name of the graph to connect to it. If
+ the graph does not exist, the user can create a new graph by
+ passing the name. NOTE: Must be used in conjunction with
+ **incoming_graph_data** if the user wants to persist the graph
+ in ArangoDB.
+
+ default_node_type : str (optional, default: None)
+ Default node type for the graph. In ArangoDB terms, this is the
+ default vertex collection. If the graph already exists, the user can
+ omit this parameter and the default node type will be set to the
+ first vertex collection in the graph. If the graph does not exist,
+ the user can pass the default node type to create the default vertex
+ collection.
+
+ edge_type_key : str (optional, default: "_edge_type")
+ Key used to store the edge type when inserting edges into the graph.
+ Useful for working with Heterogeneous Graphs.
+
+ edge_type_func : Callable[[str, str], str] (optional, default: None)
+ Function to determine the edge type between two nodes. If the graph
+ already exists, the user can omit this parameter and the edge type
+ function will be set based on the existing edge definitions. If the
+ graph does not exist, the user can pass a function that determines
+ the edge type between two nodes.
+
+ edge_collections_attributes : set[str] (optional, default: None)
+ Set of edge attributes to fetch when executing a NetworkX algorithm.
+ Useful if the user has edge weights or other edge attributes that
+ they want to use in a NetworkX algorithm.
+
+ db : arango.database.StandardDatabase (optional, default: None)
+ ArangoDB database object. If the user has an existing python-arango
+ connection to the database, they can pass the database object to the graph.
+ If not provided, a database object will be created using the environment
+ variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and
+ DATABASE_NAME.
+
+ read_parallelism : int (optional, default: 10)
+ Number of parallel threads to use when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ read_batch_size : int (optional, default: 100000)
+ Number of documents to fetch in a single batch when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ write_batch_size : int (optional, default: 50000)
+ Number of documents to insert in a single batch when writing data to ArangoDB.
+ Used for inserting node and edge data into the database if and only if
+ **incoming_graph_data** is a NetworkX graph.
+
+ write_async : bool (optional, default: True)
+ Whether to insert data into ArangoDB asynchronously. Used for inserting
+ node and edge data into the database if and only if **incoming_graph_data**
+ is a NetworkX graph.
+
+ symmetrize_edges : bool (optional, default: False)
+ Whether to symmetrize the edges in the graph when fetched from the database.
+ Only applies to directed graphs, thereby converting them to undirected graphs.
+
+ use_arango_views : bool (optional, default: False)
+ Whether to use experimental work-in-progress ArangoDB Views for the
+ nodes, adjacency list, and edges. These views are designed to improve
+ data processing performance by delegating CRUD operations to the database
+ whenever possible. NOTE: This feature is experimental and may not work
+ as expected.
+
+ args: positional arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+
+ kwargs: keyword arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+ """
+
__networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2
__networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2
@@ -38,7 +153,7 @@ def __init__(
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
- use_experimental_views: bool = False,
+ use_arango_views: bool = False,
*args: Any,
**kwargs: Any,
):
@@ -55,7 +170,7 @@ def __init__(
write_batch_size,
write_async,
symmetrize_edges,
- use_experimental_views,
+ use_arango_views,
*args,
**kwargs,
)
diff --git a/nx_arangodb/classes/function.py b/nx_arangodb/classes/function.py
index c9b73822..993db901 100644
--- a/nx_arangodb/classes/function.py
+++ b/nx_arangodb/classes/function.py
@@ -1,12 +1,12 @@
"""
-A collection of CRUD functions for the ArangoDB graph database.
-Used by the nx_arangodb Graph, DiGraph, MultiGraph, and MultiDiGraph classes.
+A collection of CRUD functions for ArangoDB Graphs.
+
+Used across the nx_arangodb package to interact with ArangoDB.
"""
from __future__ import annotations
-from collections import UserDict
-from typing import Any, Callable, Generator, Optional, Tuple
+from typing import Any, Callable, Generator, Tuple
import networkx as nx
from arango import ArangoError, DocumentInsertError
@@ -33,12 +33,7 @@
from ..exceptions import AQLMultipleResultsFound, InvalidTraversalDirection
from .enum import GraphType
-
-def do_load_all_edge_attributes(attributes: set[str]) -> bool:
- if len(attributes) == 0:
- return True
-
- return False
+RESERVED_KEYS = {"_id", "_key", "_rev", "_from", "_to"}
def get_arangodb_graph(
@@ -61,15 +56,74 @@ def get_arangodb_graph(
ArangoIDtoIndex,
EdgeValuesDict,
]:
- """Pulls the graph from the database, assuming the graph exists.
-
- Returns the following representations:
- - Node dictionary (nx.Graph)
- - Adjacency dictionary (nx.Graph)
- - Source Indices (COO)
- - Destination Indices (COO)
- - Node-ID-to-index mapping (COO)
+ """Pulls ArangoDB Graph Data from the database using
+ `phenolrs.networkx.NetworkXLoader`.
+
+ Parameters
+ ----------
+ adb_graph : Graph
+ The ArangoDB Graph object from python-arango.
+
+ load_node_dict : bool
+ Whether to load the Node dictionary representation.
+
+ load_adj_dict : bool
+ Whether to load the Adjacency dictionary representation.
+
+ load_coo : bool
+ Whether to load the COO representation.
+
+ edge_collections_attributes : set[str]
+ The set of edge attributes to load. Can be empty.
+
+ load_all_vertex_attributes : bool
+ Whether to load all vertex attributes.
+
+ load_all_edge_attributes : bool
+ Whether to load all edge attributes. Cannot be True if
+ **edge_collections_attributes** is not empty.
+
+ is_directed : bool
+ Whether to load the graph as directed or undirected.
+
+ is_multigraph : bool
+ Whether to load the graph as a MultiGraph or Graph.
+
+ symmetrize_edges_if_directed : bool
+ Whether to duplicate edges in the adjacency dictionary if the graph is directed.
+
+ Returns
+ -------
+ Tuple[
+ NodeDict,
+ GraphAdjDict | DiGraphAdjDict | MultiGraphAdjDict | MultiDiGraphAdjDict,
+ SrcIndices,
+ DstIndices,
+ EdgeIndices,
+ ArangoIDtoIndex,
+ EdgeValuesDict
+ ]
+ A tuple containing the different representations of the graph.
+
+ Raises
+ ------
+ ValueError
+ If **load_all_edge_attributes** is True and
+ **edge_collections_attributes** is not empty.
+
+ ValueError
+ If none of the load flags are True.
+
+ PhenolrsError
+ If an error occurs while loading the graph.
"""
+ if len(edge_collections_attributes) != 0 and load_all_edge_attributes:
+ raise ValueError(
+ "You have specified to load at least one specific edge attribute"
+ " and at the same time set the parameter `load_all_edge_attributes`"
+ " to true. This combination is not allowed."
+ )
+
v_cols = adb_graph.vertex_collections()
edge_definitions = adb_graph.edge_definitions()
e_cols = {c["edge_collection"] for c in edge_definitions}
@@ -94,21 +148,6 @@ def get_arangodb_graph(
assert config.username
assert config.password
- res_do_load_all_edge_attributes = do_load_all_edge_attributes(
- edge_collections_attributes
- )
-
- if res_do_load_all_edge_attributes is not load_all_edge_attributes:
- if len(edge_collections_attributes) > 0:
- raise ValueError(
- "You have specified to load at least one specific edge attribute"
- " and at the same time set the parameter `load_all_vertex_attributes`"
- " to true. This combination is not allowed."
- )
- else:
- # We need this case as the user wants by purpose to not load any edge data
- res_do_load_all_edge_attributes = load_all_edge_attributes
-
(
node_dict,
adj_dict,
@@ -126,7 +165,7 @@ def get_arangodb_graph(
load_adj_dict=load_adj_dict,
load_coo=load_coo,
load_all_vertex_attributes=load_all_vertex_attributes,
- load_all_edge_attributes=res_do_load_all_edge_attributes,
+ load_all_edge_attributes=load_all_edge_attributes,
is_directed=is_directed,
is_multigraph=is_multigraph,
symmetrize_edges_if_directed=symmetrize_edges_if_directed,
@@ -146,6 +185,10 @@ def get_arangodb_graph(
def json_serializable(cls):
+ """Decorator to make a class JSON serializable. Only used for
+ the NodeAttrDict, EdgeAttrDict, and GraphAttrDict classes.
+ """
+
def to_dict(self):
return {
key: dict(value) if isinstance(value, cls) else value
@@ -157,10 +200,11 @@ def to_dict(self):
def key_is_string(func: Callable[..., Any]) -> Any:
- """Decorator to check if the key is a string."""
+ """Decorator to check if the key is a string.
+ Will attempt to cast the key to a string if it is not.
+ """
def wrapper(self: Any, key: Any, *args: Any, **kwargs: Any) -> Any:
- """"""
if key is None:
raise ValueError("Key cannot be None.")
@@ -210,7 +254,9 @@ def wrapper(self: Any, key: Any, *args: Any, **kwargs: Any) -> Any:
def keys_are_strings(func: Callable[..., Any]) -> Any:
- """Decorator to check if the keys are strings."""
+ """Decorator to check if the keys are strings.
+ Will attempt to cast the keys to strings if they are not.
+ """
def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any:
data_dict = {}
@@ -237,9 +283,6 @@ def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any:
return wrapper
-RESERVED_KEYS = {"_id", "_key", "_rev", "_from", "_to"}
-
-
def key_is_not_reserved(func: Callable[..., Any]) -> Any:
"""Decorator to check if the key is not reserved."""
@@ -255,9 +298,7 @@ def wrapper(self: Any, key: str, *args: Any, **kwargs: Any) -> Any:
def keys_are_not_reserved(func: Any) -> Any:
"""Decorator to check if the keys are not reserved."""
- def wrapper(
- self: Any, data: dict[Any, Any] | zip[Any], *args: Any, **kwargs: Any
- ) -> Any:
+ def wrapper(self: Any, data: Any, *args: Any, **kwargs: Any) -> Any:
keys: Any
if isinstance(data, dict):
keys = data.keys()
@@ -304,6 +345,7 @@ def aql_single(
) -> Any | None:
"""Executes an AQL query and returns the first result."""
result = aql_as_list(db, query, bind_vars)
+
if len(result) == 0:
return None
@@ -374,6 +416,7 @@ def aql_edge_exists(
graph_name: str,
direction: str,
) -> bool | None:
+ """Checks if an edge exists between two nodes."""
return aql_edge(
db,
src_node_id,
@@ -394,6 +437,7 @@ def aql_edge_get(
direction: str,
can_return_multiple: bool = False,
) -> Any | None:
+ """Gets an edge between two nodes."""
return_clause = "UNSET(e, '_rev')"
if direction == "ANY":
return_clause = f"DISTINCT {return_clause}"
@@ -418,6 +462,7 @@ def aql_edge_id(
direction: str,
can_return_multiple: bool = False,
) -> Any | None:
+ """Gets the edge ID between two nodes."""
return_clause = "DISTINCT e._id" if direction == "ANY" else "e._id"
return aql_edge(
db,
@@ -437,6 +482,7 @@ def aql_edge_count_src(
graph_name: str,
direction: str,
) -> int:
+ """Counts the number of edges from a source node."""
query = f"""
FOR v, e IN 1..1 {direction} @src_node_id GRAPH @graph_name
COLLECT id = e._id
@@ -461,6 +507,7 @@ def aql_edge_count_src_dst(
graph_name: str,
direction: str,
) -> int:
+ """Counts the number of edges between two nodes."""
filter_clause = aql_edge_direction_filter(direction)
query = f"""
@@ -483,6 +530,7 @@ def aql_edge_count_src_dst(
def aql_edge_direction_filter(direction: str) -> str:
+ """Returns the AQL filter clause for the edge direction."""
if direction == "INBOUND":
return "e._from == @dst_node_id"
if direction == "OUTBOUND":
@@ -505,6 +553,7 @@ def aql_edge(
limit_one: bool,
can_return_multiple: bool,
) -> Any | None:
+ """Fetches an edge between two nodes."""
if limit_one and can_return_multiple:
raise ValueError("Cannot return multiple results limit_one=True.")
@@ -537,6 +586,7 @@ def aql_fetch_data(
data: str,
default: Any,
) -> Generator[dict[str, Any], None, None]:
+ """Fetches data from a collection (assumed to be vertex)."""
bind_vars = {"data": data, "default": default}
query = """
FOR doc IN @@collection
@@ -554,6 +604,7 @@ def aql_fetch_data_edge(
data: str,
default: Any,
) -> Generator[tuple[str, str, Any], None, None]:
+ """Fetches data from an edge collection."""
bind_vars = {"data": data, "default": default}
query = """
FOR doc IN @@collection
@@ -581,6 +632,7 @@ def doc_delete(db: StandardDatabase, id: str, **kwargs: Any) -> None:
def edges_delete(
db: StandardDatabase, graph: Graph, src_node_id: str, **kwargs: Any
) -> None:
+ """Deletes all edges from a source node."""
remove_statements = "\n".join(
f"REMOVE e IN `{edge_def['edge_collection']}` OPTIONS {{ignoreErrors: true}}" # noqa
for edge_def in graph.edge_definitions()
@@ -656,11 +708,12 @@ def edge_link(
def is_arangodb_id(key):
+ """Checks if the key is an ArangoDB ID."""
return "/" in key
def get_node_type(key: str, default_node_type: str) -> str:
- """Gets the node type."""
+ """Gets the collection of a node."""
return key.split("/")[0] if is_arangodb_id(key) else default_node_type
@@ -670,7 +723,7 @@ def get_node_id(key: str, default_node_type: str) -> str:
def get_node_type_and_id(key: str, default_node_type: str) -> tuple[str, str]:
- """Gets the node type and ID."""
+ """Gets the node collection (i.e type) and ID."""
return (
(key.split("/")[0], key)
if is_arangodb_id(key)
@@ -690,6 +743,9 @@ def get_node_type_and_key(key: str, default_node_type: str) -> tuple[str, str]:
def get_update_dict(
parent_keys: list[str], update_dict: dict[str, Any]
) -> dict[str, Any]:
+ """Builds the update dictionary for nested documents.
+ Useful for updating nested documents in ArangoDB.
+ """
if parent_keys:
for key in reversed(parent_keys):
update_dict = {key: update_dict}
@@ -698,6 +754,8 @@ def get_update_dict(
class ArangoDBBatchError(ArangoError):
+ """Custom exception for batch errors."""
+
def __init__(self, errors):
self.errors = errors
super().__init__(self._format_errors())
@@ -706,11 +764,11 @@ def _format_errors(self):
return "\n".join(str(error) for error in self.errors)
-def check_list_for_errors(lst):
+def check_update_list_for_errors(lst):
+ """Checks if a list contains any errors."""
for element in lst:
- if element is type(bool):
- if element is False:
- return False
+ if element is False:
+ return False
elif isinstance(element, list):
for sub_element in element:
@@ -722,18 +780,12 @@ def check_list_for_errors(lst):
def separate_nodes_by_collections(
nodes: dict[str, Any], default_collection: str
-) -> Any:
- """
- Separate the dictionary into collections based on whether keys contain '/'.
- :param nodes:
- The input dictionary with keys that may or may not contain '/'.
- :param default_collection:
- The name of the default collection for keys without '/'.
- :return: A dictionary where the keys are collection names and the
- values are dictionaries of key-value pairs belonging to those
- collections.
+) -> dict[str, dict[str, Any]]:
+ """Separate the dictionary into collections based on whether IDs contain '/'.
+ Returns dictionary where the keys are collection names and the values are
+ dictionaries of key-value pairs belonging to those collections.
"""
- separated: Any = {}
+ separated: dict[str, dict[str, Any]] = {}
for key, value in nodes.items():
collection, doc_key = get_node_type_and_key(key, default_collection)
@@ -746,15 +798,14 @@ def separate_nodes_by_collections(
return separated
-def transform_local_documents_for_adb(original_documents):
+def transform_local_documents_for_adb(
+ original_documents: dict[str, Any]
+) -> list[dict[str, Any]]:
+ """Transform original documents into a format suitable for UPSERT
+ operations in ArangoDB. Returns a list of documents with '_key' attribute
+ and additional attributes.
"""
- Transform original documents into a format suitable for UPSERT
- operations in ArangoDB.
- :param original_documents: Original documents in the format
- {'key': {'any-attr-key': 'any-attr-value'}}.
- :return: List of documents with '_key' attribute and additional attributes.
- """
- transformed_documents = []
+ transformed_documents: list[dict[str, Any]] = []
for key, values in original_documents.items():
transformed_doc = {"_key": key}
@@ -764,18 +815,13 @@ def transform_local_documents_for_adb(original_documents):
return transformed_documents
-def upsert_collection_documents(db: StandardDatabase, separated: Any) -> Any:
- """
- Process each collection in the separated dictionary.
- :param db: The ArangoDB database object.
- :param separated: A dictionary where the keys are collection names and the
- values are dictionaries
- of key-value pairs belonging to those collections.
- :return: A list of results from the insert_many operation.
- If inserting a document fails, the exception is not raised but
- returned as an object in the result list.
+def upsert_collection_documents(
+ db: StandardDatabase, separated: dict[str, dict[str, Any]]
+) -> list[Any]:
+ """Process each collection in the separated dictionary.
+ If inserting a document fails, the exception is not raised but
+ returned as an object in the result list.
"""
-
results = []
for collection_name, documents in separated.items():
@@ -790,15 +836,14 @@ def upsert_collection_documents(db: StandardDatabase, separated: Any) -> Any:
return results
-def separate_edges_by_collections_graph(edges: Any, default_node_type: str) -> Any:
+def separate_edges_by_collections_graph(
+ edges: GraphAdjDict, default_node_type: str
+) -> dict[str, list[dict[str, Any]]]:
+ """Separate the dictionary into collections for Graph and DiGraph types.
+ Returns a dictionary where the keys are collection names and the
+ values are lists of edge documents belonging to those collections.
"""
- Separate the dictionary into collections for Graph and DiGraph types.
- :param edges: The input dictionary with keys that must contain the real doc id.
- :param default_node_type: The name of the default collection for keys without '/'.
- :return: A dictionary where the keys are collection names and the
- values are dictionaries of key-value pairs belonging to those collections.
- """
- separated: Any = {}
+ separated: dict[str, list[dict[str, Any]]] = {}
for from_doc_id, target_dict in edges.items():
for to_doc_id, edge_doc in target_dict.items():
@@ -818,15 +863,15 @@ def separate_edges_by_collections_graph(edges: Any, default_node_type: str) -> A
return separated
-def separate_edges_by_collections_multigraph(edges: Any, default_node_type: str) -> Any:
+def separate_edges_by_collections_multigraph(
+ edges: MultiGraphAdjDict, default_node_type: str
+) -> Any:
"""
Separate the dictionary into collections for MultiGraph and MultiDiGraph types.
- :param edges: The input dictionary with keys that must contain the real doc id.
- :param default_node_type: The name of the default collection for keys without '/'.
- :return: A dictionary where the keys are collection names and the
- values are dictionaries of key-value pairs belonging to those collections.
+ Returns a dictionary where the keys are collection names and the
+ values are lists of edge documents belonging to those collections.
"""
- separated: Any = {}
+ separated: dict[str, list[dict[str, Any]]] = {}
for from_doc_id, target_dict in edges.items():
for to_doc_id, edge_doc in target_dict.items():
@@ -849,15 +894,12 @@ def separate_edges_by_collections_multigraph(edges: Any, default_node_type: str)
def separate_edges_by_collections(
- edges: Any, graph_type: str, default_node_type: str
+ edges: GraphAdjDict | MultiGraphAdjDict, graph_type: str, default_node_type: str
) -> Any:
"""
Wrapper function to separate the dictionary into collections based on graph type.
- :param edges: The input dictionary with keys that must contain the real doc id.
- :param graph_type: The type of graph to create.
- :param default_node_type: The name of the default collection for keys without '/'.
- :return: A dictionary where the keys are collection names and the
- values are dictionaries of key-value pairs belonging to those collections.
+ Returns a dictionary where the keys are collection names and the
+ values are lists of edge documents belonging to those collections.
"""
if graph_type in [GraphType.Graph.name, GraphType.DiGraph.name]:
return separate_edges_by_collections_graph(edges, default_node_type)
@@ -867,16 +909,13 @@ def separate_edges_by_collections(
raise ValueError(f"Unsupported graph type: {graph_type}")
-def upsert_collection_edges(db: StandardDatabase, separated: Any) -> Any:
- """
- Process each collection in the separated dictionary.
- :param db: The ArangoDB database object.
- :param separated: A dictionary where the keys are collection names and the
- values are dictionaries
- of key-value pairs belonging to those collections.
- :return: A list of results from the insert_many operation.
- If inserting a document fails, the exception is not raised but
- returned as an object in the result list.
+def upsert_collection_edges(
+ db: StandardDatabase, separated: dict[str, list[dict[str, Any]]]
+) -> Any:
+ """Process each collection in the separated dictionary.
+ Returns a list of results from the insert_many operation.
+ If inserting a document fails, the exception is not raised but
+ returned as an object in the result list.
"""
results = []
diff --git a/nx_arangodb/classes/graph.py b/nx_arangodb/classes/graph.py
index 2bb23831..9cadb069 100644
--- a/nx_arangodb/classes/graph.py
+++ b/nx_arangodb/classes/graph.py
@@ -18,7 +18,7 @@
)
from nx_arangodb.logger import logger
-from .coreviews import CustomAdjacencyView
+from .coreviews import ArangoAdjacencyView
from .dict import (
adjlist_inner_dict_factory,
adjlist_outer_dict_factory,
@@ -28,7 +28,7 @@
node_dict_factory,
)
from .function import get_node_id
-from .reportviews import CustomEdgeView, CustomNodeView
+from .reportviews import ArangoEdgeView, ArangoNodeView
networkx_api = nxadb.utils.decorators.networkx_class(nx.Graph) # type: ignore
@@ -49,6 +49,121 @@ class BaseLanguageModel: # type: ignore[no-redef]
class Graph(nx.Graph):
+ """
+ Base class for undirected graphs. Designed to work with ArangoDB graphs.
+
+ Subclasses ``nx.Graph``.
+
+ In order to connect to an ArangoDB instance, the following environment
+ variables must be set:
+
+ 1. ``DATABASE_HOST``
+ 2. ``DATABASE_USERNAME``
+ 3. ``DATABASE_PASSWORD``
+ 4. ``DATABASE_NAME``
+
+ Furthermore, the ``name`` parameter is required to create a new graph
+ or to connect to an existing graph in the database.
+
+ Example
+ -------
+ >>> import os
+ >>> import networkx as nx
+ >>> import nx_arangodb as nxadb
+ >>>
+ >>> os.environ["DATABASE_HOST"] = "http://localhost:8529"
+ >>> os.environ["DATABASE_USERNAME"] = "root"
+ >>> os.environ["DATABASE_PASSWORD"] = "openSesame"
+ >>> os.environ["DATABASE_NAME"] = "_system"
+ >>>
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> ...
+
+
+ Parameters
+ ----------
+ incoming_graph_data : input graph (optional, default: None)
+ Data to initialize graph. If None (default) an empty
+ graph is created. Must be used in conjunction with **name** if
+ the user wants to persist the graph in ArangoDB. NOTE: It is
+ recommended for incoming_graph_data to be a NetworkX graph due
+ to faster loading times.
+
+ name : str (optional, default: None)
+ Name of the graph in the database. If the graph already exists,
+ the user can pass the name of the graph to connect to it. If
+ the graph does not exist, the user can create a new graph by
+ passing the name. NOTE: Must be used in conjunction with
+ **incoming_graph_data** if the user wants to persist the graph
+ in ArangoDB.
+
+ default_node_type : str (optional, default: None)
+ Default node type for the graph. In ArangoDB terms, this is the
+ default vertex collection. If the graph already exists, the user can
+ omit this parameter and the default node type will be set to the
+ first vertex collection in the graph. If the graph does not exist,
+ the user can pass the default node type to create the default vertex
+ collection.
+
+ edge_type_key : str (optional, default: "_edge_type")
+ Key used to store the edge type when inserting edges into the graph.
+ Useful for working with Heterogeneous Graphs.
+
+ edge_type_func : Callable[[str, str], str] (optional, default: None)
+ Function to determine the edge type between two nodes. If the graph
+ already exists, the user can omit this parameter and the edge type
+ function will be set based on the existing edge definitions. If the
+ graph does not exist, the user can pass a function that determines
+ the edge type between two nodes.
+
+ edge_collections_attributes : set[str] (optional, default: None)
+ Set of edge attributes to fetch when executing a NetworkX algorithm.
+ Useful if the user has edge weights or other edge attributes that
+ they want to use in a NetworkX algorithm.
+
+ db : arango.database.StandardDatabase (optional, default: None)
+ ArangoDB database object. If the user has an existing python-arango
+ connection to the database, they can pass the database object to the graph.
+ If not provided, a database object will be created using the environment
+ variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and
+ DATABASE_NAME.
+
+ read_parallelism : int (optional, default: 10)
+ Number of parallel threads to use when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ read_batch_size : int (optional, default: 100000)
+ Number of documents to fetch in a single batch when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ write_batch_size : int (optional, default: 50000)
+ Number of documents to insert in a single batch when writing data to ArangoDB.
+ Used for inserting node and edge data into the database if and only if
+ **incoming_graph_data** is a NetworkX graph.
+
+ write_async : bool (optional, default: True)
+ Whether to insert data into ArangoDB asynchronously. Used for inserting
+ node and edge data into the database if and only if **incoming_graph_data**
+ is a NetworkX graph.
+
+ symmetrize_edges : bool (optional, default: False)
+ Whether to symmetrize the edges in the graph when fetched from the database.
+ Only applies to directed graphs, thereby converting them to undirected graphs.
+
+ use_arango_views : bool (optional, default: False)
+ Whether to use experimental work-in-progress ArangoDB Views for the
+ nodes, adjacency list, and edges. These views are designed to improve
+ data processing performance by delegating CRUD operations to the database
+ whenever possible. NOTE: This feature is experimental and may not work
+ as expected.
+
+ args: positional arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+
+ kwargs: keyword arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+ """
+
__networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2
__networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2
@@ -70,13 +185,13 @@ def __init__(
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
- use_experimental_views: bool = False,
+ use_arango_views: bool = False,
*args: Any,
**kwargs: Any,
):
self.__db = None
self.__name = None
- self.__use_experimental_views = use_experimental_views
+ self.__use_arango_views = use_arango_views
self.__graph_exists_in_db = False
self.__set_db(db)
@@ -313,7 +428,7 @@ def __set_graph_name(self, name: Any = None) -> None:
m = "Cannot set graph name without setting the database first"
raise DatabaseNotSet(m)
- if name is None:
+ if not name:
self.__graph_exists_in_db = False
logger.warning(f"**name** not set for {self.__class__.__name__}")
return
@@ -378,6 +493,27 @@ def clear_nxcg_cache(self):
def query(
self, query: str, bind_vars: dict[str, Any] = {}, **kwargs: Any
) -> Cursor:
+ """Execute an AQL query on the graph.
+
+ Read more about AQL here:
+ https://www.arangodb.com/docs/stable/aql/
+
+ Parameters
+ ----------
+ query : str
+ AQL query to execute.
+
+ bind_vars : dict[str, Any] (optional, default: {})
+ Bind variables to pass to the query.
+
+ kwargs : dict[str, Any]
+ Additional keyword arguments to pass to the query.
+
+ Returns
+ -------
+ arango.cursor.Cursor
+ Cursor object containing the results of the query.
+ """
return nxadb.classes.function.aql(self.db, query, bind_vars, **kwargs)
# def pull(self) -> None:
@@ -389,6 +525,26 @@ def query(
def chat(
self, prompt: str, verbose: bool = False, llm: BaseLanguageModel | None = None
) -> str:
+ """Chat with the graph using an LLM. Use at your own risk.
+
+ Parameters
+ ----------
+ prompt : str
+ Prompt to chat with the graph.
+
+ verbose : bool (optional, default: False)
+ Whether to print the intermediate steps of the conversation.
+
+ llm : langchain_core.language_models.BaseLanguageModel (optional, default: None)
+ Language model to use for the conversation. If None, the default
+ language model is ChatOpenAI with the GPT-4 model, which expects the
+ OpenAI API key to be set in the environment variable OPENAI_API_KEY.
+
+ Returns
+ -------
+ str
+ Response from the Language Model.
+ """
if not LLM_AVAILABLE:
m = "LLM dependencies not installed. Install with **pip install nx-arangodb[llm]**" # noqa: E501
raise ModuleNotFoundError(m)
@@ -416,33 +572,33 @@ def chat(
@cached_property
def nodes(self):
- if self.__use_experimental_views and self.graph_exists_in_db:
- logger.warning("nxadb.CustomNodeView is currently EXPERIMENTAL")
- return CustomNodeView(self)
+ if self.__use_arango_views and self.graph_exists_in_db:
+ logger.warning("nxadb.ArangoNodeView is currently EXPERIMENTAL")
+ return ArangoNodeView(self)
return super().nodes
@cached_property
def adj(self):
- if self.__use_experimental_views and self.graph_exists_in_db:
- logger.warning("nxadb.CustomAdjacencyView is currently EXPERIMENTAL")
- return CustomAdjacencyView(self._adj)
+ if self.__use_arango_views and self.graph_exists_in_db:
+ logger.warning("nxadb.ArangoAdjacencyView is currently EXPERIMENTAL")
+ return ArangoAdjacencyView(self._adj)
return super().adj
@cached_property
def edges(self):
- if self.__use_experimental_views and self.graph_exists_in_db:
+ if self.__use_arango_views and self.graph_exists_in_db:
if self.is_directed():
- logger.warning("CustomEdgeView for DiGraphs not yet implemented")
+ logger.warning("ArangoEdgeView for DiGraphs not yet implemented")
return super().edges
if self.is_multigraph():
- logger.warning("CustomEdgeView for MultiGraphs not yet implemented")
+ logger.warning("ArangoEdgeView for MultiGraphs not yet implemented")
return super().edges
- logger.warning("nxadb.CustomEdgeView is currently EXPERIMENTAL")
- return CustomEdgeView(self)
+ logger.warning("nxadb.ArangoEdgeView is currently EXPERIMENTAL")
+ return ArangoEdgeView(self)
return super().edges
diff --git a/nx_arangodb/classes/multidigraph.py b/nx_arangodb/classes/multidigraph.py
index fe25eb93..dc05e592 100644
--- a/nx_arangodb/classes/multidigraph.py
+++ b/nx_arangodb/classes/multidigraph.py
@@ -14,6 +14,134 @@
class MultiDiGraph(MultiGraph, DiGraph, nx.MultiDiGraph):
+ """
+ A directed graph class that can store multiedges.
+
+ Subclasses ``nxadb.MultiGraph``, ``nxadb.DiGraph``, and ``nx.MultiDiGraph``.
+
+ In order to connect to an ArangoDB instance, the following environment
+ variables must be set:
+
+ 1. ``DATABASE_HOST``
+ 2. ``DATABASE_USERNAME``
+ 3. ``DATABASE_PASSWORD``
+ 4. ``DATABASE_NAME``
+
+ Furthermore, the ``name`` parameter is required to create a new graph
+ or to connect to an existing graph in the database.
+
+ Example
+ -------
+ >>> import os
+ >>> import networkx as nx
+ >>> import nx_arangodb as nxadb
+ >>>
+ >>> os.environ["DATABASE_HOST"] = "http://localhost:8529"
+ >>> os.environ["DATABASE_USERNAME"] = "root"
+ >>> os.environ["DATABASE_PASSWORD"] = "openSesame"
+ >>> os.environ["DATABASE_NAME"] = "_system"
+ >>>
+ >>> G = nxadb.MultiDiGraph(name="MyGraph")
+ >>> ...
+
+
+ Parameters
+ ----------
+ incoming_graph_data : input graph (optional, default: None)
+ Data to initialize graph. If None (default) an empty
+ graph is created. Must be used in conjunction with **name** if
+ the user wants to persist the graph in ArangoDB. NOTE: It is
+ recommended for incoming_graph_data to be a NetworkX graph due
+ to faster loading times.
+
+ multigraph_input : bool or None (default None)
+ Note: Only used when `incoming_graph_data` is a dict.
+ If True, `incoming_graph_data` is assumed to be a
+ dict-of-dict-of-dict-of-dict structure keyed by
+ node to neighbor to edge keys to edge data for multi-edges.
+ A NetworkXError is raised if this is not the case.
+ If False, :func:`to_networkx_graph` is used to try to determine
+ the dict's graph data structure as either a dict-of-dict-of-dict
+ keyed by node to neighbor to edge data, or a dict-of-iterable
+ keyed by node to neighbors.
+ If None, the treatment for True is tried, but if it fails,
+ the treatment for False is tried.
+
+ name : str (optional, default: None)
+ Name of the graph in the database. If the graph already exists,
+ the user can pass the name of the graph to connect to it. If
+ the graph does not exist, the user can create a new graph by
+ passing the name. NOTE: Must be used in conjunction with
+ **incoming_graph_data** if the user wants to persist the graph
+ in ArangoDB.
+
+ default_node_type : str (optional, default: None)
+ Default node type for the graph. In ArangoDB terms, this is the
+ default vertex collection. If the graph already exists, the user can
+ omit this parameter and the default node type will be set to the
+ first vertex collection in the graph. If the graph does not exist,
+ the user can pass the default node type to create the default vertex
+ collection.
+
+ edge_type_key : str (optional, default: "_edge_type")
+ Key used to store the edge type when inserting edges into the graph.
+ Useful for working with Heterogeneous Graphs.
+
+ edge_type_func : Callable[[str, str], str] (optional, default: None)
+ Function to determine the edge type between two nodes. If the graph
+ already exists, the user can omit this parameter and the edge type
+ function will be set based on the existing edge definitions. If the
+ graph does not exist, the user can pass a function that determines
+ the edge type between two nodes.
+
+ edge_collections_attributes : set[str] (optional, default: None)
+ Set of edge attributes to fetch when executing a NetworkX algorithm.
+ Useful if the user has edge weights or other edge attributes that
+ they want to use in a NetworkX algorithm.
+
+ db : arango.database.StandardDatabase (optional, default: None)
+ ArangoDB database object. If the user has an existing python-arango
+ connection to the database, they can pass the database object to the graph.
+ If not provided, a database object will be created using the environment
+ variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and
+ DATABASE_NAME.
+
+ read_parallelism : int (optional, default: 10)
+ Number of parallel threads to use when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ read_batch_size : int (optional, default: 100000)
+ Number of documents to fetch in a single batch when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ write_batch_size : int (optional, default: 50000)
+ Number of documents to insert in a single batch when writing data to ArangoDB.
+ Used for inserting node and edge data into the database if and only if
+ **incoming_graph_data** is a NetworkX graph.
+
+ write_async : bool (optional, default: True)
+ Whether to insert data into ArangoDB asynchronously. Used for inserting
+ node and edge data into the database if and only if **incoming_graph_data**
+ is a NetworkX graph.
+
+ symmetrize_edges : bool (optional, default: False)
+ Whether to symmetrize the edges in the graph when fetched from the database.
+ Only applies to directed graphs, thereby converting them to undirected graphs.
+
+ use_arango_views : bool (optional, default: False)
+ Whether to use experimental work-in-progress ArangoDB Views for the
+ nodes, adjacency list, and edges. These views are designed to improve
+ data processing performance by delegating CRUD operations to the database
+ whenever possible. NOTE: This feature is experimental and may not work
+ as expected.
+
+ args: positional arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+
+ kwargs: keyword arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+ """
+
__networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2
__networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2
@@ -36,7 +164,7 @@ def __init__(
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
- use_experimental_views: bool = False,
+ use_arango_views: bool = False,
*args: Any,
**kwargs: Any,
):
@@ -54,7 +182,7 @@ def __init__(
write_batch_size,
write_async,
symmetrize_edges,
- use_experimental_views,
+ use_arango_views,
*args,
**kwargs,
)
diff --git a/nx_arangodb/classes/multigraph.py b/nx_arangodb/classes/multigraph.py
index 4f6d6b79..07c30b7f 100644
--- a/nx_arangodb/classes/multigraph.py
+++ b/nx_arangodb/classes/multigraph.py
@@ -15,6 +15,134 @@
class MultiGraph(Graph, nx.MultiGraph):
+ """
+ An undirected graph class that can store multiedges.
+
+ Subclasses ``nxadb.Graph`` and ``nx.MultiGraph``.
+
+ In order to connect to an ArangoDB instance, the following environment
+ variables must be set:
+
+ 1. ``DATABASE_HOST``
+ 2. ``DATABASE_USERNAME``
+ 3. ``DATABASE_PASSWORD``
+ 4. ``DATABASE_NAME``
+
+ Furthermore, the ``name`` parameter is required to create a new graph
+ or to connect to an existing graph in the database.
+
+ Example
+ -------
+ >>> import os
+ >>> import networkx as nx
+ >>> import nx_arangodb as nxadb
+ >>>
+ >>> os.environ["DATABASE_HOST"] = "http://localhost:8529"
+ >>> os.environ["DATABASE_USERNAME"] = "root"
+ >>> os.environ["DATABASE_PASSWORD"] = "openSesame"
+ >>> os.environ["DATABASE_NAME"] = "_system"
+ >>>
+ >>> G = nxadb.MultiGraph(name="MyGraph")
+ >>> ...
+
+
+ Parameters
+ ----------
+ incoming_graph_data : input graph (optional, default: None)
+ Data to initialize graph. If None (default) an empty
+ graph is created. Must be used in conjunction with **name** if
+ the user wants to persist the graph in ArangoDB. NOTE: It is
+ recommended for incoming_graph_data to be a NetworkX graph due
+ to faster loading times.
+
+ multigraph_input : bool or None (default None)
+ Note: Only used when `incoming_graph_data` is a dict.
+ If True, `incoming_graph_data` is assumed to be a
+ dict-of-dict-of-dict-of-dict structure keyed by
+ node to neighbor to edge keys to edge data for multi-edges.
+ A NetworkXError is raised if this is not the case.
+ If False, :func:`to_networkx_graph` is used to try to determine
+ the dict's graph data structure as either a dict-of-dict-of-dict
+ keyed by node to neighbor to edge data, or a dict-of-iterable
+ keyed by node to neighbors.
+ If None, the treatment for True is tried, but if it fails,
+ the treatment for False is tried.
+
+ name : str (optional, default: None)
+ Name of the graph in the database. If the graph already exists,
+ the user can pass the name of the graph to connect to it. If
+ the graph does not exist, the user can create a new graph by
+ passing the name. NOTE: Must be used in conjunction with
+ **incoming_graph_data** if the user wants to persist the graph
+ in ArangoDB.
+
+ default_node_type : str (optional, default: None)
+ Default node type for the graph. In ArangoDB terms, this is the
+ default vertex collection. If the graph already exists, the user can
+ omit this parameter and the default node type will be set to the
+ first vertex collection in the graph. If the graph does not exist,
+ the user can pass the default node type to create the default vertex
+ collection.
+
+ edge_type_key : str (optional, default: "_edge_type")
+ Key used to store the edge type when inserting edges into the graph.
+ Useful for working with Heterogeneous Graphs.
+
+ edge_type_func : Callable[[str, str], str] (optional, default: None)
+ Function to determine the edge type between two nodes. If the graph
+ already exists, the user can omit this parameter and the edge type
+ function will be set based on the existing edge definitions. If the
+ graph does not exist, the user can pass a function that determines
+ the edge type between two nodes.
+
+ edge_collections_attributes : set[str] (optional, default: None)
+ Set of edge attributes to fetch when executing a NetworkX algorithm.
+ Useful if the user has edge weights or other edge attributes that
+ they want to use in a NetworkX algorithm.
+
+ db : arango.database.StandardDatabase (optional, default: None)
+ ArangoDB database object. If the user has an existing python-arango
+ connection to the database, they can pass the database object to the graph.
+ If not provided, a database object will be created using the environment
+ variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and
+ DATABASE_NAME.
+
+ read_parallelism : int (optional, default: 10)
+ Number of parallel threads to use when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ read_batch_size : int (optional, default: 100000)
+ Number of documents to fetch in a single batch when reading data from ArangoDB.
+ Used for fetching node and edge data from the database.
+
+ write_batch_size : int (optional, default: 50000)
+ Number of documents to insert in a single batch when writing data to ArangoDB.
+ Used for inserting node and edge data into the database if and only if
+ **incoming_graph_data** is a NetworkX graph.
+
+ write_async : bool (optional, default: True)
+ Whether to insert data into ArangoDB asynchronously. Used for inserting
+ node and edge data into the database if and only if **incoming_graph_data**
+ is a NetworkX graph.
+
+ symmetrize_edges : bool (optional, default: False)
+ Whether to symmetrize the edges in the graph when fetched from the database.
+ Only applies to directed graphs, thereby converting them to undirected graphs.
+
+ use_arango_views : bool (optional, default: False)
+ Whether to use experimental work-in-progress ArangoDB Views for the
+ nodes, adjacency list, and edges. These views are designed to improve
+ data processing performance by delegating CRUD operations to the database
+ whenever possible. NOTE: This feature is experimental and may not work
+ as expected.
+
+ args: positional arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+
+ kwargs: keyword arguments for nx.Graph
+ Additional arguments passed to nx.Graph.
+ """
+
__networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2
__networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2
@@ -37,7 +165,7 @@ def __init__(
write_batch_size: int = 50000,
write_async: bool = True,
symmetrize_edges: bool = False,
- use_experimental_views: bool = False,
+ use_arango_views: bool = False,
*args: Any,
**kwargs: Any,
):
@@ -54,7 +182,7 @@ def __init__(
write_batch_size,
write_async,
symmetrize_edges,
- use_experimental_views,
+ use_arango_views,
*args,
**kwargs,
)
diff --git a/nx_arangodb/classes/reportviews.py b/nx_arangodb/classes/reportviews.py
index 9741272b..ae8bff1d 100644
--- a/nx_arangodb/classes/reportviews.py
+++ b/nx_arangodb/classes/reportviews.py
@@ -1,6 +1,15 @@
-"""
-An override of the NodeView, NodeDataView, EdgeView, and EdgeDataView classes
-to allow for custom data filtering in the database instead of in Python.
+"""Experimental overrides of the NetworkX Views that represent the
+nodes and edges of the graph.
+
+Overriding these classes allows us to implement custom logic for
+data filtering and updating in the database, instead of in Python.
+
+These classes are a work-in-progress. The main goal is to try
+to delegate data processing to ArangoDB, whenever possible.
+
+To use these experimental views, set **use_arango_views=True** on graph creation:
+
+>>> G = nxadb.Graph(name="MyGraph", use_arango_views=True)
"""
from __future__ import annotations
@@ -9,25 +18,86 @@
import nx_arangodb as nxadb
-from .function import get_node_id
+class ArangoNodeView(nx.classes.reportviews.NodeView):
+ """The ArangoNodeView class is an experimental subclass of the
+ NodeView class.
-class CustomNodeView(nx.classes.reportviews.NodeView):
+ Contrary to the original NodeView class, the ArangoNodeView is
+ writable to allow for bulk updates to the graph in the DB.
+ """
+
+ # DataView method
def __call__(self, data=False, default=None):
if data is False:
return self
- return CustomNodeDataView(self._nodes, data, default)
+ return ArangoNodeDataView(self._nodes, data, default)
def data(self, data=True, default=None):
+ """Return a read-only view of node data.
+
+ Parameters
+ ----------
+ data : bool or node data key, default=True
+ If ``data=True`` (the default), return a `NodeDataView` object that
+ maps each node to *all* of its attributes. `data` may also be an
+ arbitrary key, in which case the `NodeDataView` maps each node to
+ the value for the keyed attribute. In this case, if a node does
+ not have the `data` attribute, the `default` value is used.
+ default : object, default=None
+ The value used when a node does not have a specific attribute.
+
+ Returns
+ -------
+ NodeDataView
+ The layout of the returned NodeDataView depends on the value of the
+ `data` parameter.
+
+ Notes
+ -----
+ If ``data=False``, returns a `NodeView` object without data.
+
+ See Also
+ --------
+ NodeDataView
+ """
if data is False:
return self
- return CustomNodeDataView(self._nodes, data, default)
+ return ArangoNodeDataView(self._nodes, data, default)
def update(self, data):
+ """Update a set of nodes within the graph.
+
+ The benefit of this method is that it allows for bulk API updates,
+ as opposed to `G.add_nodes_from`, which currently makes
+ one API request per node.
+
+ Example
+ -------
+ >>> G = nxadb.Graph(name="MyGraph")
+ >>> G.nodes.update(
+ {
+ 'node/1': {"foo": "bar"},
+ 'node/2': {"foo": "baz"},
+ ...
+ })
+ """
return self._nodes.update(data)
-class CustomNodeDataView(nx.classes.reportviews.NodeDataView):
+class ArangoNodeDataView(nx.classes.reportviews.NodeDataView):
+ """The ArangoNodeDataView class is an experimental subclass of the
+ NodeDataView class.
+
+ The main use for this class is to iterate through node-data pairs.
+ The data can be the entire data-dictionary for each node, or it
+ can be a specific attribute (with default) for each node.
+
+ In the event that the data is a specific attribute, the data is
+ filtered server-side, instead of in Python. This is done by using
+ the ArangoDB Query Language (AQL) to filter the data.
+ """
+
def __iter__(self):
data = self._data
if data is False:
@@ -54,13 +124,23 @@ def __iter__(self):
###########################
-class CustomEdgeDataView(nx.classes.reportviews.EdgeDataView):
+class ArangoEdgeDataView(nx.classes.reportviews.EdgeDataView):
+ """The ArangoEdgeDataView class is an experimental subclass of the
+ EdgeDataView class.
- ######################
- # NOTE: Monkey Patch #
- ######################
+ This view is primarily used to iterate over the edges reporting
+ edges as node-tuples with edge data optionally reported.
+
+ In the event that the data is a specific attribute, the data is
+ filtered server-side, instead of in Python. This is done by using
+ the ArangoDB Query Language (AQL) to filter the data.
+ """
def __iter__(self):
+ ######################
+ # NOTE: Monkey Patch #
+ ######################
+
if self._nbunch is None and self._data not in [None, True, False]:
# Reason: We can utilize AQL to filter the data we
# want to return, instead of filtering it in Python
@@ -70,14 +150,21 @@ def __iter__(self):
# is the AdjListOuterDict object that has a custom
# items() method that can filter data with AQL.
- # Filter for self._data server-side
yield from self._adjdict.items(data=self._data, default=self._default)
else:
yield from super().__iter__()
-class CustomEdgeView(nx.classes.reportviews.EdgeView):
- dataview = CustomEdgeDataView
+class ArangoEdgeView(nx.classes.reportviews.EdgeView):
+ """The ArangoEdgeView class is an experimental subclass of the
+ EdgeView class.
+
+ The __len__ method is overridden to count the number of edges
+ in the graph by querying the database, instead of iterating
+ through the edges in Python.
+ """
+
+ dataview = ArangoEdgeDataView
def __len__(self):
diff --git a/nx_arangodb/convert.py b/nx_arangodb/convert.py
index 17458b90..09cfa973 100644
--- a/nx_arangodb/convert.py
+++ b/nx_arangodb/convert.py
@@ -1,3 +1,21 @@
+"""Functions to convert between NetworkX, NetworkX-ArangoDB,
+and NetworkX-cuGraph.
+
+Examples
+--------
+>>> import networkx as nx
+>>> import nx_arangodb as nxadb
+>>> import nx_cugraph as nxcg
+>>>
+>>> G = nx.Graph()
+>>> G.add_edge(1, 2, weight=3.0)
+>>> G.add_edge(2, 3, weight=7.5)
+>>>
+>>> G_ADB = nxadb.convert._to_nxadb_graph(G)
+>>> G_CG = nxadb.convert._to_nxcg_graph(G_ADB)
+>>> G_NX = nxadb.convert._to_nx_graph(G_ADB)
+"""
+
from __future__ import annotations
import time
@@ -8,7 +26,6 @@
import nx_arangodb as nxadb
from nx_arangodb.classes.dict.adj import AdjListOuterDict
from nx_arangodb.classes.dict.node import NodeDict
-from nx_arangodb.classes.function import do_load_all_edge_attributes
from nx_arangodb.logger import logger
try:
@@ -29,6 +46,22 @@
def _to_nx_graph(G: Any, *args: Any, **kwargs: Any) -> nx.Graph:
+ """Convert a graph to a NetworkX graph.
+
+ Parameters
+ ----------
+ G : Any
+ The graph to convert.
+
+ Currently supported types:
+ - nx.Graph
+ - nxadb.Graph
+
+ Returns
+ -------
+ nx.Graph
+ The converted graph.
+ """
logger.debug(f"_to_nx_graph for {G.__class__.__name__}")
if isinstance(G, nxadb.Graph):
@@ -41,11 +74,28 @@ def _to_nx_graph(G: Any, *args: Any, **kwargs: Any) -> nx.Graph:
def _to_nxadb_graph(
- G: Any,
- *args: Any,
- as_directed: bool = False,
- **kwargs: Any,
+ G: Any, *args: Any, as_directed: bool = False, **kwargs: Any
) -> nxadb.Graph:
+ """Convert a graph to a NetworkX-ArangoDB graph.
+
+ Parameters
+ ----------
+ G : Any
+ The graph to convert.
+
+ Currently supported types:
+ - nx.Graph
+ - nxadb.Graph
+
+ as_directed : bool, optional
+ Whether to convert the graph to a directed graph.
+ Default is False.
+
+ Returns
+ -------
+ nxadb.Graph
+ The converted graph.
+ """
logger.debug(f"_to_nxadb_graph for {G.__class__.__name__}")
if isinstance(G, nxadb.Graph):
@@ -60,6 +110,28 @@ def _to_nxadb_graph(
if GPU_AVAILABLE:
def _to_nxcg_graph(G: Any, as_directed: bool = False) -> nxcg.Graph:
+ """Convert a graph to a NetworkX-cuGraph graph.
+
+ NOTE: Only supported if NetworkX-cuGraph is installed.
+
+ Parameters
+ ----------
+ G : Any
+ The graph to convert.
+
+ Currently supported types:
+ - nxadb.Graph
+ - nxcg.Graph
+
+ as_directed : bool, optional
+ Whether to convert the graph to a directed graph.
+ Default is False.
+
+ Returns
+ -------
+ nxcg.Graph
+ The converted graph.
+ """
logger.debug(f"_to_nxcg_graph for {G.__class__.__name__}")
if isinstance(G, nxcg.Graph):
@@ -87,8 +159,23 @@ def nx_to_nxadb(
*args: Any,
as_directed: bool = False,
**kwargs: Any,
- # name: str | None = None,
) -> nxadb.Graph:
+ """Convert a NetworkX graph to a NetworkX-ArangoDB graph.
+
+ Parameters
+ ----------
+ graph : nx.Graph
+ The NetworkX graph to convert.
+
+ as_directed : bool, optional
+ Whether to convert the graph to a directed graph.
+ Default is False.
+
+ Returns
+ -------
+ nxadb.Graph
+ The converted graph.
+ """
logger.debug(f"from_networkx for {graph.__class__.__name__}")
klass: type[nxadb.Graph]
@@ -104,11 +191,48 @@ def nx_to_nxadb(
else:
klass = nxadb.Graph
- # name=kwargs.get("name") ?
return klass(incoming_graph_data=graph)
def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph:
+ """Convert a NetworkX-ArangoDB graph to a NetworkX graph.
+
+ This function will pull the graph from the database if it does
+ not exist in the cache. A new NetworkX graph will be created
+ using the node and adjacency dictionaries that are fetched.
+
+ NOTE: The current downside of this approach is that we are not
+ able to take advantage of the custom Dictionary classes that we
+ have implemented in nx_arangodb.classes.dict. This is because
+ the node and adjacency dictionaries are fetched as regular
+ Python dictionaries. Furthermore, we don't cache the dictionaries
+ themselves, so we have to fetch them every time we convert the
+ graph, which is currently being invoked on *every* algorithm
+ call. See the note below for a potential solution. As a temporary
+ workaround, users can do the following:
+
+ ```
+ import networkx as nx
+ import nx_arangodb as nxadb
+
+ G_ADB = nxadb.Graph(name="MyGraph") # Connect to the graph
+ G_NX = nxadb.convert._to_nx_graph(G_ADB) # Pull the graph
+
+ nx.pagerank(G_NX)
+ nx.betweenness_centrality(G_NX)
+ ...
+ ```
+
+ Parameters
+ ----------
+ G : nxadb.Graph
+ The NetworkX-ArangoDB graph to convert.
+
+ Returns
+ -------
+ nx.Graph
+ The converted graph.
+ """
if not G.graph_exists_in_db:
# Since nxadb.Graph is a subclass of nx.Graph, we can return it as is.
# This only applies if the graph does not exist in the database.
@@ -128,7 +252,7 @@ def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph:
load_coo=False,
edge_collections_attributes=G.edge_attributes,
load_all_vertex_attributes=False,
- load_all_edge_attributes=do_load_all_edge_attributes(G.edge_attributes),
+ load_all_edge_attributes=len(G.edge_attributes) == 0,
is_directed=G.is_directed(),
is_multigraph=G.is_multigraph(),
symmetrize_edges_if_directed=G.symmetrize_edges if G.is_directed() else False,
@@ -163,6 +287,27 @@ def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph:
if GPU_AVAILABLE:
def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph:
+ """Convert a NetworkX-ArangoDB graph to a NetworkX-cuGraph graph.
+
+ This function will pull the graph from the database if it does
+ not exist in the cache. A new NetworkX-cuGraph graph will be
+ created using the COO format that is fetched. The created graph
+ will be cached in the nxadb.Graph object for future use.
+
+ Parameters
+ ----------
+ G : nxadb.Graph
+ The NetworkX-ArangoDB graph to convert.
+
+ as_directed : bool, optional
+ Whether to convert the graph to a directed graph.
+ Default is False.
+
+ Returns
+ -------
+ nxcg.Graph
+ The converted graph.
+ """
if G.use_nxcg_cache and G.nxcg_graph is not None:
m = "**use_nxcg_cache** is enabled. using cached NXCG Graph. no pull required." # noqa
logger.debug(m)
@@ -186,7 +331,7 @@ def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph:
load_coo=True,
edge_collections_attributes=G.edge_attributes,
load_all_vertex_attributes=False, # not used
- load_all_edge_attributes=do_load_all_edge_attributes(G.edge_attributes),
+ load_all_edge_attributes=len(G.edge_attributes) == 0,
is_directed=G.is_directed(),
is_multigraph=G.is_multigraph(),
symmetrize_edges_if_directed=(
diff --git a/nx_arangodb/exceptions.py b/nx_arangodb/exceptions.py
index 4c72402e..35e538e4 100644
--- a/nx_arangodb/exceptions.py
+++ b/nx_arangodb/exceptions.py
@@ -30,10 +30,6 @@ class ArangoDBAlgorithmError(NetworkXArangoDBException):
pass
-class ShortestPathError(ArangoDBAlgorithmError):
- pass
-
-
class MultipleEdgesFound(NetworkXArangoDBException):
pass
diff --git a/tests/test.py b/tests/test.py
index 58ea73f8..6a191434 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -397,7 +397,7 @@ def test_node_dict_update_existing_single_collection(
) -> None:
# This tests uses the existing nodes and updates each
# of them using the update method using a single collection
- G_1 = nxadb.Graph(name="KarateGraph", foo="bar", use_experimental_views=True)
+ G_1 = nxadb.Graph(name="KarateGraph", foo="bar", use_arango_views=True)
nodes_ids_list = G_1.nodes
local_nodes_dict = {}
@@ -447,9 +447,7 @@ def test_node_dict_update_multiple_collections(
assert db.collection(e_1_name).count() == 0
assert db.collection(e_2_name).count() == 0
- G_1 = graph_cls(
- name=graph_name, default_node_type=v_1_name, use_experimental_views=True
- )
+ G_1 = graph_cls(name=graph_name, default_node_type=v_1_name, use_arango_views=True)
assert len(G_1.nodes) == 0
assert len(G_1.edges) == 0
@@ -489,7 +487,7 @@ def test_node_dict_update_multiple_collections(
def test_edge_adj_dict_update_existing_single_collection_graph_and_digraph(
load_karate_graph: Any, graph_cls: type[nxadb.Graph]
) -> None:
- G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True)
+ G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True)
local_adj = G_1.adj
local_edges_dict: Union[GraphAdjDict | DiGraphAdjDict] = {}
@@ -563,7 +561,7 @@ def test_edge_adj_dict_update_existing_single_collection_graph_and_digraph(
def test_edge_adj_dict_update_existing_single_collection_MultiGraph_and_MultiDiGraph(
load_karate_graph: Any, graph_cls: type[nxadb.Graph]
) -> None:
- G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True)
+ G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True)
local_adj = G_1.adj
local_edges_dict: Union[MultiGraphAdjDict | MultiDiGraphAdjDict] = {}
@@ -647,7 +645,7 @@ def test_edge_dict_update_multiple_collections(load_two_relation_graph: Any) ->
assert db.collection(e_2_name).count() == 0
G_1 = nxadb.Graph(
- name=graph_name, default_node_type=v_1_name, use_experimental_views=True
+ name=graph_name, default_node_type=v_1_name, use_arango_views=True
)
assert len(G_1.nodes) == 0
assert len(G_1.edges) == 0
@@ -704,7 +702,7 @@ def test_edge_dict_update_multiple_collections(load_two_relation_graph: Any) ->
def test_edge_adj_inner_dict_update_existing_single_collection(
load_karate_graph: Any, graph_cls: type[nxadb.Graph]
) -> None:
- G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True)
+ G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True)
local_adj = G_1.adj
local_inner_edges_dict: GraphAdjDict = {}
@@ -749,7 +747,7 @@ def test_edge_adj_inner_dict_update_existing_single_collection(
def test_edge_adj_inner_dict_update_existing_single_collection_multi_graphs(
load_karate_graph: Any, graph_cls: type[nxadb.Graph]
) -> None:
- G_1 = graph_cls(name="KarateGraph", foo="bar", use_experimental_views=True)
+ G_1 = graph_cls(name="KarateGraph", foo="bar", use_arango_views=True)
local_adj = G_1.adj
local_inner_edges_dict: GraphAdjDict = {}