diff --git a/.github/workflows/analyze.yml b/.github/workflows/analyze.yml index 25ddf32..c4c5db7 100644 --- a/.github/workflows/analyze.yml +++ b/.github/workflows/analyze.yml @@ -37,7 +37,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 64c7ee5..bc3f9ff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,9 +1,8 @@ name: build on: workflow_dispatch: - push: - branches: [ master ] pull_request: + push: branches: [ master ] env: PACKAGE_DIR: adbdgl_adapter @@ -13,34 +12,47 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ["3.7", "3.8", "3.9"] + python: ["3.8", "3.9", "3.10", "3.11"] name: Python ${{ matrix.python }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} + cache: 'pip' + cache-dependency-path: setup.py + - name: Set up ArangoDB Instance via Docker - run: docker create --name adb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb:3.9.1 + run: docker create --name adb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb + - name: Start ArangoDB Instance run: docker start adb + - name: Setup pip run: python -m pip install --upgrade pip setuptools wheel + - name: Install packages run: pip install .[dev] + - name: Run black run: black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} + - name: Run flake8 run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} + - name: Run isort run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} + - name: Run mypy run: mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} + - name: Run pytest run: pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v 
--color=yes --no-cov-on-fail --code-highlight=yes + - name: Publish to coveralls.io - if: matrix.python == '3.8' + if: matrix.python == '3.10' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: coveralls --service=github \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eb8b8a2..ac040fc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,76 +3,34 @@ on: workflow_dispatch: release: types: [published] -env: - PACKAGE_DIR: adbdgl_adapter - TESTS_DIR: tests jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python: ["3.7", "3.8", "3.9"] - name: Python ${{ matrix.python }} - steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python }} - - name: Set up ArangoDB Instance via Docker - run: docker create --name adb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb:3.9.1 - - name: Start ArangoDB Instance - run: docker start adb - - name: Setup pip - run: python -m pip install --upgrade pip setuptools wheel - - name: Install packages - run: pip install .[dev] - - name: Run black - run: black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} - - name: Run flake8 - run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} - - name: Run isort - run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} - - name: Run mypy - run: mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}} - - name: Run pytest - run: pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes - - name: Publish to coveralls.io - if: matrix.python == '3.8' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: coveralls --service=github - release: - needs: build runs-on: ubuntu-latest name: Release package steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch complete history 
for all tags and branches run: git fetch --prune --unshallow - - name: Setup python - uses: actions/setup-python@v2 + - name: Setup Python + uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.10" - name: Install release packages run: pip install setuptools wheel twine setuptools-scm[toml] - - name: Install dependencies - run: pip install .[dev] - - name: Build distribution run: python setup.py sdist bdist_wheel - - name: Publish to PyPI Test + - name: Publish to Test PyPi env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }} run: twine upload --repository testpypi dist/* #--skip-existing - - name: Publish to PyPI + + - name: Publish to PyPi env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} @@ -83,7 +41,7 @@ jobs: runs-on: ubuntu-latest name: Update Changelog steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -95,10 +53,10 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Setup python - uses: actions/setup-python@v2 + - name: Setup Python + uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.10" - name: Install release packages run: pip install wheel gitchangelog pystache @@ -110,12 +68,12 @@ jobs: run: gitchangelog ${{env.VERSION}} > CHANGELOG.md - name: Make commit for auto-generated changelog - uses: EndBug/add-and-commit@v7 + uses: EndBug/add-and-commit@v9 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: add: "CHANGELOG.md" - branch: actions/changelog + new_branch: actions/changelog message: "!gitchangelog" - name: Create pull request for the auto generated changelog @@ -128,4 +86,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Alert developer of open PR - run: echo "Changelog $PR_URL is ready to be merged by developer." \ No newline at end of file + run: echo "Changelog $PR_URL is ready to be merged by developer." 
diff --git a/README.md b/README.md index 3c782ce..301b4c8 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![License](https://img.shields.io/github/license/arangoml/dgl-adapter?color=9E2165&style=for-the-badge)](https://github.com/arangoml/dgl-adapter/blob/master/LICENSE) [![Code style: black](https://img.shields.io/static/v1?style=for-the-badge&label=code%20style&message=black&color=black)](https://github.com/psf/black) -[![Downloads](https://img.shields.io/badge/dynamic/json?style=for-the-badge&color=282661&label=Downloads&query=total_downloads&url=https://api.pepy.tech/api/projects/adbdgl-adapter)](https://pepy.tech/project/adbdgl-adapter) +[![Downloads](https://img.shields.io/badge/dynamic/json?style=for-the-badge&color=282661&label=Downloads&query=total_downloads&url=https://api.pepy.tech/api/v2/projects/adbdgl-adapter)](https://pepy.tech/project/adbdgl-adapter) ![](https://raw.githubusercontent.com/arangoml/dgl-adapter/master/examples/assets/adb_logo.png) @@ -18,6 +18,7 @@ The ArangoDB-DGL Adapter exports Graphs from ArangoDB, the multi-model database for graph & beyond, into Deep Graph Library (DGL), a python package for graph neural networks, and vice-versa. +Note: The ArangoDB-DGL Adapter currently only supports the use of PyTorch as the [DGL backend](https://docs.dgl.ai/en/0.8.x/install/#backends). Support for MXNet and TensorFlow will be added in the future. 
## About DGL @@ -45,44 +46,217 @@ pip install git+https://github.com/arangoml/dgl-adapter.git Also available as an ArangoDB Lunch & Learn session: [Graph & Beyond Course #2.8](https://www.arangodb.com/resources/lunch-sessions/graph-beyond-lunch-break-2-8-dgl-adapter/) ```py -from arango import ArangoClient # Python-Arango driver -from dgl.data import KarateClubDataset # Sample graph from DGL +import dgl +import torch +import pandas -from adbdgl_adapter import ADBDGL_Adapter +from arango import ArangoClient +from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller +from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder -# Let's assume that the ArangoDB "fraud detection" dataset is imported to this endpoint -db = ArangoClient(hosts="http://localhost:8529").db("_system", username="root", password="") +# Connect to ArangoDB +db = ArangoClient().db() +# Instantiate the adapter adbdgl_adapter = ADBDGL_Adapter(db) -# Use Case 1.1: ArangoDB to DGL via Graph name -dgl_fraud_graph = adbdgl_adapter.arangodb_graph_to_dgl("fraud-detection") +# Create a DGL Heterogeneous Graph +fake_hetero = dgl.heterograph({ + ("user", "follows", "user"): (torch.tensor([0, 1]), torch.tensor([1, 2])), + ("user", "follows", "topic"): (torch.tensor([1, 1]), torch.tensor([1, 2])), + ("user", "plays", "game"): (torch.tensor([0, 3]), torch.tensor([3, 4])), +}) +fake_hetero.nodes["user"].data["features"] = torch.tensor([21, 44, 16, 25]) +fake_hetero.nodes["user"].data["label"] = torch.tensor([1, 2, 0, 1]) +fake_hetero.nodes["game"].data["features"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]]) +fake_hetero.edges[("user", "plays", "game")].data["features"] = torch.tensor([[6, 1], [1000, 0]]) +``` + +### DGL to ArangoDB +```py +############################ +# 1.1: without a Metagraph # +############################ + +adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero) -# Use Case 1.2: ArangoDB to DGL via Collection names -dgl_fraud_graph_2 = 
adbdgl_adapter.arangodb_collections_to_dgl( - "fraud-detection", - {"account", "Class", "customer"}, # Vertex collections - {"accountHolder", "Relationship", "transaction"}, # Edge collections -) +######################### +# 1.2: with a Metagraph # +######################### -# Use Case 1.3: ArangoDB to DGL via Metagraph +# Specifying a Metagraph provides customized adapter behaviour metagraph = { + "nodeTypes": { + "user": { + "features": "user_age", # 1) you can specify a string value for attribute renaming + "label": label_tensor_to_2_column_dataframe, # 2) you can specify a function for user-defined handling, as long as the function returns a Pandas DataFrame + }, + # 3) You can specify a set of strings if you want to preserve the same DGL attribute names for the node/edge type + "game": {"features"} # this is equivalent to {"features": "features"} + }, + "edgeTypes": { + ("user", "plays", "game"): { + # 4) you can specify a list of strings for tensor disassembly (if you know the number of node/edge features in advance) + "features": ["hours_played", "is_satisfied_with_game"] + }, + }, +} + +def label_tensor_to_2_column_dataframe(dgl_tensor: torch.Tensor, adb_df: pandas.DataFrame) -> pandas.DataFrame: + """A user-defined function to create two + ArangoDB attributes out of the 'user' label tensor + + :param dgl_tensor: The DGL Tensor containing the data + :type dgl_tensor: torch.Tensor + :param adb_df: The ArangoDB DataFrame to populate, whose + size is preset to the length of **dgl_tensor**. 
+ :type adb_df: pandas.DataFrame + :return: The populated ArangoDB DataFrame + :rtype: pandas.DataFrame + """ + label_map = {0: "Class A", 1: "Class B", 2: "Class C"} + + adb_df["label_num"] = dgl_tensor.tolist() + adb_df["label_str"] = adb_df["label_num"].map(label_map) + + return adb_df + + +adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=False) + +####################################################### +# 1.3: with a Metagraph and `explicit_metagraph=True` # +####################################################### + +# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB. +adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=True) + +######################################## +# 1.4: with a custom ADBDGL Controller # +######################################## + +class Custom_ADBDGL_Controller(ADBDGL_Controller): + def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict: + """Optionally modify a DGL node object before it gets inserted into its designated ArangoDB collection. + + :param dgl_node: The DGL node object to (optionally) modify. + :param node_type: The DGL Node Type of the node. + :return: The DGL Node object + """ + dgl_node["foo"] = "bar" + return dgl_node + + def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict: + """Optionally modify a DGL edge object before it gets inserted into its designated ArangoDB collection. + + :param dgl_edge: The DGL edge object to (optionally) modify. + :param edge_type: The Edge Type of the DGL edge. Formatted + as (from_collection, edge_collection, to_collection) + :return: The DGL Edge object + """ + dgl_edge["bar"] = "foo" + return dgl_edge + + +adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb("FakeHetero", fake_hetero) +``` + +### ArangoDB to DGL +```py +# Start from scratch! 
+db.delete_graph("FakeHetero", drop_collections=True, ignore_missing=True) +adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero) + +####################### +# 2.1: via Graph name # +####################### + +# Due to risk of ambiguity, this method does not transfer attributes +dgl_g = adbdgl_adapter.arangodb_graph_to_dgl("FakeHetero") + +############################# +# 2.2: via Collection names # +############################# + +# Due to risk of ambiguity, this method does not transfer attributes +dgl_g = adbdgl_adapter.arangodb_collections_to_dgl("FakeHetero", v_cols={"user", "game"}, e_cols={"plays"}) + +###################### +# 2.3: via Metagraph # +###################### + +# Transfers attributes "as is", meaning they are already formatted to DGL data standards. +# Learn more about the DGL Data Standards here: https://docs.dgl.ai/guide/graph.html#guide-graph +metagraph_v1 = { + "vertexCollections": { + # Move the "features" & "label" ArangoDB attributes to DGL as "features" & "label" Tensors + "user": {"features", "label"}, # equivalent to {"features": "features", "label": "label"} + "game": {"dgl_game_features": "features"}, + "topic": {}, + }, + "edgeCollections": { + "plays": {"dgl_plays_features": "features"}, + "follows": {} + }, +} + +dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v1) + +################################################# +# 2.4: via Metagraph with user-defined encoders # +################################################# + +# Transforms attributes via user-defined encoders +metagraph_v2 = { + "vertexCollections": { + "Movies": { + "features": { # Build a feature matrix from the "Action" & "Drama" document attributes + "Action": IdentityEncoder(dtype=torch.long), + "Drama": IdentityEncoder(dtype=torch.long), + }, + "label": "Comedy", + }, + "Users": { + "features": { + "Gender": CategoricalEncoder(), # CategoricalEncoder(mapping={"M": 0, "F": 1}), + "Age": IdentityEncoder(dtype=torch.long), + } + }, + }, + 
"edgeCollections": {"Ratings": {"weight": "Rating"}}, +} + +dgl_g = adbdgl_adapter.arangodb_to_dgl("imdb", metagraph_v2) + +################################################## +# 2.5: via Metagraph with user-defined functions # +################################################## + +# Transforms attributes via user-defined functions +metagraph_v3 = { "vertexCollections": { - "account": {"Balance", "rank"}, - "customer": {"rank"}, - "Class": {}, + "user": { + "features": udf_user_features, # supports named functions + "label": lambda df: torch.tensor(df["label"].to_list()), # also supports lambda functions + }, + "game": {"features": udf_game_features}, }, "edgeCollections": { - "transaction": {"transaction_amt", "sender_bank_id", "receiver_bank_id"}, - "accountHolder": {}, - "Relationship": {}, + "plays": {"features": (lambda df: torch.tensor(df["features"].to_list()))}, }, } -dgl_fraud_graph_3 = adbdgl_adapter.arangodb_to_dgl("fraud-detection", metagraph) -# Use Case 2: DGL to ArangoDB -dgl_karate_graph = KarateClubDataset()[0] -adb_karate_graph = adbdgl_adapter.dgl_to_arangodb("Karate", dgl_karate_graph) +def udf_user_features(user_df: pandas.DataFrame) -> torch.Tensor: + # user_df["features"] = ... + return torch.tensor(user_df["features"].to_list()) + + +def udf_game_features(game_df: pandas.DataFrame) -> torch.Tensor: + # game_df["features"] = ... 
+ return torch.tensor(game_df["features"].to_list()) + + +dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v3) ``` ## Development & Testing diff --git a/adbdgl_adapter/abc.py b/adbdgl_adapter/abc.py index 51c8117..12d1746 100644 --- a/adbdgl_adapter/abc.py +++ b/adbdgl_adapter/abc.py @@ -2,14 +2,12 @@ # -*- coding: utf-8 -*- from abc import ABC -from typing import Any, List, Set, Union +from typing import Any, Set, Union from arango.graph import Graph as ArangoDBGraph -from dgl import DGLGraph -from dgl.heterograph import DGLHeteroGraph -from torch import Tensor +from dgl import DGLGraph, DGLHeteroGraph -from .typings import ArangoMetagraph, DGLCanonicalEType, Json +from .typings import ADBMetagraph, DGLCanonicalEType, DGLMetagraph, Json class Abstract_ADBDGL_Adapter(ABC): @@ -17,55 +15,35 @@ def __init__(self) -> None: raise NotImplementedError # pragma: no cover def arangodb_to_dgl( - self, name: str, metagraph: ArangoMetagraph, **query_options: Any + self, name: str, metagraph: ADBMetagraph, **adb_export_kwargs: Any ) -> DGLHeteroGraph: raise NotImplementedError # pragma: no cover def arangodb_collections_to_dgl( - self, name: str, v_cols: Set[str], e_cols: Set[str], **query_options: Any + self, name: str, v_cols: Set[str], e_cols: Set[str], **adb_export_kwargs: Any ) -> DGLHeteroGraph: raise NotImplementedError # pragma: no cover - def arangodb_graph_to_dgl(self, name: str, **query_options: Any) -> DGLHeteroGraph: + def arangodb_graph_to_dgl( + self, name: str, **adb_export_kwargs: Any + ) -> DGLHeteroGraph: raise NotImplementedError # pragma: no cover def dgl_to_arangodb( self, name: str, dgl_g: Union[DGLGraph, DGLHeteroGraph], + metagraph: DGLMetagraph = {}, + explicit_metagraph: bool = True, overwrite_graph: bool = False, - **import_options: Any, + **adb_import_kwargs: Any, ) -> ArangoDBGraph: raise NotImplementedError # pragma: no cover - def etypes_to_edefinitions( - self, canonical_etypes: List[DGLCanonicalEType] - ) -> List[Json]: - raise 
NotImplementedError # pragma: no cover - - def __prepare_dgl_features(self) -> None: - raise NotImplementedError # pragma: no cover - - def __insert_dgl_features(self) -> None: - raise NotImplementedError # pragma: no cover - - def __prepare_adb_attributes(self) -> None: - raise NotImplementedError # pragma: no cover - - def __fetch_adb_docs(self) -> None: - raise NotImplementedError # pragma: no cover - - def __insert_adb_docs(self) -> None: - raise NotImplementedError # pragma: no cover - - @property - def DEFAULT_CANONICAL_ETYPE(self) -> List[DGLCanonicalEType]: - return [("_N", "_E", "_N")] - class Abstract_ADBDGL_Controller(ABC): - def _adb_attribute_to_dgl_feature(self, key: str, col: str, val: Any) -> Any: + def _prepare_dgl_node(self, dgl_node: Json, node_type: str) -> Json: raise NotImplementedError # pragma: no cover - def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor) -> Any: + def _prepare_dgl_edge(self, dgl_edge: Json, edge_type: DGLCanonicalEType) -> Json: raise NotImplementedError # pragma: no cover diff --git a/adbdgl_adapter/adapter.py b/adbdgl_adapter/adapter.py index 2e3dfb8..71a3092 100644 --- a/adbdgl_adapter/adapter.py +++ b/adbdgl_adapter/adapter.py @@ -2,20 +2,43 @@ # -*- coding: utf-8 -*- import logging from collections import defaultdict -from typing import Any, DefaultDict, Dict, List, Optional, Set, Union +from math import ceil +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Tuple, Union from arango.cursor import Cursor -from arango.database import Database +from arango.database import StandardDatabase from arango.graph import Graph as ADBGraph -from arango.result import Result -from dgl import DGLGraph, DGLHeteroGraph, heterograph -from dgl.view import HeteroEdgeDataView, HeteroNodeDataView -from torch import Tensor, tensor +from dgl import DGLGraph, DGLHeteroGraph, graph, heterograph +from dgl.view import EdgeSpace, HeteroEdgeDataView, HeteroNodeDataView, NodeSpace +from pandas import 
DataFrame, Series +from rich.console import Group +from rich.live import Live +from rich.progress import Progress +from torch import Tensor, cat, tensor from .abc import Abstract_ADBDGL_Adapter from .controller import ADBDGL_Controller -from .typings import ArangoMetagraph, DGLCanonicalEType, DGLDataDict, Json -from .utils import logger +from .exceptions import ADBMetagraphError, DGLMetagraphError +from .typings import ( + ADBMap, + ADBMetagraph, + ADBMetagraphValues, + DGLCanonicalEType, + DGLData, + DGLDataDict, + DGLDataTypes, + DGLMetagraph, + DGLMetagraphValues, + Json, +) +from .utils import ( + get_bar_progress, + get_export_spinner_progress, + get_import_spinner_progress, + logger, + validate_adb_metagraph, + validate_dgl_metagraph, +) class ADBDGL_Adapter(Abstract_ADBDGL_Adapter): @@ -36,27 +59,28 @@ class ADBDGL_Adapter(Abstract_ADBDGL_Adapter): def __init__( self, - db: Database, + db: StandardDatabase, controller: ADBDGL_Controller = ADBDGL_Controller(), logging_lvl: Union[str, int] = logging.INFO, ): self.set_logging(logging_lvl) - if issubclass(type(db), Database) is False: - msg = "**db** parameter must inherit from arango.database.Database" + if not isinstance(db, StandardDatabase): + msg = "**db** parameter must inherit from arango.database.StandardDatabase" raise TypeError(msg) - if issubclass(type(controller), ADBDGL_Controller) is False: + if not isinstance(controller, ADBDGL_Controller): msg = "**controller** parameter must inherit from ADBDGL_Controller" raise TypeError(msg) self.__db = db - self.__cntrl: ADBDGL_Controller = controller + self.__async_db = db.begin_async_execution(return_result=False) + self.__cntrl = controller logger.info(f"Instantiated ADBDGL_Adapter with database '{db.name}'") @property - def db(self) -> Database: + def db(self) -> StandardDatabase: return self.__db # pragma: no cover @property @@ -66,99 +90,239 @@ def cntrl(self) -> ADBDGL_Controller: def set_logging(self, level: Union[int, str]) -> None: 
logger.setLevel(level) + ########################### + # Public: ArangoDB -> DGL # + ########################### + def arangodb_to_dgl( - self, name: str, metagraph: ArangoMetagraph, **query_options: Any - ) -> DGLHeteroGraph: - """Create a DGLHeteroGraph from the user-defined metagraph. + self, name: str, metagraph: ADBMetagraph, **adb_export_kwargs: Any + ) -> Union[DGLGraph, DGLHeteroGraph]: + """Create a DGL graph from an ArangoDB Metagraph. Carries + over node/edge data via the **metagraph**. :param name: The DGL graph name. :type name: str :param metagraph: An object defining vertex & edge collections to import - to DGL, along with their associated attributes to keep. - :type metagraph: adbdgl_adapter.typings.ArangoMetagraph - :param query_options: Keyword arguments to specify AQL query options when + to DGL, along with collection-level specifications to indicate + which ArangoDB attributes will become DGL features/labels. + + The currently supported **metagraph** values are: + 1) Set[str]: The set of DGL-ready ArangoDB attributes to store + in your DGL graph. + + 2) Dict[str, str]: The DGL property name mapped to the ArangoDB + attribute name that stores your DGL-ready data. + + 3) Dict[str, Dict[str, None | Callable]]: + The DGL property name mapped to a dictionary, which maps your + ArangoDB attribute names to a callable Python Class + (i.e. has a `__call__` function defined), or to None + (if the ArangoDB attribute is already a list of numerics). + NOTE: The `__call__` function must take as input a Pandas DataFrame, + and must return a PyTorch Tensor. + + 4) Dict[str, Callable[[pandas.DataFrame], torch.Tensor]]: + The DGL property name mapped to a user-defined function + for custom behaviour. NOTE: The function must take as input + a Pandas DataFrame, and must return a PyTorch Tensor. + + See below for examples of **metagraph**. 
+ :type metagraph: adbdgl_adapter.typings.ADBMetagraph + :param adb_export_kwargs: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. Full parameter list: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute - :type query_options: Any - :return: A DGL Heterograph - :rtype: dgl.heterograph.DGLHeteroGraph - :raise ValueError: If missing required keys in metagraph + :type adb_export_kwargs: Any + :return: A DGL Homogeneous or Heterogeneous graph object + :rtype: dgl.DGLGraph | dgl.DGLHeteroGraph + :raise adbdgl_adapter.exceptions.ADBMetagraphError: If invalid metagraph. - Here is an example entry for parameter **metagraph**: + **metagraph** examples + 1) .. code-block:: python { "vertexCollections": { - "account": {"Balance", "account_type", "customer_id", "rank"}, - "bank": {"Country", "Id", "bank_id", "bank_name"}, - "customer": {"Name", "Sex", "Ssn", "rank"}, + "v0": {'x', 'y'}, # equivalent to {'x': 'x', 'y': 'y'} + "v1": {'x'}, + "v2": {'x'}, }, "edgeCollections": { - "accountHolder": {}, - "transaction": { - "transaction_amt", "receiver_bank_id", "sender_bank_id" + "e0": {'edge_attr'}, + "e1": {'edge_weight'}, + }, + } + + The metagraph above specifies that each document + within the "v0" ArangoDB collection has a "pre-built" feature matrix + named "x", and also has a node label named "y". + We map these keys to the "x" and "y" properties of the DGL graph. + + 2) + .. code-block:: python + { + "vertexCollections": { + "v0": {'x': 'v0_features', 'y': 'label'}, + "v1": {'x': 'v1_features'}, + "v2": {'x': 'v2_features'}, + }, + "edgeCollections": { + "e0": {'edge_attr': 'e0_features'}, + "e1": {'edge_weight': 'edge_weight'}, + }, + } + + The metagraph above specifies that each document + within the "v0" ArangoDB collection has a "pre-built" feature matrix + named "v0_features", and also has a node label named "label". + We map these keys to the "x" and "y" properties of the DGL graph. + + 3) + .. 
code-block:: python + from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder + + { + "vertexCollections": { + "Movies": { + "x": { + "Action": IdentityEncoder(dtype=torch.long), + "Drama": IdentityEncoder(dtype=torch.long), + 'Misc': None + }, + "y": "Comedy", + }, + "Users": { + "x": { + "Gender": CategoricalEncoder(), + "Age": IdentityEncoder(dtype=torch.long), + } + }, + }, + "edgeCollections": { + "Ratings": { "edge_weight": "Rating" } + }, + } + + The metagraph above will build the "Movies" feature matrix 'x' + using the ArangoDB 'Action', 'Drama' & 'Misc' attributes, by relying on + the user-specified Encoders (see adbdgl_adapter.encoders for examples). + NOTE: If the mapped value is `None`, then it assumes that the ArangoDB attribute + value is a list containing numerical values only. + + 4) + .. code-block:: python + def udf_v0_x(v0_df): + # process v0_df here to return v0 "x" feature matrix + # ... + return torch.tensor(v0_df["x"].to_list()) + + def udf_v1_x(v1_df): + # process v1_df here to return v1 "x" feature matrix + # ... + return torch.tensor(v1_df["x"].to_list()) + + { + "vertexCollections": { + "v0": { + "x": udf_v0_x, # named functions + "y": (lambda df: tensor(df["y"].to_list())), # lambda functions }, + "v1": {"x": udf_v1_x}, + "v2": {"x": (lambda df: tensor(df["x"].to_list()))}, + }, + "edgeCollections": { + "e0": {"edge_attr": (lambda df: tensor(df["edge_attr"].to_list()))}, }, } + + The metagraph above provides an interface for a user-defined function to + build a DGL-ready Tensor from a DataFrame equivalent to the + associated ArangoDB collection. """ logger.debug(f"--arangodb_to_dgl('{name}')--") - # Maps ArangoDB vertex IDs to DGL node IDs - adb_map: Dict[str, Json] = dict() + validate_adb_metagraph(metagraph) + + # Maps ArangoDB Vertex _keys to DGL Node ids + adb_map: ADBMap = defaultdict(dict) - # Dictionaries for constructing a heterogeneous graph. 
+ # The data for constructing a graph, + # which takes the form of (U, V). + # (U[i], V[i]) forms the edge with ID i in the graph. data_dict: DGLDataDict = dict() - ndata: DefaultDict[str, DefaultDict[str, List[Any]]] - ndata = defaultdict(lambda: defaultdict(list)) + # The node data view for storing node features + ndata: DGLData = defaultdict(lambda: defaultdict(Tensor)) - edata: DefaultDict[str, DefaultDict[str, List[Any]]] - edata = defaultdict(lambda: defaultdict(list)) + # The edge data view for storing edge features + edata: DGLData = defaultdict(lambda: defaultdict(Tensor)) - adb_v: Json - for v_col, atribs in metagraph["vertexCollections"].items(): - logger.debug(f"Preparing '{v_col}' vertices") - for i, adb_v in enumerate(self.__fetch_adb_docs(v_col, query_options)): - adb_id = adb_v["_id"] - logger.debug(f"V{i}: {adb_id}") + v_cols: List[str] = list(metagraph["vertexCollections"].keys()) - adb_map[adb_id] = {"id": i, "col": v_col} - self.__prepare_dgl_features(ndata, atribs, adb_v, v_col) + ###################### + # Vertex Collections # + ###################### - adb_e: Json - edge_dict: DefaultDict[DGLCanonicalEType, DefaultDict[str, List[Any]]] - for e_col, atribs in metagraph["edgeCollections"].items(): - logger.debug(f"Preparing '{e_col}' edges") + for v_col, meta in metagraph["vertexCollections"].items(): + logger.debug(f"Preparing '{v_col}' vertices") - edge_dict = defaultdict(lambda: defaultdict(list)) + # 1. Fetch ArangoDB vertices + v_col_cursor, v_col_size = self.__fetch_adb_docs( + v_col, meta, **adb_export_kwargs + ) - for i, adb_e in enumerate(self.__fetch_adb_docs(e_col, query_options)): - logger.debug(f'E{i}: {adb_e["_id"]}') + # 2. 
Process ArangoDB vertices + self.__process_adb_cursor( + "#319BF5", + v_col_cursor, + v_col_size, + self.__process_adb_vertex_df, + v_col, + adb_map, + meta, + ndata=ndata, + ) - from_node = adb_map[adb_e["_from"]] - to_node = adb_map[adb_e["_to"]] - edge_type = (from_node["col"], e_col, to_node["col"]) + #################### + # Edge Collections # + #################### - edge_data = edge_dict[edge_type] - edge_data["from_nodes"].append(from_node["id"]) - edge_data["to_nodes"].append(to_node["id"]) + # The set of skipped edge types + edge_type_blacklist: Set[DGLCanonicalEType] = set() - self.__prepare_dgl_features(edata, atribs, adb_e, edge_type) + for e_col, meta in metagraph["edgeCollections"].items(): + logger.debug(f"Preparing '{e_col}' edges") - for edge_type, edges in edge_dict.items(): - logger.debug(f"Inserting {edge_type} edges") - data_dict[edge_type] = ( - tensor(edges["from_nodes"]), - tensor(edges["to_nodes"]), - ) + # 1. Fetch ArangoDB edges + e_col_cursor, e_col_size = self.__fetch_adb_docs( + e_col, meta, **adb_export_kwargs + ) - dgl_g: DGLHeteroGraph = heterograph(data_dict) - has_one_ntype = len(dgl_g.ntypes) == 1 - has_one_etype = len(dgl_g.etypes) == 1 - logger.debug(f"Is graph '{name}' homogenous? {has_one_ntype and has_one_etype}") + # 2. Process ArangoDB edges + self.__process_adb_cursor( + "#FCFDFC", + e_col_cursor, + e_col_size, + self.__process_adb_edge_df, + e_col, + adb_map, + meta, + edata=edata, + data_dict=data_dict, + v_cols=v_cols, + edge_type_blacklist=edge_type_blacklist, + ) + + if not data_dict: # pragma: no cover + msg = f""" + Can't create the DGL graph: no complete edge types found. 
+ The following edge types were skipped due to missing + vertex collection specifications: {edge_type_blacklist} + """ + raise ValueError(msg) - self.__insert_dgl_features(ndata, dgl_g.ndata, has_one_ntype) - self.__insert_dgl_features(edata, dgl_g.edata, has_one_etype) + dgl_g = self.__create_dgl_graph(data_dict, adb_map, metagraph) + self.__link_dgl_data(dgl_g.ndata, ndata, len(dgl_g.ntypes) == 1) + self.__link_dgl_data(dgl_g.edata, edata, len(dgl_g.canonical_etypes) == 1) logger.info(f"Created DGL '{name}' Graph") return dgl_g @@ -168,55 +332,70 @@ def arangodb_collections_to_dgl( name: str, v_cols: Set[str], e_cols: Set[str], - **query_options: Any, - ) -> DGLHeteroGraph: - """Create a DGL graph from ArangoDB collections. + **adb_export_kwargs: Any, + ) -> Union[DGLGraph, DGLHeteroGraph]: + """Create a DGL graph from ArangoDB collections. Due to risk of + ambiguity, this method DOES NOT transfer ArangoDB attributes to DGL. :param name: The DGL graph name. :type name: str - :param v_cols: A set of ArangoDB vertex collections to - import to DGL. + :param v_cols: The set of ArangoDB vertex collections to import to DGL. :type v_cols: Set[str] - :param e_cols: A set of ArangoDB edge collections to import to DGL. + :param e_cols: The set of ArangoDB edge collections to import to DGL. :type e_cols: Set[str] - :param query_options: Keyword arguments to specify AQL query options when + :param adb_export_kwargs: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. Full parameter list: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute - :type query_options: Any - :return: A DGL Heterograph - :rtype: dgl.heterograph.DGLHeteroGraph + :type adb_export_kwargs: Any + :return: A DGL Homogeneous or Heterogeneous graph object + :rtype: dgl.DGLGraph | dgl.DGLHeteroGraph + :raise adbdgl_adapter.exceptions.ADBMetagraphError: If invalid metagraph. 
""" - metagraph: ArangoMetagraph = { - "vertexCollections": {col: set() for col in v_cols}, - "edgeCollections": {col: set() for col in e_cols}, + metagraph: ADBMetagraph = { + "vertexCollections": {col: dict() for col in v_cols}, + "edgeCollections": {col: dict() for col in e_cols}, } - return self.arangodb_to_dgl(name, metagraph, **query_options) + return self.arangodb_to_dgl(name, metagraph, **adb_export_kwargs) - def arangodb_graph_to_dgl(self, name: str, **query_options: Any) -> DGLHeteroGraph: + def arangodb_graph_to_dgl( + self, name: str, **adb_export_kwargs: Any + ) -> Union[DGLGraph, DGLHeteroGraph]: """Create a DGL graph from an ArangoDB graph. :param name: The ArangoDB graph name. :type name: str - :param query_options: Keyword arguments to specify AQL query options when + :param adb_export_kwargs: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. Full parameter list: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute - :type query_options: Any - :return: A DGL Heterograph - :rtype: dgl.heterograph.DGLHeteroGraph + :type adb_export_kwargs: Any + :return: A DGL Homogeneous or Heterogeneous graph object + :rtype: dgl.DGLGraph | dgl.DGLHeteroGraph + :raise adbdgl_adapter.exceptions.ADBMetagraphError: If invalid metagraph. 
""" graph = self.__db.graph(name) - v_cols = graph.vertex_collections() - e_cols = {col["edge_collection"] for col in graph.edge_definitions()} + v_cols: Set[str] = graph.vertex_collections() # type: ignore + edge_definitions: List[Json] = graph.edge_definitions() # type: ignore + e_cols: Set[str] = {c["edge_collection"] for c in edge_definitions} - return self.arangodb_collections_to_dgl(name, v_cols, e_cols, **query_options) + return self.arangodb_collections_to_dgl( + name, v_cols, e_cols, **adb_export_kwargs + ) + + ########################### + # Public: DGL -> ArangoDB # + ########################### def dgl_to_arangodb( self, name: str, dgl_g: Union[DGLGraph, DGLHeteroGraph], + metagraph: DGLMetagraph = {}, + explicit_metagraph: bool = True, overwrite_graph: bool = False, - **import_options: Any, + batch_size: Optional[int] = None, + use_async: bool = False, + **adb_import_kwargs: Any, ) -> ADBGraph: """Create an ArangoDB graph from a DGL graph. @@ -224,117 +403,644 @@ def dgl_to_arangodb( :type name: str :param dgl_g: The existing DGL graph. :type dgl_g: Union[dgl.DGLGraph, dgl.heterograph.DGLHeteroGraph] + :param metagraph: An optional object mapping the DGL keys of + the node & edge data to strings, list of strings, or user-defined + functions. NOTE: Unlike the metagraph for ArangoDB to DGL, this + one is optional. + + The current supported **metagraph** values are: + 1) Set[str]: The set of DGL data properties to store + in your ArangoDB database. + + 2) Dict[str, str]: The DGL property name mapped to the ArangoDB + attribute name that will be used to store your DGL data in ArangoDB. + + 3) List[str]: A list of ArangoDB attribute names that will break down + your tensor data, resulting in one ArangoDB attribute per feature. + Must know the number of node/edge features in advance to take + advantage of this metagraph value type. 
+ + 4) Dict[str, Callable[[torch.Tensor], pandas.DataFrame]]: + The DGL property name mapped to a user-defined function + for custom behaviour. NOTE: The function must take as input + a PyTorch Tensor, and must return a Pandas DataFrame. + + See below for an example of **metagraph**. + :type metagraph: adbdgl_adapter.typings.DGLMetagraph + :param explicit_metagraph: Whether to take the metagraph at face value or not. + If False, node & edge types OMITTED from the metagraph will still be + brought over into ArangoDB. Also applies to node & edge attributes. + Defaults to True. + :type explicit_metagraph: bool :param overwrite_graph: Overwrites the graph if it already exists. - Does not drop associated collections. + Does not drop associated collections. Defaults to False. :type overwrite_graph: bool - :param import_options: Keyword arguments to specify additional + :param batch_size: Process the DGL Nodes & Edges in batches of size + **batch_size**. Defaults to `None`, which processes each + NodeStorage & EdgeStorage in one batch. + :type batch_size: int + :param use_async: Performs asynchronous ArangoDB ingestion if enabled. + Defaults to False. + :type use_async: bool + :param adb_import_kwargs: Keyword arguments to specify additional parameters for ArangoDB document insertion. Full parameter list: https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk - :type import_options: Any + :type adb_import_kwargs: Any :return: The ArangoDB Graph API wrapper. :rtype: arango.graph.Graph + :raise adbdgl_adapter.exceptions.DGLMetagraphError: If invalid metagraph. + + **metagraph** example + + .. 
code-block:: python + def y_tensor_to_2_column_dataframe(dgl_tensor): + # A user-defined function to create two ArangoDB attributes + # out of the 'y' label tensor + label_map = {0: "Kiwi", 1: "Blueberry", 2: "Avocado"} + + df = pandas.DataFrame(columns=["label_num", "label_str"]) + df["label_num"] = dgl_tensor.tolist() + df["label_str"] = df["label_num"].map(label_map) + + return df + + metagraph = { + "nodeTypes": { + "v0": { + "x": "features", # 1) + "y": y_tensor_to_2_column_dataframe, # 2) + }, + "v1": {"x"} # 3) + }, + "edgeTypes": { + ("v0", "e0", "v0"): {"edge_attr": [ "a", "b"]}, # 4) + }, + } + + The metagraph above accomplishes the following: + 1) Renames the DGL 'v0' 'x' feature matrix to 'features' + when stored in ArangoDB. + 2) Builds a 2-column Pandas DataFrame from the 'v0' 'y' labels + through a user-defined function for custom behaviour handling. + 3) Transfers the DGL 'v1' 'x' feature matrix under the same name. + 4) Disassembles the 2-feature Tensor into two ArangoDB attributes, + where each attribute holds one feature value. """ logger.debug(f"--dgl_to_arangodb('{name}')--") - is_default = dgl_g.canonical_etypes == self.DEFAULT_CANONICAL_ETYPE - logger.debug(f"Is graph '{name}' using default canonical_etypes? {is_default}") + validate_dgl_metagraph(metagraph) - edge_definitions = self.etypes_to_edefinitions( - [(name + "_N", name + "_E", name + "_N")] - if is_default - else dgl_g.canonical_etypes + is_custom_controller = type(self.__cntrl) is not ADBDGL_Controller + is_explicit_metagraph = metagraph != {} and explicit_metagraph + + has_one_ntype = len(dgl_g.ntypes) == 1 + has_one_etype = len(dgl_g.canonical_etypes) == 1 + + # Get the Node & Edge types + node_types, edge_types = self.__get_node_and_edge_types( + name, dgl_g, metagraph, is_explicit_metagraph ) - if overwrite_graph: - logger.debug("Overwrite graph flag is True. 
Deleting old graph.") - self.__db.delete_graph(name, ignore_missing=True) + # Create the ArangoDB Graph + adb_graph = self.__create_adb_graph( + name, overwrite_graph, node_types, edge_types + ) - if self.__db.has_graph(name): - adb_graph = self.__db.graph(name) - else: - adb_graph = self.__db.create_graph(name, edge_definitions) - - adb_v_cols = adb_graph.vertex_collections() - adb_e_cols = [e_d["edge_collection"] for e_d in adb_graph.edge_definitions()] - - has_one_vcol = len(adb_v_cols) == 1 - has_one_ecol = len(adb_e_cols) == 1 - logger.debug(f"Is graph '{name}' homogenous? {has_one_vcol and has_one_ecol}") - - node: Tensor - v_col_docs: List[Json] = [] # to-be-inserted ArangoDB vertices - for ntype in dgl_g.ntypes: - v_col = adb_v_cols[0] if is_default else ntype - logger.debug(f"Preparing {dgl_g.number_of_nodes(ntype)} '{v_col}' nodes") - - features = dgl_g.node_attr_schemes(ntype).keys() - - for i, node in enumerate(dgl_g.nodes(ntype)): - dgl_node_id = node.item() - logger.debug(f"N{i}: {dgl_node_id}") - - adb_vertex = {"_key": str(dgl_node_id)} - self.__prepare_adb_attributes( - dgl_g.ndata, - features, - dgl_node_id, - adb_vertex, - v_col, - has_one_vcol, + spinner_progress = get_import_spinner_progress(" ") + + ############# + # DGL Nodes # + ############# + + n_meta = metagraph.get("nodeTypes", {}) + for n_type in node_types: + meta = n_meta.get(n_type, {}) + + n_key = None if has_one_ntype else n_type + + ndata = dgl_g.nodes[n_key].data + ndata_size = dgl_g.num_nodes(n_key) + ndata_batch_size = batch_size or ndata_size + + start_index = 0 + end_index = min(ndata_batch_size, ndata_size) + batches = ceil(ndata_size / ndata_batch_size) + + bar_progress = get_bar_progress(f"(DGL → ADB): '{n_type}'", "#97C423") + bar_progress_task = bar_progress.add_task(n_type, total=ndata_size) + + with Live(Group(bar_progress, spinner_progress)): + for _ in range(batches): + # 1. 
Process the Node batch + df = self.__process_dgl_node_batch( + n_type, + ndata, + ndata_size, + meta, + is_explicit_metagraph, + is_custom_controller, + start_index, + end_index, + ) + + bar_progress.advance(bar_progress_task, advance=len(df)) + + # 2. Insert the ArangoDB Node Documents + self.__insert_adb_docs( + spinner_progress, df, n_type, use_async, **adb_import_kwargs + ) + + # 3. Update the batch indices + start_index = end_index + end_index = min(end_index + ndata_batch_size, ndata_size) + + ############# + # DGL Edges # + ############# + + e_meta = metagraph.get("edgeTypes", {}) + for e_type in edge_types: + meta = e_meta.get(e_type, {}) + + e_key = None if has_one_etype else e_type + + edata = dgl_g.edges[e_key].data + edata_size = dgl_g.num_edges(e_key) + edata_batch_size = batch_size or edata_size + + start_index = 0 + end_index = min(edata_batch_size, edata_size) + batches = ceil(edata_size / edata_batch_size) + + bar_progress = get_bar_progress(f"(DGL → ADB): {e_type}", "#994602") + bar_progress_task = bar_progress.add_task(str(e_type), total=edata_size) + + from_nodes, to_nodes = dgl_g.edges(etype=e_key) + + with Live(Group(bar_progress, spinner_progress)): + for _ in range(batches): + # 1. Process the Edge batch + df = self.__process_dgl_edge_batch( + e_type, + edata, + edata_size, + meta, + from_nodes, + to_nodes, + is_explicit_metagraph, + is_custom_controller, + start_index, + end_index, + ) + + bar_progress.advance(bar_progress_task, advance=len(df)) + + # 2. Insert the ArangoDB Edge Documents + self.__insert_adb_docs( + spinner_progress, df, e_type[1], use_async, **adb_import_kwargs + ) + + # 3. 
Update the batch indices + start_index = end_index + end_index = min(end_index + edata_batch_size, edata_size) + + logger.info(f"Created ArangoDB '{name}' Graph") + return adb_graph + + ############################ + # Private: ArangoDB -> DGL # + ############################ + + def __fetch_adb_docs( + self, + col: str, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + **adb_export_kwargs: Any, + ) -> Tuple[Cursor, int]: + """ArangoDB -> DGL: Fetches ArangoDB documents within a collection. + Returns a cursor over the documents, along with the collection size. + + :param col: The ArangoDB collection. + :type col: str + :param meta: The MetaGraph associated to **col** + :type meta: Set[str] | Dict[str, adbdgl_adapter.typings.ADBMetagraphValues] + :param adb_export_kwargs: Keyword arguments to specify AQL query options + when fetching documents from the ArangoDB instance. + :type adb_export_kwargs: Any + :return: The AQL cursor over the ArangoDB documents & the size of **col**. + :rtype: Tuple[arango.cursor.Cursor, int] + """ + + def get_aql_return_value( + meta: Union[Set[str], Dict[str, ADBMetagraphValues]] + ) -> str: + """Helper method to formulate the AQL `RETURN` value based on + the document attributes specified in **meta** + """ + attributes = [] + + if type(meta) is set: + attributes = list(meta) + + elif type(meta) is dict: + for value in meta.values(): + if type(value) is str: + attributes.append(value) + elif type(value) is dict: + attributes.extend(list(value.keys())) + elif callable(value): + # Cannot determine which attributes to extract if UDFs are used + # Therefore we just return the entire document + return "doc" + + return f""" + MERGE( + {{ _key: doc._key, _from: doc._from, _to: doc._to }}, + KEEP(doc, {list(attributes)}) ) + """ - v_col_docs.append(adb_vertex) + col_size: int = self.__db.collection(col).count() # type: ignore + + with get_export_spinner_progress(f"ADB Export: '{col}' ({col_size})") as p: + p.add_task(col) + + cursor: Cursor = 
self.__db.aql.execute( # type: ignore + f"FOR doc IN @@col 
RETURN {get_aql_return_value(meta)}", + bind_vars={"@col": col}, + **{**adb_export_kwargs, **{"stream": True}}, + ) - self.__insert_adb_docs(v_col, v_col_docs, import_options) - v_col_docs.clear() + return cursor, col_size + + def __process_adb_cursor( + self, + progress_color: str, + cursor: Cursor, + col_size: int, + process_adb_df: Callable[..., int], + col: str, + adb_map: ADBMap, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + **kwargs: Any, + ) -> None: + """ArangoDB -> DGL: Processes the ArangoDB Cursors for vertices and edges. + + :param progress_color: The progress bar color. + :type progress_color: str + :param cursor: The ArangoDB cursor for the current **col**. + :type cursor: arango.cursor.Cursor + :param col_size: The size of **col**. + :type col_size: int + :param process_adb_df: The function to process the cursor data + (in the form of a Dataframe). + :type process_adb_df: Callable + :param col: The ArangoDB collection for the current **cursor**. + :type col: str + :param adb_map: The ArangoDB -> DGL map. + :type adb_map: adbdgl_adapter.typings.ADBMap + :param meta: The metagraph for the current **col**. + :type meta: Set[str] | Dict[str, ADBMetagraphValues] + :param kwargs: Additional keyword arguments to pass to **process_adb_df**. 
+ :type kwargs: Any + """ - from_n: Tensor - to_n: Tensor - e_col_docs: List[Json] = [] # to-be-inserted ArangoDB edges - for c_etype in dgl_g.canonical_etypes: - logger.debug(f"Preparing {dgl_g.number_of_edges(c_etype)} {c_etype} edges") + progress = get_bar_progress(f"(ADB → DGL): '{col}'", progress_color) + progress_task_id = progress.add_task(col, total=col_size) - features = dgl_g.edge_attr_schemes(c_etype).keys() + with Live(Group(progress)): + i = 0 + while not cursor.empty(): + cursor_batch = len(cursor.batch()) # type: ignore + df = DataFrame([cursor.pop() for _ in range(cursor_batch)]) - if is_default: - e_col = adb_e_cols[0] - from_col = to_col = adb_v_cols[0] + i = process_adb_df(i, df, col, adb_map, meta, **kwargs) + progress.advance(progress_task_id, advance=len(df)) + + df.drop(df.index, inplace=True) + + if cursor.has_more(): + cursor.fetch() + + def __process_adb_vertex_df( + self, + i: int, + df: DataFrame, + v_col: str, + adb_map: ADBMap, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + ndata: DGLData, + ) -> int: + """ArangoDB -> DGL: Process the ArangoDB Vertex DataFrame + into the DGL NData object. + + :param i: The last DGL Node id value. + :type i: int + :param df: The ArangoDB Vertex DataFrame. + :type df: pandas.DataFrame + :param v_col: The ArangoDB Vertex Collection. + :type v_col: str + :param adb_map: The ArangoDB -> DGL map. + :type adb_map: adbdgl_adapter.typings.ADBMap + :param meta: The metagraph for the current **v_col**. + :type meta: Set[str] | Dict[str, ADBMetagraphValues] + :param ndata: The node data view for storing node features + :type ndata: adbdgl_adapter.typings.DGLData + :return: The last DGL Node id value. + :rtype: int + """ + # 1. Map each ArangoDB _key to a DGL node id + for adb_id in df["_key"]: + adb_map[v_col][adb_id] = i + i += 1 + + # 2. 
Set the DGL Node Data + self.__set_dgl_data(v_col, meta, ndata, df) + + return i + + def __process_adb_edge_df( + self, + _: int, + df: DataFrame, + e_col: str, + adb_map: ADBMap, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + edata: DGLData, + data_dict: DGLDataDict, + v_cols: List[str], + edge_type_blacklist: Set[DGLCanonicalEType], + ) -> int: + """ArangoDB -> DGL: Process the ArangoDB Edge DataFrame + into the DGL EdgeData object. + + :param _: Not used. + :type _: int + :param df: The ArangoDB Edge DataFrame. + :type df: pandas.DataFrame + :param e_col: The ArangoDB Edge Collection. + :type e_col: str + :param adb_map: The ArangoDB -> DGL map. + :type adb_map: adbdgl_adapter.typings.ADBMap + :param meta: The metagraph for the current **e_col**. + :type meta: Set[str] | Dict[str, ADBMetagraphValues] + :param edata: The edge data view for storing edge features + :type edata: adbdgl_adapter.typings.DGLData + :param data_dict: The data for constructing a graph, + which takes the form of (U, V). + (U[i], V[i]) forms the edge with ID i in the graph. + :type data_dict: adbdgl_adapter.typings.DGLDataDict + :param v_cols: The list of ArangoDB Vertex Collections. + :type v_cols: List[str] + :param edge_type_blacklist: The set of skipped edge types + :type edge_type_blacklist: Set[DGLCanonicalEType] + :return: The last DGL Edge id value. This is a useless return value, + but is needed for type hinting. + :rtype: int + """ + # 1. Split the ArangoDB _from & _to IDs into two columns + df[["from_col", "from_key"]] = self.__split_adb_ids(df["_from"]) + df[["to_col", "to_key"]] = self.__split_adb_ids(df["_to"]) + + # 2. Iterate over each edge type + for (from_col, to_col), count in ( + df[["from_col", "to_col"]].value_counts().items() + ): + edge_type: DGLCanonicalEType = (from_col, e_col, to_col) + + # 3. 
Check for partial Edge Collection import + if from_col not in v_cols or to_col not in v_cols: + logger.debug(f"Skipping {edge_type}") + edge_type_blacklist.add(edge_type) + continue + + logger.debug(f"Preparing {count} {edge_type} edges") + + # 4. Get the edge data corresponding to the current edge type + et_df = df[(df["from_col"] == from_col) & (df["to_col"] == to_col)] + + # 5. Map each ArangoDB from/to _key to the corresponding DGL node id + from_nodes = et_df["from_key"].map(adb_map[from_col]).tolist() + to_nodes = et_df["to_key"].map(adb_map[to_col]).tolist() + + # 6. Set/Update the DGL Edge Index + if edge_type not in data_dict: + data_dict[edge_type] = (tensor(from_nodes), tensor(to_nodes)) else: - from_col, e_col, to_col = c_etype + previous_from_nodes, previous_to_nodes = data_dict[edge_type] + data_dict[edge_type] = ( + cat((previous_from_nodes, tensor(from_nodes))), + cat((previous_to_nodes, tensor(to_nodes))), + ) - for i, (from_n, to_n) in enumerate(zip(*dgl_g.edges(etype=c_etype))): - logger.debug(f"E{i}: ({from_n}, {to_n})") + # 7. Set the DGL Edge Data + self.__set_dgl_data(edge_type, meta, edata, df) - adb_edge = { - "_from": f"{from_col}/{str(from_n.item())}", - "_to": f"{to_col}/{str(to_n.item())}", - } - self.__prepare_adb_attributes( - dgl_g.edata, - features, - i, - adb_edge, - e_col, - has_one_ecol, - c_etype, + return 1 # Useless return value, but needed for type hinting + + def __split_adb_ids(self, s: Series) -> Series: + """AranogDB -> DGL: Helper method to split the ArangoDB IDs + within a Series into two columns + + :param s: The Series containing the ArangoDB IDs. + :type s: pandas.Series + :return: A DataFrame with two columns: the ArangoDB Collection, + and the ArangoDB _key. 
+ :rtype: pandas.DataFrame + """ + return s.str.split(pat="/", n=1, expand=True) + + def __set_dgl_data( + self, + data_type: DGLDataTypes, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + dgl_data: DGLData, + df: DataFrame, + ) -> None: + """ArangoDB -> DGL: A helper method to build the DGL NodeSpace or + EdgeSpace object for the DGL graph. Is responsible for preparing the + input **meta** such that it becomes a dictionary, and building DGL-ready + tensors from the ArangoDB DataFrame **df**. + + :param data_type: The current node or edge type of the soon-to-be DGL graph. + :type data_type: str | tuple[str, str, str] + :param meta: The metagraph associated to the current ArangoDB vertex or + edge collection. e.g metagraph['vertexCollections']['Users'] + :type meta: Set[str] | Dict[str, adbdgl_adapter.typings.ADBMetagraphValues] + :param dgl_data: The (currently empty) DefaultDict object storing the node or + edge features of the soon-to-be DGL graph. + :type dgl_data: adbdgl_adapter.typings.DGLData + :param df: The DataFrame representing the ArangoDB collection data + :type df: pandas.DataFrame + """ + valid_meta: Dict[str, ADBMetagraphValues] + valid_meta = meta if type(meta) is dict else {m: m for m in meta} + + for k, v in valid_meta.items(): + t = self.__build_tensor_from_dataframe(df, k, v) + dgl_data[k][data_type] = cat((dgl_data[k][data_type], t)) + + def __build_tensor_from_dataframe( + self, + adb_df: DataFrame, + meta_key: str, + meta_val: ADBMetagraphValues, + ) -> Tensor: + """ArangoDB -> DGL: Constructs a DGL-ready Tensor from a Pandas + Dataframe, based on the nature of the user-defined metagraph. + + :param adb_df: The Pandas Dataframe representing ArangoDB data. + :type adb_df: pandas.DataFrame + :param meta_key: The current ArangoDB-DGL metagraph key + :type meta_key: str + :param meta_val: The value mapped to **meta_key** to + help convert **adb_df** into a DGL-ready Tensor. + e.g the value of `metagraph['vertexCollections']['users']['x']`. 
+ :type meta_val: adbdgl_adapter.typings.ADBMetagraphValues + :return: A DGL-ready tensor equivalent to the dataframe + :rtype: torch.Tensor + :raise adbdgl_adapter.exceptions.ADBMetagraphError: If invalid **meta_val**. + """ + logger.debug( + f"__build_tensor_from_dataframe(df, '{meta_key}', {type(meta_val)})" + ) + + if type(meta_val) is str: + return tensor(adb_df[meta_val].to_list()) + + if type(meta_val) is dict: + data = [] + for attr, encoder in meta_val.items(): + if encoder is None: + data.append(tensor(adb_df[attr].to_list())) + elif callable(encoder): + data.append(encoder(adb_df[attr])) + else: # pragma: no cover + msg = f"Invalid encoder for ArangoDB attribute '{attr}': {encoder}" + raise ADBMetagraphError(msg) + + return cat(data, dim=-1) + + if callable(meta_val): + # **meta_val** is a user-defined function that returns a tensor + user_defined_result = meta_val(adb_df) + + if type(user_defined_result) is not Tensor: # pragma: no cover + msg = f"Invalid return type for function {meta_val} ('{meta_key}')" + raise ADBMetagraphError(msg) + + return user_defined_result + + raise ADBMetagraphError(f"Invalid {meta_val} type") # pragma: no cover + + def __create_dgl_graph( + self, data_dict: DGLDataDict, adb_map: ADBMap, metagraph: ADBMetagraph + ) -> Union[DGLGraph, DGLHeteroGraph]: + """ArangoDB -> DGL: Creates a DGL graph from the given DGL data. + + :param data_dict: The data for constructing a graph, + which takes the form of (U, V). + (U[i], V[i]) forms the edge with ID i in the graph. + :type data_dict: adbdgl_adapter.typings.DGLDataDict + :param adb_map: A mapping of ArangoDB IDs to DGL IDs. + :type adb_map: adbdgl_adapter.typings.ADBMap + :param metagraph: The ArangoDB metagraph. 
+ :type metagraph: adbdgl_adapter.typings.ADBMetagraph + :return: A DGL Homogeneous or Heterogeneous graph object + :rtype: dgl.DGLGraph | dgl.DGLHeteroGraph + """ + is_homogeneous = ( + len(metagraph["vertexCollections"]) == 1 + and len(metagraph["edgeCollections"]) == 1 + ) + + if is_homogeneous: + v_col = next(iter(metagraph["vertexCollections"])) + data = next(iter(data_dict.values())) + + return graph(data, num_nodes=len(adb_map[v_col])) + + num_nodes_dict = {v_col: len(adb_map[v_col]) for v_col in adb_map} + return heterograph(data_dict, num_nodes_dict) + + def __link_dgl_data( + self, + dgl_data: Union[HeteroNodeDataView, HeteroEdgeDataView], + dgl_data_temp: DGLData, + has_one_type: bool, + ) -> None: + """Links **dgl_data_temp** to **dgl_data**. This method is (unfortunately) + required, since a dgl graph's `ndata` and `edata` properties can't be + manually set (i.e `g.ndata = ndata` is not possible). + + :param dgl_data: The (empty) ndata or edata instance attribute of a dgl graph, + which is about to receive **dgl_data_temp**. + :type dgl_data: Union[dgl.view.HeteroNodeDataView, dgl.view.HeteroEdgeDataView] + :param dgl_data_temp: A temporary place to store the ndata or edata features. + :type dgl_data_temp: adbdgl_adapter.typings.DGLData + :param has_one_type: Set to True if the DGL graph only has one + node type or edge type. 
+ :type has_one_type: bool + """ + for feature_name, feature_map in dgl_data_temp.items(): + for data_type, dgl_tensor in feature_map.items(): + dgl_data[feature_name] = ( + dgl_tensor if has_one_type else {data_type: dgl_tensor} ) - e_col_docs.append(adb_edge) + ############################ + # Private: DGL -> ArangoDB # + ############################ - self.__insert_adb_docs(e_col, e_col_docs, import_options) - e_col_docs.clear() + def __get_node_and_edge_types( + self, + name: str, + dgl_g: DGLGraph, + metagraph: DGLMetagraph, + is_explicit_metagraph: bool, + ) -> Tuple[List[str], List[DGLCanonicalEType]]: + """DGL -> ArangoDB: Returns the node & edge types of the DGL graph, + based on the metagraph and whether the graph has default canonical etypes. - logger.info(f"Created ArangoDB '{name}' Graph") - return adb_graph + :param name: The DGL graph name. + :type name: str + :param dgl_g: The existing DGL graph. + :type dgl_g: dgl.DGLGraph + :param metagraph: The DGL Metagraph. + :type metagraph: adbdgl_adapter.typings.DGLMetagraph + :param is_explicit_metagraph: Take the metagraph at face value or not. + :type is_explicit_metagraph: bool + :return: The node & edge types of the DGL graph. 
+ :rtype: Tuple[List[str], List[adbdgl_adapter.typings.DGLCanonicalEType]] + """ + node_types: List[str] + edge_types: List[DGLCanonicalEType] + + has_default_canonical_etypes = dgl_g.canonical_etypes == [("_N", "_E", "_N")] + + if is_explicit_metagraph: + node_types = metagraph.get("nodeTypes", {}).keys() # type: ignore + edge_types = metagraph.get("edgeTypes", {}).keys() # type: ignore + + elif has_default_canonical_etypes: + n_type = name + "_N" + node_types = [n_type] + edge_types = [(n_type, name + "_E", n_type)] + + else: + node_types = dgl_g.ntypes + edge_types = dgl_g.canonical_etypes - def etypes_to_edefinitions( - self, canonical_etypes: List[DGLCanonicalEType] + return node_types, edge_types + + def __etypes_to_edefinitions( + self, edge_types: List[DGLCanonicalEType] ) -> List[Json]: - """Converts a DGL graph's canonical_etypes property to ArangoDB graph edge definitions + """Converts DGL canonical_etypes to ArangoDB edge_definitions - :param canonical_etypes: A list of string triplets (str, str, str) for + :param edge_types: A list of string triplets (str, str, str) for source node type, edge type and destination node type. 
- :type canonical_etypes: List[adbdgl_adapter.typings.DGLCanonicalEType] + :type edge_types: List[adbdgl_adapter.typings.DGLCanonicalEType] :return: ArangoDB Edge Definitions :rtype: List[adbdgl_adapter.typings.Json] @@ -350,9 +1056,13 @@ def etypes_to_edefinitions( ] """ + if not edge_types: + return [] + edge_type_map: DefaultDict[str, DefaultDict[str, Set[str]]] edge_type_map = defaultdict(lambda: defaultdict(set)) - for edge_type in canonical_etypes: + + for edge_type in edge_types: from_col, e_col, to_col = edge_type edge_type_map[e_col]["from"].add(from_col) edge_type_map[e_col]["to"].add(to_col) @@ -369,120 +1079,362 @@ def etypes_to_edefinitions( return edge_definitions - def __prepare_dgl_features( + def __ntypes_to_ocollections( + self, node_types: List[str], edge_types: List[DGLCanonicalEType] + ) -> List[str]: + """Converts DGL node_types to ArangoDB orphan collections, if any. + + :param node_types: A list of strings representing the DGL node types. + :type node_types: List[str] + :param edge_types: A list of string triplets (str, str, str) for + source node type, edge type and destination node type. + :type edge_types: List[adbdgl_adapter.typings.DGLCanonicalEType] + :return: ArangoDB Orphan Collections + :rtype: List[str] + """ + + non_orphan_collections = set() + for from_col, _, to_col in edge_types: + non_orphan_collections.add(from_col) + non_orphan_collections.add(to_col) + + orphan_collections = set(node_types) ^ non_orphan_collections + return list(orphan_collections) + + def __create_adb_graph( self, - features_data: DefaultDict[Any, Any], - attributes: Set[str], - doc: Json, - col: Union[str, DGLCanonicalEType], - ) -> None: - """Convert a set of ArangoDB attributes into valid DGL features - - :param features_data: A dictionary storing the DGL features formatted as lists. 
- :type features_data: Defaultdict[Any, Any] - :param attributes: A set of ArangoDB attribute keys to convert into DGL features - :type attributes: Set[str] - :param doc: The current ArangoDB document - :type doc: adbdgl_adapter.typings.Json - :param col: The collection the current document belongs to. For edge - collections, the entire DGL Canonical eType is specified (src, e, dst) - :type col: str | Tuple[str, str, str] + name: str, + overwrite_graph: bool, + node_types: List[str], + edge_types: List[DGLCanonicalEType], + ) -> ADBGraph: + """Creates an ArangoDB graph. + + :param name: The ArangoDB graph name. + :type name: str + :param overwrite_graph: Overwrites the graph if it already exists. + Does not drop associated collections. Defaults to False. + :type overwrite_graph: bool + :param node_types: A list of strings representing the DGL node types. + :type node_types: List[str] + :param edge_types: A list of string triplets (str, str, str) for + source node type, edge type and destination node type. + :type edge_types: List[adbdgl_adapter.typings.DGLCanonicalEType] + :return: The ArangoDB Graph API wrapper. + :rtype: arango.graph.Graph """ - key: str - for key in attributes: - arr: List[Any] = features_data[key][col] - arr.append( - self.__cntrl._adb_attribute_to_dgl_feature(key, col, doc.get(key, None)) - ) + if overwrite_graph: + logger.debug("Overwrite graph flag is True. 
Deleting old graph.") + self.__db.delete_graph(name, ignore_missing=True) - def __insert_dgl_features( + if self.__db.has_graph(name): + return self.__db.graph(name) + + edge_definitions = self.__etypes_to_edefinitions(edge_types) + orphan_collections = self.__ntypes_to_ocollections(node_types, edge_types) + + return self.__db.create_graph( # type: ignore[return-value] + name, + edge_definitions, + orphan_collections, + ) + + def __process_dgl_node_batch( self, - features_data: DefaultDict[Any, Any], - data: Union[HeteroNodeDataView, HeteroEdgeDataView], - has_one_type: bool, - ) -> None: - """Insert valid DGL features into a DGL graph. - - :param features_data: A dictionary storing the DGL features formatted as lists. - :type features_data: Defaultdict[Any, Any] - :param data: The (empty) ndata or edata instance attribute of a dgl graph, - which is about to receive **features_data**. - :type data: Union[dgl.view.HeteroNodeDataView, dgl.view.HeteroEdgeDataView] - :param has_one_type: Set to True if the DGL graph only has one ntype, - or one etype. - :type has_one_type: bool + n_type: str, + ndata: NodeSpace, + ndata_size: int, + meta: Union[Set[str], Dict[Any, DGLMetagraphValues]], + is_explicit_metagraph: bool, + is_custom_controller: bool, + start_index: int, + end_index: int, + ) -> DataFrame: + """DGL -> ArangoDB: Processes the DGL Node batch + into an ArangoDB DataFrame. + + :param n_type: The DGL node type. + :type n_type: str + :param ndata: The DGL Node Space for the current **n_type**. + :type ndata: dgl.view.NodeSpace + :param ndata_size: The size of **ndata**. + :type ndata_size: int + :param meta: The metagraph for the current **n_type**. + :type meta: Set[str] | Dict[Any, adbdgl_adapter.typings.DGLMetagraphValues] + :param is_explicit_metagraph: Take the metagraph at face value or not. + :type is_explicit_metagraph: bool + :param is_custom_controller: Whether a custom controller is used. 
+ :type is_custom_controller: bool + :param start_index: The start index of the current batch. + :type start_index: int + :param end_index: The end index of the current batch. + :type end_index: int + :return: The ArangoDB DataFrame representing the DGL Node batch. + :rtype: pandas.DataFrame """ - col_dict: Dict[str, List[Any]] - for key, col_dict in features_data.items(): - for col, array in col_dict.items(): - logger.debug(f"Inserting {len(array)} '{key}' features into '{col}'") - data[key] = tensor(array) if has_one_type else {col: tensor(array)} + # 1. Map each DGL node id to an ArangoDB _key + adb_keys = [{"_key": str(i)} for i in range(start_index, end_index)] + + # 2. Set the ArangoDB Node Data + df = self.__set_adb_data( + DataFrame(adb_keys, index=range(start_index, end_index)), + meta, + ndata, + ndata_size, + is_explicit_metagraph, + start_index, + end_index, + ) + + # 3. Apply the ArangoDB Node Controller (if provided) + if is_custom_controller: + f = lambda n: self.__cntrl._prepare_dgl_node(n, n_type) + df = df.apply(f, axis=1) - def __prepare_adb_attributes( + return df + + def __process_dgl_edge_batch( self, - data: Union[HeteroNodeDataView, HeteroEdgeDataView], - features: Set[Any], - id: Union[int, float, bool], - doc: Json, - col: str, - has_one_col: bool, - canonical_etype: Optional[DGLCanonicalEType] = None, - ) -> None: - """Convert DGL features into a set of ArangoDB attributes for a given document - - :param data: The ndata or edata instance attribute of a dgl graph, filled with - node or edge feature data. 
- :type data: Union[dgl.view.HeteroNodeDataView, dgl.view.HeteroEdgeDataView] - :param features: A set of DGL feature keys to convert into ArangoDB attributes - :type features: Set[Any] - :param id: The ID of the current DGL node / edge - :type id: Union[int, float, bool] - :param doc: The current ArangoDB document - :type doc: adbdgl_adapter.typings.Json - :param col: The collection the current document belongs to - :type col: str - :param has_one_col: Set to True if the ArangoDB graph has one - vertex collection or one edge collection only. - :type has_one_col: bool - :param canonical_etype: The DGL canonical edge type belonging to the current - **col**, provided that **col** is an edge collection (ignored otherwise). - :type canonical_etype: adbdgl_adapter.typings.DGLCanonicalEType + e_type: DGLCanonicalEType, + edata: EdgeSpace, + edata_size: int, + meta: Union[Set[str], Dict[Any, DGLMetagraphValues]], + from_nodes: Tensor, + to_nodes: Tensor, + is_explicit_metagraph: bool, + is_custom_controller: bool, + start_index: int, + end_index: int, + ) -> DataFrame: + """DGL -> ArangoDB: Processes the DGL Edge batch + into an ArangoDB DataFrame. + + :param e_type: The DGL edge type. + :type e_type: adbdgl_adapter.typings.DGLCanonicalEType + :param edata: The DGL EdgeSpace for the current **e_type**. + :type edata: dgl.view.EdgeSpace + :param edata_size: The size of **edata**. + :param edata_size: int + :param meta: The metagraph for the current **e_type**. + :type meta: Set[str] | Dict[Any, adbdgl_adapter.typings.DGLMetagraphValues] + :param from_nodes: Tensor representing the Source Nodes of the **e_type**. + :type from_nodes: torch.Tensor + :param to_nodes: Tensor representing the Destination Nodes of the **e_type**. + :type to_nodes: torch.Tensor + :param is_explicit_metagraph: Take the metagraph at face value or not. + :type is_explicit_metagraph: bool + :param is_custom_controller: Whether a custom controller is used. 
+ :type is_custom_controller: bool + :param start_index: The start index of the current batch. + :type start_index: int + :param end_index: The end index of the current batch. + :type end_index: int + :return: The ArangoDB DataFrame representing the DGL Edge batch. + :rtype: pandas.DataFrame """ - for key in features: - tensor = data[key] if has_one_col else data[key][canonical_etype or col] - doc[key] = self.__cntrl._dgl_feature_to_adb_attribute(key, col, tensor[id]) + from_col, _, to_col = e_type - def __fetch_adb_docs(self, col: str, query_options: Any) -> Result[Cursor]: - """Fetches ArangoDB documents within a collection. + # 1. Map the DGL edges to ArangoDB _from & _to IDs + data = zip( + *( + from_nodes[start_index:end_index].tolist(), + to_nodes[start_index:end_index].tolist(), + ) + ) - :param col: The ArangoDB collection. - :type col: str - :param query_options: Keyword arguments to specify AQL query options - when fetching documents from the ArangoDB instance. - :type query_options: Any - :return: Result cursor. - :rtype: arango.cursor.Cursor + # 2. Set the ArangoDB Edge Data + df = self.__set_adb_data( + DataFrame( + data, + index=range(start_index, end_index), + columns=["_from", "_to"], + ), + meta, + edata, + edata_size, + is_explicit_metagraph, + start_index, + end_index, + ) + + df["_from"] = from_col + "/" + df["_from"].astype(str) + df["_to"] = to_col + "/" + df["_to"].astype(str) + + # 3. Apply the ArangoDB Edge Controller (if provided) + if is_custom_controller: + f = lambda e: self.__cntrl._prepare_dgl_edge(e, e_type) + df = df.apply(f, axis=1) + + return df + + def __set_adb_data( + self, + df: DataFrame, + meta: Union[Set[str], Dict[Any, DGLMetagraphValues]], + dgl_data: Union[NodeSpace, EdgeSpace], + dgl_data_size: int, + is_explicit_metagraph: bool, + start_index: int, + end_index: int, + ) -> DataFrame: + """A helper method to build the ArangoDB Dataframe for the given + collection. 
Is responsible for creating "sub-DataFrames" from DGL tensors, + and appending them to the main dataframe **df**. If the data + does not adhere to the supported types, or is not of specific length, + then it is silently skipped. + + :param df: The main ArangoDB DataFrame containing (at minimum) + the vertex/edge _id or _key attribute. + :type df: pandas.DataFrame + :param meta: The metagraph associated to the + current DGL node or edge type. e.g metagraph['nodeTypes']['v0'] + :type meta: Set[str] | Dict[Any, adbdgl_adapter.typings.DGLMetagraphValues] + :param dgl_data: The NodeSpace or EdgeSpace of the current + DGL node or edge type. + :type dgl_data: dgl.view.(NodeSpace | EdgeSpace) + :param dgl_data_size: The size of the NodeStorage or EdgeStorage of the + current DGL node or edge type. + :type dgl_data_size: int + :param is_explicit_metagraph: Take the metagraph at face value or not. + :type is_explicit_metagraph: bool + :param start_index: The starting index of the current batch to process. + :type start_index: int + :param end_index: The ending index of the current batch to process. + :type end_index: int + :return: The completed DataFrame for the (soon-to-be) ArangoDB collection. + :rtype: pandas.DataFrame + :raise ValueError: If an unsupported DGL data value is found. 
""" - aql = f""" - FOR doc IN {col} - RETURN doc + logger.debug( + f"__set_adb_data(df, {meta}, {type(dgl_data)}, {is_explicit_metagraph}" + ) + + valid_meta: Dict[Any, DGLMetagraphValues] + valid_meta = meta if type(meta) is dict else {m: m for m in meta} + + dgl_keys = set(valid_meta.keys()) if is_explicit_metagraph else dgl_data.keys() + for meta_key in dgl_keys: + data = dgl_data[meta_key] + meta_val = valid_meta.get(meta_key, str(meta_key)) + + if type(data) is Tensor and len(data) == dgl_data_size: + df = df.join( + self.__build_dataframe_from_tensor( + data[start_index:end_index], + start_index, + end_index, + meta_key, + meta_val, + ) + ) + + return df + + def __build_dataframe_from_tensor( + self, + dgl_tensor: Tensor, + start_index: int, + end_index: int, + meta_key: Any, + meta_val: DGLMetagraphValues, + ) -> DataFrame: + """Builds a Pandas DataFrame from DGL Tensor, based on + the nature of the user-defined metagraph. + + :param dgl_tensor: The Tensor representing DGL data. + :type dgl_tensor: torch.Tensor + :param meta_key: The current DGL-ArangoDB metagraph key + :type meta_key: Any + :param meta_val: The value mapped to the DGL-ArangoDB metagraph key to + help convert **tensor** into a Pandas Dataframe. + e.g the value of `metagraph['nodeTypes']['users']['x']`. + :type meta_val: adbdgl_adapter.typings.DGLMetagraphValues + :return: A Pandas DataFrame equivalent to the Tensor + :rtype: pandas.DataFrame + :raise adbdgl_adapter.exceptions.DGLMetagraphError: If invalid **meta_val**. 
""" + logger.debug( + f"__build_dataframe_from_tensor(df, '{meta_key}', {type(meta_val)})" + ) - return self.__db.aql.execute(aql, **query_options) + if type(meta_val) is str: + df = DataFrame(index=range(start_index, end_index), columns=[meta_val]) + df[meta_val] = dgl_tensor.tolist() + return df + + if type(meta_val) is list: + num_features = dgl_tensor.size()[-1] + if len(meta_val) != num_features: # pragma: no cover + msg = f""" + Invalid list length for **meta_val** ('{meta_key}'): + List length must match the number of + features found in the tensor ({num_features}). + """ + raise DGLMetagraphError(msg) + + df = DataFrame(index=range(start_index, end_index), columns=meta_val) + df[meta_val] = dgl_tensor.tolist() + return df + + if callable(meta_val): + # **meta_val** is a user-defined function that populates + # and returns the empty dataframe + empty_df = DataFrame(index=range(start_index, end_index)) + user_defined_result = meta_val(dgl_tensor, empty_df) + + if not isinstance(user_defined_result, DataFrame): # pragma: no cover + msg = f""" + Invalid return type for function {meta_val} ('{meta_key}'). + Function must return Pandas DataFrame. + """ + raise DGLMetagraphError(msg) + + if ( + user_defined_result.index.start != start_index + or user_defined_result.index.stop != end_index + ): # pragma: no cover + msg = f""" + User Defined Function {meta_val} ('{meta_key}') must return + DataFrame with start index {start_index} & stop index {end_index} + """ + raise DGLMetagraphError(msg) + + return user_defined_result + + raise DGLMetagraphError(f"Invalid {meta_val} type") # pragma: no cover def __insert_adb_docs( - self, col: str, docs: List[Json], import_options: Any + self, + spinner_progress: Progress, + df: DataFrame, + col: str, + use_async: bool, + **adb_import_kwargs: Any, ) -> None: - """Insert ArangoDB documents into their ArangoDB collection. + """DGL -> ArangoDB: Insert ArangoDB documents into their ArangoDB collection. 
- :param col: The ArangoDB collection name + :param spinner_progress: The spinner progress bar. + :type spinner_progress: rich.progress.Progress + :param df: To-be-inserted ArangoDB documents, formatted as a DataFrame + :type df: pandas.DataFrame + :param col: The ArangoDB collection name. :type col: str - :param docs: To-be-inserted ArangoDB documents - :type docs: List[Json] - :param import_options: Keyword arguments to specify additional + :param use_async: Performs asynchronous ArangoDB ingestion if enabled. + :type use_async: bool + :param adb_import_kwargs: Keyword arguments to specify additional parameters for ArangoDB document insertion. Full parameter list: https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk + :param adb_import_kwargs: Any """ - logger.debug(f"Inserting {len(docs)} documents into '{col}'") - result = self.__db.collection(col).import_bulk(docs, **import_options) + action = f"ADB Import: '{col}' ({len(df)})" + spinner_progress_task = spinner_progress.add_task("", action=action) + + docs = df.to_dict("records") + db = self.__async_db if use_async else self.__db + result = db.collection(col).import_bulk(docs, **adb_import_kwargs) logger.debug(result) + + df.drop(df.index, inplace=True) + + spinner_progress.stop_task(spinner_progress_task) + spinner_progress.update(spinner_progress_task, visible=False) diff --git a/adbdgl_adapter/controller.py b/adbdgl_adapter/controller.py index cd1c0f5..77e9cc3 100644 --- a/adbdgl_adapter/controller.py +++ b/adbdgl_adapter/controller.py @@ -1,74 +1,52 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from typing import Any, Union - -from torch import Tensor - -from adbdgl_adapter.typings import DGLCanonicalEType - from .abc import Abstract_ADBDGL_Controller +from .typings import DGLCanonicalEType, Json class ADBDGL_Controller(Abstract_ADBDGL_Controller): """ArangoDB-DGL controller. 
- Responsible for controlling how ArangoDB attributes - are converted into DGL features, and vice-versa. + Responsible for controlling how nodes & edges are handled when + transitioning from DGL to ArangoDB. - You can derive your own custom ADBDGL_Controller if you want to maintain - consistency between your ArangoDB attributes & your DGL features. + You can derive your own custom ADBDGL_Controller. """ - def _adb_attribute_to_dgl_feature( - self, key: str, col: Union[str, DGLCanonicalEType], val: Any - ) -> Any: - """ - Given an ArangoDB attribute key, its assigned value (for an arbitrary document), - and the collection it belongs to, convert it to a valid - DGL feature: https://docs.dgl.ai/en/0.6.x/guide/graph-feature.html. - - NOTE: You must override this function if you want to transfer non-numerical - ArangoDB attributes to DGL (DGL only accepts 'attributes' (a.k.a features) - of numerical types). Read more about DGL features here: - https://docs.dgl.ai/en/0.6.x/new-tutorial/2_dglgraph.html#assigning-node-and-edge-features-to-graph. - - :param key: The ArangoDB attribute key name - :type key: str - :param col: The ArangoDB collection of the ArangoDB document. - :type col: str - :param val: The assigned attribute value of the ArangoDB document. - :type val: Any - :return: The attribute's representation as a DGL Feature - :rtype: Any + def _prepare_dgl_node(self, dgl_node: Json, node_type: str) -> Json: + """Prepare a DGL node object before it gets inserted into its + designated ArangoDB collection. + + Given a JSON representation of a DGL node, you can modify it + before it gets inserted into its ArangoDB collection, + and/or derive a custom vertex id by updating the "_key" attribute + of the vertex (otherwise the vertex's current "_key" value will be used). + + :param dgl_node: The DGL node object to (optionally) modify. 
+ :type dgl_node: adbnx_adapter.typings.Json + :param node_type: The DGL Node Type of the node + :type node_type: str + :return: The DGL Node object + :rtype: Dict[str, Any] """ - if type(val) in [int, float, bool]: - return val - - try: - return float(val) - except (ValueError, TypeError, SyntaxError): - return 0 - - def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor) -> Any: - """ - Given a DGL feature key, its assigned value (for an arbitrary node or edge), - and the collection it belongs to, convert it to a valid ArangoDB attribute - (e.g string, list, number, ...). - - NOTE: No action is needed here if you want to keep the numerical-based values - of your DGL features. - - :param key: The DGL attribute key name - :type key: str - :param col: The ArangoDB collection of the (soon-to-be) ArangoDB document. - :type col: str - :param val: The assigned attribute value of the DGL node. - :type val: Tensor - :return: The feature's representation as an ArangoDB Attribute - :rtype: Any + return dgl_node # pragma: no cover + + def _prepare_dgl_edge(self, dgl_edge: Json, edge_type: DGLCanonicalEType) -> Json: + """Prepare a DGL edge object before it gets inserted into its + designated ArangoDB collection. + + Given a JSON representation of a DGL edge, you can modify it + before it gets inserted into its ArangoDB edge collection, + and/or derive a custom edge id by setting the "_key" attribute + of the edge (otherwise the "_key" will be randomly generated by ArangoDB). + + :param dgl_edge: The DGL edge object to (optionally) modify. + :type dgl_edge: adbnx_adapter.typings.Json + :param edge_type: The Edge Type of The DGL edge. 
Formatted + as (from_collection, edge_collection, to_collection) + :type edge_type: Tuple[str, str, str] + :return: The DGL Edge object + :rtype: Dict[str, Any] """ - try: - return val.item() - except ValueError: - return val.tolist() + return dgl_edge # pragma: no cover diff --git a/adbdgl_adapter/encoders.py b/adbdgl_adapter/encoders.py new file mode 100644 index 0000000..fca7574 --- /dev/null +++ b/adbdgl_adapter/encoders.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# See https://pytorch-geometric.readthedocs.io/en/latest/notes/load_csv.html +# for an example on encoders. + +from typing import Any, Dict, Optional + +from pandas import DataFrame +from torch import Tensor, from_numpy, zeros + + +class IdentityEncoder(object): + """Converts a list of floating-point values into a PyTorch tensor""" + + def __init__(self, dtype: Any = None) -> None: + self.dtype = dtype + + def __call__(self, df: DataFrame) -> Tensor: + return from_numpy(df.values).view(-1, 1).to(self.dtype) + + +class CategoricalEncoder(object): + """Converts a list of values into a PyTorch tensor through a mapping""" + + def __init__(self, mapping: Optional[Dict[Any, Any]] = None) -> None: + self.mapping = mapping + + def __call__(self, df: DataFrame) -> Tensor: + if self.mapping is None: + unique_vals = df.unique() + self.mapping = {u_v: i for i, u_v in enumerate(unique_vals)} + + x = zeros(len(df), 1) + for i, col in enumerate(df.values): + x[i, 0] = self.mapping[col] + + return x diff --git a/adbdgl_adapter/exceptions.py b/adbdgl_adapter/exceptions.py new file mode 100644 index 0000000..7fcc916 --- /dev/null +++ b/adbdgl_adapter/exceptions.py @@ -0,0 +1,19 @@ +class ADBDGLError(Exception): + """Base class for all exceptions in adbdgl-adapter.""" + + +class ADBDGLValidationError(ADBDGLError, TypeError): + """Base class for errors originating from adbdgl-adapter user input validation.""" + + +################## +# Metagraphs # +################## + + +class 
ADBMetagraphError(ADBDGLValidationError): + """Invalid ArangoDB Metagraph value""" + + +class DGLMetagraphError(ADBDGLValidationError): + """Invalid DGL Metagraph value""" diff --git a/adbdgl_adapter/typings.py b/adbdgl_adapter/typings.py index 22e86f7..05d7ec4 100644 --- a/adbdgl_adapter/typings.py +++ b/adbdgl_adapter/typings.py @@ -1,12 +1,39 @@ -__all__ = ["Json", "ArangoMetagraph", "DGLCanonicalEType"] +__all__ = [ + "Json", + "ADBMetagraph", + "ADBMetagraphValues", + "DGLMetagraph", + "DGLMetagraphValues", + "DGLCanonicalEType", + "DGLDataDict", + "ADBMap", + "DGLMap", +] -from typing import Any, Dict, Set, Tuple +from typing import Any, Callable, DefaultDict, Dict, List, Set, Tuple, Union +from pandas import DataFrame from torch import Tensor Json = Dict[str, Any] -ArangoMetagraph = Dict[str, Dict[str, Set[str]]] + +DataFrameToTensor = Callable[[DataFrame], Tensor] +TensorToDataFrame = Callable[[Tensor, DataFrame], DataFrame] + +ADBEncoders = Dict[str, DataFrameToTensor] +ADBMetagraphValues = Union[str, DataFrameToTensor, ADBEncoders] +ADBMetagraph = Dict[str, Dict[str, Union[Set[str], Dict[str, ADBMetagraphValues]]]] DGLCanonicalEType = Tuple[str, str, str] +DGLData = DefaultDict[str, DefaultDict[Union[str, DGLCanonicalEType], Tensor]] DGLDataDict = Dict[DGLCanonicalEType, Tuple[Tensor, Tensor]] + +DGLDataTypes = Union[str, DGLCanonicalEType] +DGLMetagraphValues = Union[str, List[str], TensorToDataFrame] +DGLMetagraph = Dict[ + str, Dict[DGLDataTypes, Union[Set[str], Dict[Any, DGLMetagraphValues]]] +] + +ADBMap = DefaultDict[DGLDataTypes, Dict[str, int]] +DGLMap = DefaultDict[DGLDataTypes, Dict[int, str]] diff --git a/adbdgl_adapter/utils.py b/adbdgl_adapter/utils.py index 3f3f894..b88dc73 100644 --- a/adbdgl_adapter/utils.py +++ b/adbdgl_adapter/utils.py @@ -1,5 +1,17 @@ import logging import os +from typing import Any, Dict, Set, Union + +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TaskProgressColumn, + TextColumn, + 
TimeElapsedColumn, +) + +from .exceptions import ADBMetagraphError, DGLMetagraphError logger = logging.getLogger(__package__) handler = logging.StreamHandler() @@ -9,3 +21,159 @@ ) handler.setFormatter(formatter) logger.addHandler(handler) + + +def get_export_spinner_progress( + text: str, +) -> Progress: + return Progress( + TextColumn(text), + SpinnerColumn("aesthetic", "#5BC0DE"), + TimeElapsedColumn(), + transient=True, + ) + + +def get_import_spinner_progress(text: str) -> Progress: + return Progress( + TextColumn(text), + TextColumn("{task.fields[action]}"), + SpinnerColumn("aesthetic", "#5BC0DE"), + TimeElapsedColumn(), + transient=True, + ) + + +def get_bar_progress(text: str, color: str) -> Progress: + return Progress( + TextColumn(text), + BarColumn(complete_style=color, finished_style=color), + TaskProgressColumn(), + TextColumn("({task.completed}/{task.total})"), + TimeElapsedColumn(), + ) + + +def validate_adb_metagraph(metagraph: Dict[Any, Dict[Any, Any]]) -> None: + meta: Union[Set[Any], Dict[Any, Any]] + + if "vertexCollections" not in metagraph: + raise ADBMetagraphError("Missing 'vertexCollections' key in metagraph") + + if "edgeCollections" not in metagraph: + raise ADBMetagraphError("Missing 'edgeCollections' key in metagraph") + + for parent_key in ["vertexCollections", "edgeCollections"]: + sub_metagraph = metagraph[parent_key] + if not sub_metagraph or type(sub_metagraph) != dict: + raise ADBMetagraphError(f"{parent_key} must map to non-empty dictionary") + + for col, meta in sub_metagraph.items(): + if type(col) != str: + msg = f""" + Invalid {parent_key} sub-key type: + {col} must be str + """ + raise ADBMetagraphError(msg) + + if type(meta) == set: + for m in meta: + if type(m) != str: + msg = f""" + Invalid set value type for {meta}: + {m} must be str + """ + raise ADBMetagraphError(msg) + + elif type(meta) == dict: + for meta_key, meta_val in meta.items(): + if type(meta_key) != str: + msg = f""" + Invalid key type in {meta}: + 
{meta_key} must be str + """ + raise ADBMetagraphError(msg) + + if type(meta_val) not in [str, dict] and not callable(meta_val): + msg = f""" + Invalid mapped value type in {meta}: + {meta_val} must be + str | Dict[str, None | Callable] | Callable + """ + raise ADBMetagraphError(msg) + + if type(meta_val) == dict: + for k, v in meta_val.items(): + if type(k) != str: + msg = f""" + Invalid ArangoDB attribute key type: + {v} must be str + """ + raise ADBMetagraphError(msg) + + if v is not None and not callable(v): + msg = f""" + Invalid DGL Encoder type: + {v} must be None | Callable + """ + raise ADBMetagraphError(msg) + else: + msg = f""" + Invalid mapped value type for {col}: + {meta} must be dict | set + """ + raise ADBMetagraphError(msg) + + +def validate_dgl_metagraph(metagraph: Dict[Any, Dict[Any, Any]]) -> None: + meta: Union[Set[Any], Dict[Any, Any]] + + for node_type in metagraph.get("nodeTypes", {}).keys(): + if type(node_type) != str: + msg = f"Invalid nodeTypes sub-key: {node_type} is not str" + raise DGLMetagraphError(msg) + + for edge_type in metagraph.get("edgeTypes", {}).keys(): + if type(edge_type) != tuple: + msg = f"Invalid edgeTypes sub-key: {edge_type} must be Tuple[str, str, str]" + raise DGLMetagraphError(msg) + else: + for elem in edge_type: + if type(elem) != str: + msg = f"{elem} in {edge_type} must be str" + raise DGLMetagraphError(msg) + + for parent_key in ["nodeTypes", "edgeTypes"]: + for k, meta in metagraph.get(parent_key, {}).items(): + if type(meta) == set: + for m in meta: + if type(m) != str: + msg = f""" + Invalid set value type for {meta}: + {m} must be str + """ + raise DGLMetagraphError(msg) + + elif type(meta) == dict: + for meta_val in meta.values(): + if type(meta_val) not in [str, list] and not callable(meta_val): + msg = f""" + Invalid mapped value type in {meta}: + {meta_val} must be str | List[str] | Callable + """ + raise DGLMetagraphError(msg) + + if type(meta_val) == list: + for v in meta_val: + if type(v) != str: + 
msg = f""" + Invalid ArangoDB attribute key type: + {v} must be str + """ + raise DGLMetagraphError(msg) + else: + msg = f""" + Invalid mapped value type for {k}: + {meta} must be dict | set + """ + raise DGLMetagraphError(msg) diff --git a/examples/ArangoDB_DGL_Adapter.ipynb b/examples/ArangoDB_DGL_Adapter.ipynb index 918fecd..7ace981 100644 --- a/examples/ArangoDB_DGL_Adapter.ipynb +++ b/examples/ArangoDB_DGL_Adapter.ipynb @@ -15,7 +15,7 @@ "id": "U1d45V4OeG89" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -34,7 +34,7 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 2.1.0\n", + "Version: 3.0.0\n", "\n", "Objective: Export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, to [Deep Graph Library](https://www.dgl.ai/) (DGL), a python package for graph neural networks, and vice-versa." ] @@ -57,9 +57,9 @@ "outputs": [], "source": [ "%%capture\n", - "!pip install adbdgl-adapter==2.1.0\n", + "!pip install adbdgl-adapter==3.0.0\n", "!pip install adb-cloud-connector\n", - "!git clone -b 2.1.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", + "!git clone -b 3.0.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", "\n", "## For drawing purposes \n", "!pip install matplotlib\n", @@ -70,26 +70,27 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "niijQHqBM6zp" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "niijQHqBM6zp", + "outputId": "77df8f72-4000-44e8-9dd6-c56bbf33c07d" }, "outputs": [], "source": [ "# All imports\n", "\n", + "import pandas\n", + "import torch\n", "import dgl\n", - "from dgl import remove_self_loop\n", - "from dgl.data import MiniGCDataset\n", "from dgl.data import KarateClubDataset\n", "\n", - "import torch\n", - "from torch import Tensor\n", - "\n", - "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", - "from adbdgl_adapter.typings import Json, ArangoMetagraph, DGLCanonicalEType, DGLDataDict\n", - "\n", "from arango import 
ArangoClient\n", "from adb_cloud_connector import get_temp_credentials\n", "\n", + "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", + "from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder\n", + "\n", "import json\n", "import logging\n", "\n", @@ -130,7 +131,7 @@ "base_uri": "https://localhost:8080/" }, "id": "vf0350qvj8up", - "outputId": "fbf300df-5dcd-44e8-a746-cb554eba1dd8" + "outputId": "bb473200-893d-4d4e-ed6d-239ec497d0e3" }, "outputs": [], "source": [ @@ -163,7 +164,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oOS3AVAnkQEV", - "outputId": "3a7403db-d11b-4f7a-a0b7-6e8220186273" + "outputId": "5b5feaaa-2a6f-4e0e-ef89-68b9e365a6db" }, "outputs": [], "source": [ @@ -199,7 +200,7 @@ "base_uri": "https://localhost:8080/" }, "id": "meLon-KgkU4h", - "outputId": "fa57e121-5294-45f9-b3d0-3a2cfa212da7" + "outputId": "7517b39b-adfa-426d-ccae-89254cf642b5" }, "outputs": [], "source": [ @@ -237,7 +238,7 @@ "base_uri": "https://localhost:8080/" }, "id": "zTebQ0LOlsGA", - "outputId": "f5c06fec-a3e3-41fb-b478-42e492af07de" + "outputId": "c871096b-b06e-4cd8-ad56-06758090600d" }, "outputs": [], "source": [ @@ -280,7 +281,7 @@ "base_uri": "https://localhost:8080/" }, "id": "KsxNujb0mSqZ", - "outputId": "0cf12da9-c754-41a3-9496-5aea0a0faac9" + "outputId": "0b7b4106-7385-4489-e49a-399efbef0cb8" }, "outputs": [], "source": [ @@ -323,7 +324,7 @@ "base_uri": "https://localhost:8080/" }, "id": "2ekGwnJDeG8-", - "outputId": "02cf35c6-9416-44fb-be44-5c0f517e0f78" + "outputId": "84a1c36b-3dc1-47e2-dadf-8a4ebefd98c0" }, "outputs": [], "source": [ @@ -359,7 +360,7 @@ "id": "BM0iRYPDeG8_" }, "source": [ - "For demo purposes, we will be using the [ArangoDB Fraud Detection example graph](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." 
+ "For demo purposes, we will be using the [ArangoDB IMDB example graph](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)." ] }, { @@ -370,12 +371,38 @@ "base_uri": "https://localhost:8080/" }, "id": "7bgGJ3QkeG8_", - "outputId": "15b25959-5a2f-4d1c-852e-5019845716a4" + "outputId": "1f490370-72f3-4d1b-8950-ef1d0f690218" }, "outputs": [], "source": [ "!chmod -R 755 dgl-adapter/\n", - "!./dgl-adapter/tests/assets/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/examples/data/fraud_dump\" --include-system-collections true" + "!./dgl-adapter/tests/tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/tests/data/adb/imdb_dump\" --include-system-collections true" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XLiXYJPRlVYZ", + "outputId": "2666c5b3-1f62-4bfc-c9af-53bc53f0ffd8" + }, + "outputs": [], + "source": [ + "# Create the IMDB graph\n", + "db.delete_graph(\"imdb\", ignore_missing=True)\n", + "db.create_graph(\n", + " \"imdb\",\n", + " edge_definitions=[\n", + " {\n", + " \"edge_collection\": \"Ratings\",\n", + " \"from_vertex_collections\": [\"Users\"],\n", + " \"to_vertex_collections\": [\"Movies\"],\n", + " },\n", + " ],\n", + ")" ] }, { @@ -404,7 +431,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oG496kBeeG9A", - "outputId": "792a3ad2-3d04-4132-d878-a5e52c58dc17" + "outputId": "e5d8657f-a644-4493-ca16-16a300ac4a87" }, "outputs": [], "source": [ @@ -414,36 +441,35 @@ { "cell_type": "markdown", "metadata": { - "id": 
"uByvwf9feG9A" + "id": "bvzJXSHHTi3v" }, "source": [ - "# ArangoDB to DGL\n", - "\n" + "# DGL to ArangoDB" ] }, { "cell_type": "markdown", "metadata": { - "id": "ZrEDmtqCVD0W" + "id": "UafSB_3JZNwK" }, "source": [ - "#### Via ArangoDB Graph" + "#### Karate Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "H8nlvWCryPW0" + "id": "tx-tjPfx0U_h" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Graph\n", + "Data\n", + "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_graph_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L198-L213)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. " + "Notes\n", + "* The `name` parameter in this case is simply for naming your ArangoDB graph." 
] }, { @@ -451,54 +477,70 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 577, + "referenced_widgets": [ + "61d2a0426c324309ab51111933276e3d", + "77c208846c1e4503bc22a5b5504f89ee", + "2d1fc41d509e481cb779603827359184", + "87d9c9de620847f48b4088e8577cd653" + ] }, - "id": "zZ-Hu3lLVHgd", - "outputId": "d1c38c22-eebb-456d-8e4c-140ddd9baed8" + "id": "eRVbiBy4ZdE4", + "outputId": "74ac6cb8-824b-443a-ad6e-9f36b23060a1" }, "outputs": [], "source": [ - "# Define graph name\n", - "graph_name = \"fraud-detection\"\n", + "# Create the DGL graph & draw it\n", + "dgl_karate_graph = KarateClubDataset()[0]\n", + "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", "\n", - "# Create DGL graph from ArangoDB graph\n", - "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name)\n", + "name = \"Karate\"\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = aadbdgl_adapter.arangodb_graph_to_dgl(graph_name, ttl=1000, stream=True)\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graph\n", + "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", + "\n", + "# You can also provide valid Python-Arango Import Bulk options to the command above, like such:\n", + "# adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph, batch_size=5, on_duplicate=\"replace\")\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk\n", "\n", - "# Show graph data\n", "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" + "print(\"URL: \" + 
con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "RQ4CknYfUEuz" + "id": "CNj1xKhwoJoL" }, "source": [ - "#### Via ArangoDB Collections" + "\n", + "#### FakeHeterogeneous Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "bRcCmqWGy1Kf" + "id": "CZ1UX9YX1Zzo" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_collections_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L169-L196)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph." 
] }, { @@ -506,55 +548,84 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 408, + "referenced_widgets": [ + "3fc8b14d794a46118b328893bd216405", + "c7e222474ff445fe86e4e599848b2ae2", + "289a6e16c3d640c29d96edf09908bd0f", + "61f3832c906445a3ab7e7ba9b41c0127", + "99bbe81a24db49ff9352987fd97649cd", + "21e50aa61c3d4de19b5cc0bbe27d53c9", + "f9fdfe6ce44e4e1c8f513f82efca3e0d", + "9b2b3abbe2c04af0bc232c9b16bfd90d", + "8444e147be8f44aba06ec1f8a880104e", + "80e69b3aa98b44e295efe3940c1146c2", + "ec7b8b0b853f463fa079dda845891391", + "dd2376f84c794b4989f385a5bb147bd8" + ] }, - "id": "i4XOpdRLUNlJ", - "outputId": "4d53a3d0-316b-40c2-d841-5fb29fa1358b" + "id": "jbJsvMMaoJoT", + "outputId": "c1606984-c2ef-41c1-e8b1-78a4ae40d93c" }, "outputs": [], "source": [ - "# Define collection names\n", - "vertex_collections = {\"account\", \"Class\", \"customer\"}\n", - "edge_collections = {\"accountHolder\", \"Relationship\", \"transaction\"}\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", "\n", - "# Create DGL from ArangoDB collections\n", - "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections)\n", + "print(hetero_graph)\n", "\n", - "# You can also provide 
valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections, ttl=1000, stream=True)\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "name = \"FakeHetero\"\n", + "\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph)\n", "\n", - "# Show graph data\n", "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "qEH6OdSB23Ya" + "id": "n08RC_GtkDrC" }, "source": [ - "#### Via ArangoDB Metagraph" + "\n", + "#### FakeHeterogeneous Graph with a DGL-ArangoDB metagraph" ] }, { "cell_type": "markdown", "metadata": { - "id": "PipFzJ0HzTMA" + "id": "rUD_y0yxkDrK" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated 
document attributes names that exist within your ArangoDB instance." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `metagraph` parameter is an optional object mapping the DGL keys of the node & edge data to strings, list of strings, or user-defined functions." ] }, { @@ -562,69 +633,128 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 408, + "referenced_widgets": [ + "345a5984959c4e57b7e2715fa8eeef8f", + "99e6613c4187459396eea503453934cb", + "968020b1388e4883843575d9198af1cd", + "f1a08470110e4099af2a3d4cf4d0f956", + "6744eb60dfa04a8598fca3b998ce3077", + "09d25097c75c4fa8a2c7376f1965afc5", + "cb8167f00277413eaaa2ad6e0e162fab", + "8128e6d80fcb4a8ca0a72097bb8b6521", + "575205f1a4e64c5d977e69d4939a5605", + "d20843bfa9064d56b37aaea011789a26", + "8bf075c6f7834d3fa905b7ddc37cf128", + "b080f26fe35241fb9cca48e97bc9ef0c" + ] }, - "id": "7Kz8lXXq23Yk", - "outputId": "7804e7ba-3760-4eb5-8669-f6fa20948262" + "id": "xAdjZiJ8kDrK", + "outputId": "2822ed4b-8199-48e2-a753-4b1f60d648a0" }, "outputs": [], "source": [ - "# Define Metagraph\n", - "fraud_detection_metagraph = {\n", - " \"vertexCollections\": {\n", - " \"account\": {\"rank\", \"Balance\", \"customer_id\"},\n", - " \"Class\": {\"concrete\"},\n", - " \"customer\": {\"rank\"},\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 
1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "print(hetero_graph)\n", + "\n", + "name = \"FakeHetero\"\n", + "\n", + "# Define the metagraph\n", + "def label_tensor_to_2_column_dataframe(dgl_tensor, adb_df):\n", + " \"\"\"\n", + " A user-defined function to create two\n", + " ArangoDB attributes out of the 'user' label tensor\n", + "\n", + " :param dgl_tensor: The DGL Tensor containing the data\n", + " :type dgl_tensor: torch.Tensor\n", + " :param adb_df: The ArangoDB DataFrame to populate, whose\n", + " size is preset to the length of **dgl_tensor**.\n", + " :type adb_df: pandas.DataFrame\n", + "\n", + " NOTE: user-defined functions must return the modified **adb_df**\n", + " \"\"\"\n", + " label_map = {0: \"Class A\", 1: \"Class B\", 2: \"Class C\"}\n", + "\n", + " adb_df[\"label_num\"] = dgl_tensor.tolist()\n", + " adb_df[\"label_str\"] = adb_df[\"label_num\"].map(label_map)\n", + "\n", + " return adb_df\n", + "\n", + "\n", + "metagraph = {\n", + " \"nodeTypes\": {\n", + " \"user\": {\n", + " \"features\": \"user_age\", # 1) you can specify a string value for attribute renaming\n", + " \"label\": label_tensor_to_2_column_dataframe, # 2) you can specify a function for user-defined handling, as long as the function returns a Pandas DataFrame\n", + " },\n", + " # 3) You can specify a set of strings if you want to preserve the same DGL attribute names for the node/edge type\n", + " \"game\": {\"features\"} # this is equivalent to {\"features\": \"features\"}\n", + " },\n", - " \"edgeCollections\": {\n", - " \"accountHolder\": {},\n", - " \"Relationship\": {},\n", - " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\"},\n", + " \"edgeTypes\": {\n", + " (\"user\", \"plays\", \"game\"): {\n", + " # 4) you can specify a list of strings for tensor disassembly (if you know the number of node/edge features in advance)\n", + " \"features\": [\"hours_played\",
\"is_satisfied_with_game\"]\n", + " },\n", " },\n", "}\n", "\n", - "# Create DGL Graph from attributes\n", - "dgl_g = adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_to_dgl(graph_name = 'FraudDetection', fraud_detection_metagraph, ttl=1000, stream=True)\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "# Create the ArangoDB graphs\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=False)\n", "\n", - "# Show graph data\n", - "print('\\n--------------')\n", - "print(dgl_g)\n", - "print('\\n--------------')\n", - "print(dgl_g.ndata)\n", - "print('--------------\\n')\n", - "print(dgl_g.edata)" + "# Create the ArangoDB graph with `explicit_metagraph=True`\n", + "# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.\n", + "# Only 'user', 'game', and ('user', 'plays', 'game') will be brought over (i.e 'topic', ('user', 'follows', 'user'), ... 
are ignored)\n", + "## adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=True)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "DqIKT1lO4ASw" + "id": "mk6m0hBRkkkT" }, "source": [ - "#### Via ArangoDB Metagraph with a custom controller and verbose logging" + "\n", + "#### FakeHeterogeneous Graph with a user-defined ADBDGL Controller" ] }, { "cell_type": "markdown", "metadata": { - "id": "PGkGh_KjzlYM" + "id": "KG7kFoOUkkkb" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", - "* [`adbdgl_adapter.controller._adb_attribute_to_dgl_feature()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L21-L47)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our ArangoDB vertex/edge attributes into DGL node/edge features. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." 
+ "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `ADBDGL_Controller` is an optional user-defined class for controlling how nodes & edges are handled when transitioning from DGL to ArangoDB. **It is interpreted as the alternative to the `metagraph` parameter.**" ] }, { @@ -632,143 +762,158 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 443, + "referenced_widgets": [ + "ea5e9803c5de4d2bbb48782069b9829b", + "3f633be94c7d466ea40571e805a76948", + "96e57d98afce44cd8269204dd19ff6e0", + "da43ef4a8c6a41f9bda153a0cd14c2d7", + "3bc228aa98454dc59a604c8f7ff6b2a0", + "65138d18c9c449d1aaaad387293c5ede", + "3ea99b2a6b4246d3abf628ca743f9f24", + "841ce4f5d391457e858c3c48185e259d", + "987bf80aee4b4b97bfad1699f8384af8", + "4ab3c113235746cab5fde158756ab420", + "09e8c93741bf45acb69ba9e757107564", + "d7d06973b2984eb19fa050409bf62222" + ] }, - "id": "U4_vSdU_4AS4", - "outputId": "8af82665-9ae6-40d4-ada2-248edd993291" + "id": "A-DtrD2Ykkkb", + "outputId": "f2672554-16e4-4b88-e24b-f567ff13bb3f" }, "outputs": [], "source": [ - "# Define Metagraph\n", - "fraud_detection_metagraph = {\n", - " \"vertexCollections\": {\n", - " \"account\": {\"rank\"},\n", - " \"Class\": {\"concrete\", \"name\"},\n", - " \"customer\": {\"Sex\", \"Ssn\", \"rank\"},\n", - " },\n", - " \"edgeCollections\": {\n", - " \"accountHolder\": {},\n", - " \"Relationship\": {},\n", - " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\", \"transaction_date\", \"trans_time\"},\n", - " },\n", - "}\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + 
"hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", "\n", - "# A user-defined Controller class is REQUIRED when converting non-numerical\n", - "# ArangoDB attributes to DGL features.\n", - "class FraudDetection_ADBDGL_Controller(ADBDGL_Controller):\n", - " \"\"\"ArangoDB-DGL controller.\n", + "print(hetero_graph)\n", "\n", - " Responsible for controlling how ArangoDB attributes\n", - " are converted into DGL features, and vice-versa.\n", + "name = \"FakeHetero\"\n", "\n", - " You can derive your own custom ADBDGL_Controller if you want to maintain\n", - " consistency between your ArangoDB attributes & your DGL features.\n", - " \"\"\"\n", + "# Create a custom ADBDGL_Controller\n", + "class Custom_ADBDGL_Controller(ADBDGL_Controller):\n", + " def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:\n", + " \"\"\"Optionally modify a DGL node object before it gets inserted into its designated ArangoDB collection.\n", "\n", - " def _adb_attribute_to_dgl_feature(self, key: str, col: str, val):\n", + " :param dgl_node: The DGL node object to (optionally) modify.\n", + " :param node_type: The DGL Node Type of the node.\n", + " :return: The DGL Node object\n", " \"\"\"\n", - " Given an ArangoDB attribute key, its assigned value (for an arbitrary document),\n", - " and the collection it belongs to, convert it to a valid\n", - " DGL feature: https://docs.dgl.ai/en/0.6.x/guide/graph-feature.html.\n", - "\n", - " NOTE: You must override this function if you want to transfer non-numerical\n", - " ArangoDB attributes to DGL (DGL only accepts 'attributes' (a.k.a features)\n", - " of numerical types). 
Read more about DGL features here:\n", - " https://docs.dgl.ai/en/0.6.x/new-tutorial/2_dglgraph.html#assigning-node-and-edge-features-to-graph.\n", - "\n", - " :param key: The ArangoDB attribute key name\n", - " :type key: str\n", - " :param col: The ArangoDB collection of the ArangoDB document.\n", - " :type col: str\n", - " :param val: The assigned attribute value of the ArangoDB document.\n", - " :type val: Any\n", - " :return: The attribute's representation as a DGL Feature\n", - " :rtype: Any\n", + " dgl_node[\"foo\"] = \"bar\"\n", + " return dgl_node\n", + "\n", + " def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:\n", + " \"\"\"Optionally modify a DGL edge object before it gets inserted into its designated ArangoDB collection.\n", + "\n", + " :param dgl_edge: The DGL edge object to (optionally) modify.\n", + " :param edge_type: The Edge Type of the DGL edge. Formatted\n", + " as (from_collection, edge_collection, to_collection)\n", + " :return: The DGL Edge object\n", " \"\"\"\n", - " try:\n", - " if col == \"transaction\":\n", - " if key == \"transaction_date\":\n", - " return int(str(val).replace(\"-\", \"\"))\n", - " \n", - " if key == \"trans_time\":\n", - " return int(str(val).replace(\":\", \"\"))\n", - " \n", - " if col == \"customer\":\n", - " if key == \"Sex\":\n", - " return {\n", - " \"M\": 0,\n", - " \"F\": 1\n", - " }.get(val, -1)\n", - "\n", - " if key == \"Ssn\":\n", - " return int(str(val).replace(\"-\", \"\"))\n", - "\n", - " if col == \"Class\":\n", - " if key == \"name\":\n", - " return {\n", - " \"Bank\": 0,\n", - " \"Branch\": 1,\n", - " \"Account\": 2,\n", - " \"Customer\": 3\n", - " }.get(val, -1)\n", - "\n", - " except (ValueError, TypeError, SyntaxError):\n", - " return 0\n", - "\n", - " # Rely on the parent Controller as a final measure\n", - " return super()._adb_attribute_to_dgl_feature(key, col, val)\n", - "\n", - "# Instantiate the new adapter\n", - "fraud_adbdgl_adapter = ADBDGL_Adapter(db, 
FraudDetection_ADBDGL_Controller())\n", - "\n", - "# You can also change the adapter's logging level for access to \n", - "# silent, regular, or verbose logging (logging.WARNING, logging.INFO, logging.DEBUG)\n", - "fraud_adbdgl_adapter.set_logging(logging.DEBUG) # verbose logging\n", - "\n", - "# Create DGL Graph from attributes\n", - "dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + " dgl_edge[\"bar\"] = \"foo\"\n", + " return dgl_edge\n", "\n", - "# Show graph data\n", - "print('\\n--------------')\n", - "print(dgl_g)\n", - "print('\\n--------------')\n", - "print(dgl_g.ndata)\n", - "print('--------------\\n')\n", - "print(dgl_g.edata)" + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb(name, hetero_graph)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "bvzJXSHHTi3v" + "id": "uByvwf9feG9A" }, "source": [ - "# DGL to ArangoDB" + "# ArangoDB to DGL\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165, + "referenced_widgets": [ + "c6cffa0a64434e56879ba2a8c9de018a", + "0083494093574c50952dd066502a708d", + "1dea128bde204a8fa53e094e014183fe", + "50f8ff3637ee4fc7af8c811cd5d177be", + "6582a9d3fe044d5380d8e918f3bc5a6d", + "40da9dd52dd6443684b990f74b6cb876", + 
"80d19dc0d20842c3b5c7313c0ad23d24", + "0478c90ef8234f3a8987dbe9cd3030b2", + "c61e3997250d4f93a8e0494db674892d", + "97e7543f202749c197515a9c5c79adbe", + "88e83ddc1ca1464291e1631b8fced847", + "a9c14a3f339445338119631c8e56ff68" + ] + }, + "id": "rnMe3iMz2K7j", + "outputId": "b1485ec1-64bf-43d5-a5fe-7d6bd5fc2da1" + }, + "outputs": [], + "source": [ + "# Start from scratch! (with the same DGL graph)\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "db.delete_graph(\"FakeHetero\", drop_collections=True, ignore_missing=True)\n", + "adbdgl_adapter.dgl_to_arangodb(\"FakeHetero\", hetero_graph)" ] }, { "cell_type": "markdown", "metadata": { - "id": "UafSB_3JZNwK" + "id": "ZrEDmtqCVD0W" }, "source": [ - "#### Karate Graph" + "#### Via ArangoDB Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "tx-tjPfx0U_h" + "id": "H8nlvWCryPW0" }, "source": [ - "Data source\n", - "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_graph_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` 
parameter in this case is simply for naming your ArangoDB graph." + "Notes\n", + "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. \n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." ] }, { @@ -777,63 +922,67 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 683 + "height": 184, + "referenced_widgets": [ + "9403e71c2bbe46bd9e6d49d555264554", + "34c4ef0c4aa5454893c0f0fa35902fbd", + "1690574b32cc4b48a8b87520458d5066", + "a9edf4f85a4a4504b155608bb740178a", + "fd2db543279f4a13ab6376b9c23160e0", + "5c310145af4f4c90b659dee771185ab6", + "31a9f782f36d407f8cc42b19679c5c2c", + "9fd8d07a43cd4c06a2d448047ede846c", + "2c2900512b5244d3a0fcaf7409446d0e", + "c5d064af7f4a49dca6716f98d052e951" + ] }, - "id": "eRVbiBy4ZdE4", - "outputId": "c629be2d-1bc9-4539-c7f2-d3ae46676659" + "id": "zZ-Hu3lLVHgd", + "outputId": "85729665-feb3-4382-e84b-4286162581c3" }, "outputs": [], "source": [ - "# Create the DGL graph & draw it\n", - "dgl_karate_graph = KarateClubDataset()[0]\n", - "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", - "\n", - "name = \"Karate\"\n", + "# Define graph name\n", + "name = \"FakeHetero\"\n", "\n", - "# Delete the graph if it already exists\n", - "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "# Create the DGL Graph from the ArangoDB graph\n", + "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(name)\n", "\n", - "# Create the ArangoDB graph\n", - "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", - "\n", - "# You can also provide valid Python-Arango Import Bulk options to the command above, like such:\n", - "# adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph, batch_size=5, on_duplicate=\"replace\")\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk\n", + "# You can also 
provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(name, ttl=1000, stream=True)\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", "\n", + "# Show graph data\n", "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", - "print(f\"View the original graph below:\\n\")" + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" ] }, { "cell_type": "markdown", "metadata": { - "id": "gshTlSX_ZZsS" + "id": "RQ4CknYfUEuz" }, "source": [ - "\n", - "#### MiniGCDataset Graphs" + "#### Via ArangoDB Collections" ] }, { "cell_type": "markdown", "metadata": { - "id": "KaExiE2x0-M6" + "id": "bRcCmqWGy1Kf" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_collections_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph."
+ "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance.\n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." ] }, { @@ -842,82 +991,64 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 253, + "referenced_widgets": [ + "f01997b9b43d43368d632e26ba9732ad", + "14b29dc1f2b8454fa9acc1d79dcd4870", + "5f5c119141a24cab907ceb2da27e0244", + "46b88027e41a43578ebcc47513dd6911", + "7a43c4b816da4a40b0eed167a85eef22", + "eb376d5cf782424aaccbce31f0d3ede5", + "7a4db2b18c634bef932fb9b1157d4af1", + "b5be8c1e4ab3415c9fffbb61aeb0fff3", + "4e085418ce1b41e1bc24ad6acea92fc4", + "7b5dba3f4d50466eb2071cb13548ef1b" + ] }, - "id": "dADiexlAioGH", - "outputId": "9921ec34-b860-49e8-f8cb-0b403029ead4" + "id": "i4XOpdRLUNlJ", + "outputId": "c0fa5973-3e46-4227-8b0c-48b4f14736e5" }, "outputs": [], "source": [ - "# Load the dgl graphs & draw:\n", - "## 1) Lollipop Graph\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "plt.figure(1)\n", - "nx.draw(dgl_lollipop_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 2) Hypercube Graph\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "plt.figure(2)\n", - "nx.draw(dgl_hypercube_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 3) Clique Graph\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "plt.figure(3)\n", - "nx.draw(dgl_clique_graph.to_networkx(), with_labels=True)\n", - "\n", - "lollipop = \"Lollipop\"\n", - "hypercube = \"Hypercube\"\n", - "clique = \"Clique\"\n", - "\n", - "# Delete the graphs from ArangoDB if they already exist\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True, 
ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "name = \"FakeHetero\"\n", "\n", - "# Create the ArangoDB graphs\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = adbdgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph)\n", + "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\n", + " name, \n", + " v_cols={\"user\", \"game\"},\n", + " e_cols={\"plays\", \"follows\"}\n", + ")\n", "\n", + "# Show graph data (notice that the \"topic\" data is skipped)\n", "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")\n", - "print(f\"View the original graphs below:\\n\")" + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" ] }, { "cell_type": "markdown", "metadata": { - "id": "CNj1xKhwoJoL" + "id": "qEH6OdSB23Ya" }, "source": [ - "\n", - "#### MiniGCDataset Graphs with attributes" + "#### Via ArangoDB-DGL metagraph 1" ] }, { "cell_type": "markdown", "metadata": { - "id": "CZ1UX9YX1Zzo" + "id": "PipFzJ0HzTMA" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods 
used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", - "* [`adbdgl_adapter.controller._dgl_feature_to_adb_attribute()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L49-L70)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our DGL node/edge features into ArangoDB vertex/edge attributes. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. It should contain collections & associated document attributes names that exist within your ArangoDB instance." 
] }, { @@ -925,118 +1056,238 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 409, + "referenced_widgets": [ + "77b31c42e914410aaea93044f1390121", + "8349f1e6b1f34680bacd7de1a1937122", + "38aaa492d75c48f38de60ea0cc5fa93f", + "63845b04ecbc40de8bcc017d754ac907", + "4b7f5f21b98b4c5d8475929bf1f01a65", + "404a19cadaca4b85a957cad231b73cbb", + "bd8b6caa7d2d4df1a99b1870ecc0ae46", + "13d0f7da120b40b993ce3c0b257d5788", + "ea88ab86e9774ed78ea62daa6e338637", + "712770e675424d7eb0c8efd6c34f2012" + ] }, - "id": "jbJsvMMaoJoT", - "outputId": "6dba7563-84b8-4934-a07f-1525ef67bd5e" + "id": "7Kz8lXXq23Yk", + "outputId": "b17433d7-d344-4748-ffe3-f0abca6fb112" }, "outputs": [], "source": [ - "# Load the dgl graphs\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "\n", - " # Add DGL Node & Edge Features to each graph\n", - "dgl_lollipop_graph.ndata[\"random_ndata\"] = torch.tensor(\n", - " [[i, i, i] for i in range(0, dgl_lollipop_graph.num_nodes())]\n", - ")\n", - "dgl_lollipop_graph.edata[\"random_edata\"] = torch.rand(dgl_lollipop_graph.num_edges())\n", + "# Define the Metagraph that transfers ArangoDB attributes \"as is\",\n", + "# meaning the data is already formatted to DGL data standards\n", + "metagraph_v1 = {\n", + " \"vertexCollections\": {\n", + " # Move the \"features\" & \"label\" ArangoDB attributes to DGL as \"features\" & \"label\" Tensors\n", + " \"user\": {\"features\", \"label\"}, # equivalent to {\"features\": \"features\", \"label\": \"label\"}\n", + " \"game\": {\"dgl_game_features\": \"features\"},\n", + " \"topic\": {},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"dgl_plays_features\": \"features\"}, \n", + " \"follows\": {}\n", + " },\n", + "}\n", "\n", - 
"dgl_hypercube_graph.ndata[\"random_ndata\"] = torch.rand(dgl_hypercube_graph.num_nodes())\n", - "dgl_hypercube_graph.edata[\"random_edata\"] = torch.tensor(\n", - " [[[i], [i], [i]] for i in range(0, dgl_hypercube_graph.num_edges())]\n", - ")\n", + "# Create the DGL graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v1)\n", "\n", - "dgl_clique_graph.ndata['clique_ndata'] = torch.tensor([1,2,3,4,5,6])\n", - "dgl_clique_graph.edata['clique_edata'] = torch.tensor(\n", - " [1 if i % 2 == 0 else 0 for i in range(0, dgl_clique_graph.num_edges())]\n", - ")\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0806IB4o3WRz" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cnByWtpa3WR7" + }, + "source": [ + "Data\n", + "* [ArangoDB IMDB Movie Dataset](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)\n", "\n", - "# A user-defined Controller class is OPTIONAL when converting DGL features\n", - "# to ArangoDB attributes. NOTE: A custom Controller is NOT needed if you want to\n", - "# keep the numerical-based values of your DGL features.\n", - "class Clique_ADBDGL_Controller(ADBDGL_Controller):\n", - " \"\"\"ArangoDB-DGL controller.\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - " Responsible for controlling how ArangoDB attributes\n", - " are converted into DGL features, and vice-versa.\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. 
In this example, we rely on user-defined encoders to build DGL-ready tensors (i.e., feature matrices) from ArangoDB attributes. See https://pytorch-geometric.readthedocs.io/en/latest/notes/load_csv.html for an example of using encoders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 499, + "referenced_widgets": [ + "2b13e46a722e4be384fad74e1b3e6461", + "848230df62434c77b5b18f9a43e2d14f", + "59405e2d0c164d5b965680cc9d9cd8f3", + "2a380fe111794c3a951cdafa4a2bf0b3", + "3d081c88cd2945fa9534de722669ada9", + "82f996185e8444ada5e18602e2f8e105" + ] + }, + "id": "cKqLoawE3WR7", + "outputId": "02a8bfed-44ae-4c76-9eea-ba7348738707" + }, + "outputs": [], + "source": [ + "# Define the Metagraph that transfers attributes via user-defined encoders\n", + "metagraph_v2 = {\n", + " \"vertexCollections\": {\n", + " \"Movies\": {\n", + " \"features\": { # Build a feature matrix from the \"Action\" & \"Drama\" document attributes\n", + " \"Action\": IdentityEncoder(dtype=torch.long),\n", + " \"Drama\": IdentityEncoder(dtype=torch.long),\n", + " },\n", + " \"label\": \"Comedy\",\n", + " },\n", + " \"Users\": {\n", + " \"features\": {\n", + " \"Gender\": CategoricalEncoder(), # CategoricalEncoder(mapping={\"M\": 0, \"F\": 1}),\n", + " \"Age\": IdentityEncoder(dtype=torch.long),\n", + " }\n", + " },\n", + " },\n", + " \"edgeCollections\": {\"Ratings\": {\"weight\": \"Rating\"}},\n", + "}\n", "\n", - " You can derive your own custom ADBDGL_Controller if you want to maintain\n", - " consistency between your ArangoDB attributes & your DGL features.\n", - " \"\"\"\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"IMDB\", metagraph_v2)\n", "\n", - " def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor):\n", - " \"\"\"\n", - " Given a DGL feature key, its assigned value (for an arbitrary node or edge),\n", - " and the collection it belongs to, convert it 
to a valid ArangoDB attribute\n", - " (e.g string, list, number, ...).\n", - "\n", - " NOTE: No action is needed here if you want to keep the numerical-based values\n", - " of your DGL features.\n", - "\n", - " :param key: The DGL attribute key name\n", - " :type key: str\n", - " :param col: The ArangoDB collection of the (soon-to-be) ArangoDB document.\n", - " :type col: str\n", - " :param val: The assigned attribute value of the DGL node.\n", - " :type val: Tensor\n", - " :return: The feature's representation as an ArangoDB Attribute\n", - " :rtype: Any\n", - " \"\"\"\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d5ijSCcY4bYs" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aKzxxZrUXJ" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - " if key == \"clique_ndata\":\n", - " try:\n", - " return [\"Eins\", \"Zwei\", \"Drei\", \"Vier\", \"Fünf\", \"Sechs\"][val-1]\n", - " except:\n", - " return -1\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - " if key == \"clique_edata\":\n", - " return bool(val)\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. In this example, we rely on user-defined functions to handle ArangoDB attribute to DGL feature conversion." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 377, + "referenced_widgets": [ + "e4b7b35461e848f5819b9f38d67ee652", + "9968f928e28147f7a0956aff8412a608", + "54801c3c74494fe8bf9e2a7fb64bde48", + "903622e283524c7f89635599920c2b14", + "f0d4515c88a44775be59c4e1a0b3c60a", + "9e1eb071f0b24cb6a8d206477b10b831" + ] + }, + "id": "t-lNli3d4bY0", + "outputId": "7bc48392-81a7-4232-aad2-931ff3c8ca48" + }, + "outputs": [], + "source": [ + "# Define the metagraph that transfers attributes via user-defined functions\n", + "def udf_user_features(user_df):\n", + " # process the user_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # user_df[\"features\"] = ...\n", + " return torch.tensor(user_df[\"features\"].to_list())\n", "\n", - " return super()._dgl_feature_to_adb_attribute(key, col, val)\n", "\n", - "# Re-instantiate a new adapter specifically for the Clique Graph Conversion\n", - "clique_adbgl_adapter = ADBDGL_Adapter(db, Clique_ADBDGL_Controller())\n", + "def udf_game_features(game_df):\n", + " # process the game_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # game_df[\"features\"] = ...\n", + " return torch.tensor(game_df[\"features\"].to_list())\n", "\n", - "# Create the ArangoDB graphs\n", - "lollipop = \"Lollipop_With_Attributes\"\n", - "hypercube = \"Hypercube_With_Attributes\"\n", - "clique = \"Clique_With_Attributes\"\n", "\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "metagraph_v3 = {\n", + " \"vertexCollections\": {\n", + " \"user\": {\n", + " \"features\": udf_user_features, # supports named functions\n", + " \"label\": lambda df: torch.tensor(df[\"label\"].to_list()), # also supports lambda functions\n", + " },\n", + " \"game\": {\"features\": 
udf_game_features},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"features\": (lambda df: torch.tensor(df[\"features\"].to_list()))},\n", + " },\n", + "}\n", "\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = clique_adbgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph) # Notice the new adapter here!\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v3)\n", "\n", - "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")" + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" ] } ], "metadata": { "colab": { "collapsed_sections": [ - "KS9c-vE5eG89", "ot1oJqn7m78n", "Oc__NAd1eG8-", "7y81WHO8eG8_", "QfE_tKxneG9A", + "bvzJXSHHTi3v", + "UafSB_3JZNwK", + "CNj1xKhwoJoL", + "n08RC_GtkDrC", + "mk6m0hBRkkkT", "uByvwf9feG9A", - "bvzJXSHHTi3v" + "ZrEDmtqCVD0W", + "RQ4CknYfUEuz", + "qEH6OdSB23Ya", + "0806IB4o3WRz", + "d5ijSCcY4bYs" ], - "name": "ArangoDB_DGL_Adapter_v2.ipynb", + "name": "ArangoDB_DGL_Adapter_v3.ipynb", "provenance": [] }, "kernelspec": { diff --git a/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.data.json.gz 
b/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.data.json.gz deleted file mode 100644 index 3b56137..0000000 Binary files a/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.structure.json b/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.structure.json deleted file mode 100644 index dc132f3..0000000 --- a/examples/data/fraud_dump/Class_9bd81329febf6efe22788e03ddeaf0af.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63915","deleted":false,"globallyUniqueId":"c6251106/","id":"63915","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"Class","numberOfShards":1,"planId":"6251106","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251107":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.data.json.gz b/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.data.json.gz deleted file mode 100644 index 03244bc..0000000 Binary files a/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.structure.json b/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.structure.json deleted file mode 100644 index e4d2d9a..0000000 --- a/examples/data/fraud_dump/Relationship_fbc97786af4bf30dc5b07809a950792c.structure.json +++ /dev/null @@ -1 +0,0 @@ 
-{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63926","deleted":false,"globallyUniqueId":"c6251114/","id":"63926","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"Relationship","numberOfShards":1,"planId":"6251114","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251115":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":3,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/Text_Search.view.json b/examples/data/fraud_dump/Text_Search.view.json deleted file mode 100644 index 432adb3..0000000 --- a/examples/data/fraud_dump/Text_Search.view.json +++ /dev/null @@ -1 +0,0 @@ -{"globallyUniqueId":"h7CC8359662CF/1626628","id":"1626628","name":"Text_Search","type":"arangosearch","cleanupIntervalStep":10,"commitIntervalMsec":1000,"consolidationIntervalMsec":60000,"consolidationPolicy":{"type":"bytes_accum","threshold":0.10000000149011612},"primarySort":[],"writebufferActive":0,"writebufferIdle":64,"writebufferSizeMax":33554432,"links":{}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.data.json.gz b/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.data.json.gz deleted file mode 100644 index d210671..0000000 Binary files a/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.structure.json b/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.structure.json deleted file mode 100644 index 74877aa..0000000 --- a/examples/data/fraud_dump/_analyzers_839c888a45b895a4783b6dbd338f0155.structure.json +++ /dev/null @@ -1 +0,0 @@ 
-{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63866","deleted":false,"globallyUniqueId":"_analyzers","id":"63866","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_analyzers","numberOfShards":1,"planId":"63866","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.data.json.gz b/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.data.json.gz deleted file mode 100644 index d210671..0000000 Binary files a/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.structure.json b/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.structure.json deleted file mode 100644 index f3bdfc8..0000000 --- a/examples/data/fraud_dump/_appbundles_105ca6a6a72935fd370f79f3a3e62b0e.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63881","deleted":false,"globallyUniqueId":"_appbundles","id":"63881","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_appbundles","numberOfShards":1,"planId":"63881","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.data.json.gz b/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.data.json.gz deleted file mode 100644 index d210671..0000000 Binary files a/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.data.json.gz and /dev/null differ 
diff --git a/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.structure.json b/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.structure.json deleted file mode 100644 index fce1a9d..0000000 --- a/examples/data/fraud_dump/_apps_c3f2c8489196d21e33f194f4bafb3f05.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[{"id":"63893","type":"hash","name":"idx_1654880607689244672","fields":["mount"],"unique":true,"sparse":true,"deduplicate":true}],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63878","deleted":false,"globallyUniqueId":"_apps","id":"63878","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_apps","numberOfShards":1,"planId":"63878","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.data.json.gz b/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.data.json.gz deleted file mode 100644 index d210671..0000000 Binary files a/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.structure.json b/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.structure.json deleted file mode 100644 index 9b42e3d..0000000 --- a/examples/data/fraud_dump/_aqlfunctions_8293af7a2caabc3098bc21db7ce2759d.structure.json +++ /dev/null @@ -1 +0,0 @@ 
-{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63869","deleted":false,"globallyUniqueId":"_aqlfunctions","id":"63869","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_aqlfunctions","numberOfShards":1,"planId":"63869","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.data.json.gz b/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.data.json.gz deleted file mode 100644 index fe7a64b..0000000 Binary files a/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.structure.json b/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.structure.json deleted file mode 100644 index 3823e26..0000000 --- a/examples/data/fraud_dump/_graphs_c827636f2b54efb49f1f02feeeacfb01.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63863","deleted":false,"globallyUniqueId":"_graphs","id":"63863","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_graphs","numberOfShards":1,"planId":"63863","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.data.json.gz b/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.data.json.gz deleted file mode 100644 index d210671..0000000 Binary files a/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.data.json.gz and /dev/null differ diff --git 
a/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.structure.json b/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.structure.json deleted file mode 100644 index 2d03476..0000000 --- a/examples/data/fraud_dump/_modules_5a8c8ba0d331b61fccfd1e88cfedce00.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63887","deleted":false,"globallyUniqueId":"_modules","id":"63887","isSmart":false,"isSystem":true,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":0},"minReplicationFactor":1,"name":"_modules","numberOfShards":1,"planId":"63887","replicationFactor":1,"shardKeys":["_key"],"shards":{},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.data.json.gz b/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.data.json.gz deleted file mode 100644 index 2d431e1..0000000 Binary files a/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.structure.json b/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.structure.json deleted file mode 100644 index 48627c6..0000000 --- a/examples/data/fraud_dump/accountHolder_2e31953e2b3a86325411a027c406e65a.structure.json +++ /dev/null @@ -1 +0,0 @@ 
-{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63921","deleted":false,"globallyUniqueId":"c6251112/","id":"63921","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":1000001610000049},"minReplicationFactor":1,"name":"accountHolder","numberOfShards":1,"planId":"6251112","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251113":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":3,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.data.json.gz b/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.data.json.gz deleted file mode 100644 index 15cdb8c..0000000 Binary files a/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.structure.json b/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.structure.json deleted file mode 100644 index 845b982..0000000 --- a/examples/data/fraud_dump/account_e268443e43d93dab7ebef303bbe9642f.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[{"id":"62842606","type":"persistent","name":"idx_1661656393880436736","fields":["branch_id"],"unique":false,"sparse":false,"deduplicate":false}],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63906","deleted":false,"globallyUniqueId":"c6251100/","id":"63906","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":10000044},"minReplicationFactor":1,"name":"account","numberOfShards":1,"planId":"6251100","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251101":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git 
a/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.data.json.gz b/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.data.json.gz deleted file mode 100644 index ba4383e..0000000 Binary files a/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.structure.json b/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.structure.json deleted file mode 100644 index 7096615..0000000 --- a/examples/data/fraud_dump/bank_bd5af1f610a12434c9128e4a399cef8a.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63909","deleted":false,"globallyUniqueId":"c6251102/","id":"63909","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":1548226},"minReplicationFactor":1,"name":"bank","numberOfShards":1,"planId":"6251102","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251103":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.data.json.gz b/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.data.json.gz deleted file mode 100644 index 601e87b..0000000 Binary files a/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.structure.json b/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.structure.json deleted file mode 100644 index e843f8a..0000000 --- a/examples/data/fraud_dump/branch_9603a224b40d7b67210b78f2e390d00f.structure.json +++ /dev/null @@ -1 +0,0 @@ 
-{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63912","deleted":false,"globallyUniqueId":"c6251104/","id":"63912","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":1548212},"minReplicationFactor":1,"name":"branch","numberOfShards":1,"planId":"6251104","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251105":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json.gz b/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json.gz deleted file mode 100644 index 29c9e9c..0000000 Binary files a/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.structure.json b/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.structure.json deleted file mode 100644 index a9750ca..0000000 --- a/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[{"id":"48122621","type":"persistent","name":"idx_1653203216113860608","fields":["Ssn"],"unique":false,"sparse":false,"deduplicate":false}],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63918","deleted":false,"globallyUniqueId":"c6251108/","id":"63918","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":10000016},"minReplicationFactor":1,"name":"customer","numberOfShards":1,"planId":"6251108","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251109":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":2,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/data/fraud_dump/dump.json 
b/examples/data/fraud_dump/dump.json deleted file mode 100644 index 34d27a7..0000000 --- a/examples/data/fraud_dump/dump.json +++ /dev/null @@ -1 +0,0 @@ -{"database":"fraud-detection","lastTickAtDumpStart":"63082802","properties":{"id":"63861","name":"fraud-detection","isSystem":true}} \ No newline at end of file diff --git a/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.data.json.gz b/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.data.json.gz deleted file mode 100644 index e2c2d28..0000000 Binary files a/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.data.json.gz and /dev/null differ diff --git a/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.structure.json b/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.structure.json deleted file mode 100644 index 569ceb7..0000000 --- a/examples/data/fraud_dump/transaction_f4d5b76a2418eba4baeabc1ed9142b54.structure.json +++ /dev/null @@ -1 +0,0 @@ -{"indexes":[],"parameters":{"allowUserKeys":true,"cacheEnabled":false,"cid":"63931","deleted":false,"globallyUniqueId":"c6251116/","id":"63931","isSmart":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional","lastValue":3152813},"minReplicationFactor":1,"name":"transaction","numberOfShards":1,"planId":"6251116","replicationFactor":3,"shardKeys":["_key"],"shards":{"s6251117":["PRMR-drkxnewt","PRMR-mefeyznw","PRMR-9tthmtzr"]},"status":3,"type":3,"version":8,"waitForSync":false,"writeConcern":1}} \ No newline at end of file diff --git a/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb b/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb index fa9100a..0dc3cbd 100644 --- a/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb +++ b/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb @@ -15,7 +15,7 @@ "id": "U1d45V4OeG89" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -34,7 +34,7 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 2.0.0\n", + 
"Version: 3.0.0\n", "\n", "Objective: Export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, to [Deep Graph Library](https://www.dgl.ai/) (DGL), a python package for graph neural networks, and vice-versa." ] @@ -57,39 +57,55 @@ "outputs": [], "source": [ "%%capture\n", - "!pip install adbdgl-adapter==2.0.0\n", + "!pip install adbdgl-adapter==3.0.0\n", "!pip install adb-cloud-connector\n", - "!git clone -b 2.0.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", + "!git clone -b 3.0.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", "\n", - "## For drawing purposes \n", + "## For drawing purposes\n", "!pip install matplotlib\n", - "!pip install networkx " + "!pip install networkx" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { - "id": "niijQHqBM6zp" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "niijQHqBM6zp", + "outputId": "77df8f72-4000-44e8-9dd6-c56bbf33c07d" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DGL backend not selected or invalid. Assuming PyTorch for now.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setting the default backend to \"pytorch\". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable. 
Valid options are: pytorch, mxnet, tensorflow (all lowercase)\n" + ] + } + ], "source": [ "# All imports\n", "\n", + "import pandas\n", + "import torch\n", "import dgl\n", - "from dgl import remove_self_loop\n", - "from dgl.data import MiniGCDataset\n", "from dgl.data import KarateClubDataset\n", "\n", - "import torch\n", - "from torch import Tensor\n", - "\n", - "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", - "from adbdgl_adapter.typings import Json, ArangoMetagraph, DGLCanonicalEType, DGLDataDict\n", - "\n", "from arango import ArangoClient\n", "from adb_cloud_connector import get_temp_credentials\n", "\n", + "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", + "from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder\n", + "\n", "import json\n", "import logging\n", "\n", @@ -119,18 +135,18 @@ "\n", "DGL represents a directed graph as a `DGLGraph` object. You can construct a graph by specifying the number of nodes in the graph as well as the list of source and destination nodes. **Nodes in the graph have consecutive IDs starting from 0.**\n", "\n", - "The following code constructs a directed \"star\" homogeneous graph with 6 nodes and 5 edges. 
\n" + "The following code constructs a directed \"star\" homogeneous graph with 6 nodes and 5 edges.\n" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vf0350qvj8up", - "outputId": "fbf300df-5dcd-44e8-a746-cb554eba1dd8" + "outputId": "bb473200-893d-4d4e-ed6d-239ec497d0e3" }, "outputs": [ { @@ -169,13 +185,13 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oOS3AVAnkQEV", - "outputId": "3a7403db-d11b-4f7a-a0b7-6e8220186273" + "outputId": "5b5feaaa-2a6f-4e0e-ef89-68b9e365a6db" }, "outputs": [ { @@ -221,13 +237,13 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "meLon-KgkU4h", - "outputId": "fa57e121-5294-45f9-b3d0-3a2cfa212da7" + "outputId": "7517b39b-adfa-426d-ccae-89254cf642b5" }, "outputs": [ { @@ -240,11 +256,11 @@ "\n", "Node Data X attribute: tensor([151, 124, 41, 89, 76, 55])\n", "\n", - "Edge Data A attribute: tensor([[-0.6538, 1.5450, -1.7828, 1.2241],\n", - " [ 1.3176, -0.0545, 0.8196, 0.0695],\n", - " [-0.8568, 1.3135, 0.4980, -0.4290],\n", - " [ 1.5448, 0.2502, 2.3616, 1.2318],\n", - " [-0.9194, 0.2285, 0.0267, -0.0482]])\n" + "Edge Data A attribute: tensor([[ 0.6125, 0.4397, -0.4108, -0.6406],\n", + " [-0.4089, -0.3135, -0.8268, 0.2150],\n", + " [-0.5285, -1.7320, 0.5904, -0.2922],\n", + " [ 0.3878, 0.1858, 0.9546, -0.4877],\n", + " [ 1.4629, -1.9385, -2.1406, -0.1621]])\n" ] } ], @@ -277,13 +293,13 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zTebQ0LOlsGA", - "outputId": "f5c06fec-a3e3-41fb-b478-42e492af07de" + "outputId": "c871096b-b06e-4cd8-ad56-06758090600d" }, "outputs": [ { @@ -330,13 +346,13 @@ }, { "cell_type": "code", - 
"execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KsxNujb0mSqZ", - "outputId": "0cf12da9-c754-41a3-9496-5aea0a0faac9" + "outputId": "0b7b4106-7385-4489-e49a-399efbef0cb8" }, "outputs": [ { @@ -367,7 +383,7 @@ "id": "1M_isKWLnCfr" }, "source": [ - "For more info, visit https://docs.dgl.ai/en/0.6.x/. " + "For more info, visit https://docs.dgl.ai/en/0.6.x/." ] }, { @@ -381,13 +397,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2ekGwnJDeG8-", - "outputId": "02cf35c6-9416-44fb-be44-5c0f517e0f78" + "outputId": "84a1c36b-3dc1-47e2-dadf-8a4ebefd98c0" }, "outputs": [ { @@ -397,9 +413,9 @@ "Log: requesting new credentials...\n", "Succcess: new credentials acquired\n", "{\n", - " \"dbName\": \"TUT56z6dbtgsoeu5cc6aixs7d\",\n", - " \"username\": \"TUTtj3263blez70kmqdi3ts\",\n", - " \"password\": \"TUTf6tursgxqogdo3ww3nplb\",\n", + " \"dbName\": \"TUTk9nlikuz4zowwxfkusway\",\n", + " \"username\": \"TUT6h05us6483maimfr7o28jq\",\n", + " \"password\": \"TUTis4noysrzjeig2bqpdccaa\",\n", " \"hostname\": \"tutorials.arangodb.cloud\",\n", " \"port\": 8529,\n", " \"url\": \"https://tutorials.arangodb.cloud:8529\"\n", @@ -440,81 +456,83 @@ "id": "BM0iRYPDeG8_" }, "source": [ - "For demo purposes, we will be using the [ArangoDB Fraud Detection example graph](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." + "For demo purposes, we will be using the [ArangoDB IMDB example graph](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7bgGJ3QkeG8_", - "outputId": "15b25959-5a2f-4d1c-852e-5019845716a4" + "outputId": "1f490370-72f3-4d1b-8950-ef1d0f690218" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[0m2022-05-25T17:23:07Z [272] INFO [05c30] {restore} Connected to ArangoDB 'http+ssl://tutorials.arangodb.cloud:8529'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:08Z [272] INFO [abeb4] {restore} Database name in source dump is 'fraud-detection'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:08Z [272] INFO [9b414] {restore} # Re-creating document collection '_analyzers'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:08Z [272] INFO [9b414] {restore} # Re-creating document collection '_appbundles'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:12Z [272] INFO [9b414] {restore} # Re-creating document collection '_apps'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:13Z [272] INFO [9b414] {restore} # Re-creating document collection '_aqlfunctions'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:17Z [272] INFO [9b414] {restore} # Re-creating document collection '_graphs'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:17Z [272] INFO [9b414] {restore} # Re-creating document collection '_modules'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:17Z [272] INFO [9b414] {restore} # Re-creating document collection 'account'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:18Z [272] INFO [9b414] {restore} # Re-creating document collection 'bank'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:18Z [272] INFO [9b414] {restore} # Re-creating document collection 'branch'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:18Z [272] INFO [9b414] {restore} # Re-creating document collection 'Class'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:18Z [272] INFO [9b414] {restore} # Re-creating document collection 'customer'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:19Z [272] 
INFO [9b414] {restore} # Re-creating edge collection 'accountHolder'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:19Z [272] INFO [9b414] {restore} # Re-creating edge collection 'Relationship'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:19Z [272] INFO [9b414] {restore} # Re-creating edge collection 'transaction'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_analyzers', data size: 20 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection '_analyzers'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [f723c] {restore} # Creating views...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6d69f] {restore} # Dispatched 14 job(s), using 2 worker(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [d88c6] {restore} # Creating indexes for collection '_apps'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_appbundles', data size: 20 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection '_appbundles'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_aqlfunctions', data size: 20 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection '_aqlfunctions'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_graphs', data size: 292 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection '_graphs'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_modules', data size: 20 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] 
{restore} # Successfully restored document collection '_modules'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [d88c6] {restore} # Creating indexes for collection 'account'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection '_apps', data size: 20 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection '_apps'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection 'bank', data size: 183 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection 'account', data size: 1696 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection 'bank'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection 'branch', data size: 465 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection 'account'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection 'Class', data size: 196 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection 'branch'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [d88c6] {restore} # Creating indexes for collection 'customer'...\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection 'Class'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into edge collection 'accountHolder', data size: 1076 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into document collection 'customer', data size: 794 byte(s)\n", - 
"\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored edge collection 'accountHolder'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into edge collection 'Relationship', data size: 275 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored document collection 'customer'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [94913] {restore} # Loading data into edge collection 'transaction', data size: 2292 byte(s)\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored edge collection 'Relationship'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [6ae09] {restore} # Successfully restored edge collection 'transaction'\n", - "\u001b[0m\u001b[0m2022-05-25T17:23:20Z [272] INFO [a66e1] {restore} Processed 14 collection(s) in 13.360950 s, read 50480 byte(s) from datafiles, sent 9 data batch(es) of 50471 byte(s) total size\n", + "\u001b[0m2022-08-05T20:32:43Z [308] INFO [05c30] {restore} Connected to ArangoDB 'http+ssl://tutorials.arangodb.cloud:8529'\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:43Z [308] INFO [abeb4] {restore} Database name in source dump is 'TUTdit9ohpgz1ntnbetsjstwi'\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:43Z [308] INFO [9b414] {restore} # Re-creating document collection 'Movies'...\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:43Z [308] INFO [9b414] {restore} # Re-creating document collection 'Users'...\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [9b414] {restore} # Re-creating edge collection 'Ratings'...\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [6d69f] {restore} # Dispatched 3 job(s), using 2 worker(s)\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [94913] {restore} # Loading data into document collection 'Movies', data size: 68107 byte(s)\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [94913] {restore} # Loading data 
into document collection 'Users', data size: 16717 byte(s)\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [6ae09] {restore} # Successfully restored document collection 'Users'\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [94913] {restore} # Loading data into edge collection 'Ratings', data size: 1407601 byte(s)\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:44Z [308] INFO [6ae09] {restore} # Successfully restored document collection 'Movies'\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:49Z [308] INFO [75e65] {restore} # Current restore progress: restored 2 of 3 collection(s), read 9270558 byte(s) from datafiles, sent 3 data batch(es) of 881948 byte(s) total size, queued jobs: 0, workers: 2\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:52Z [308] INFO [69a73] {restore} # Still loading data into edge collection 'Ratings', 10660073 byte(s) restored\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:52Z [308] INFO [6ae09] {restore} # Successfully restored edge collection 'Ratings'\n", + "\u001b[0m\u001b[0m2022-08-05T20:32:52Z [308] INFO [a66e1] {restore} Processed 3 collection(s) in 9.925065 s, read 11542023 byte(s) from datafiles, sent 4 data batch(es) of 11542020 byte(s) total size\n", "\u001b[0m" ] } ], "source": [ "!chmod -R 755 dgl-adapter/\n", - "!./dgl-adapter/tests/assets/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/examples/data/fraud_dump\" --include-system-collections true" + "!./dgl-adapter/tests/tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/tests/data/adb/imdb_dump\" --include-system-collections true" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XLiXYJPRlVYZ", + "outputId": "2666c5b3-1f62-4bfc-c9af-53bc53f0ffd8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create the IMDB graph\n", + "db.delete_graph(\"imdb\", ignore_missing=True)\n", + "db.create_graph(\n", + " \"imdb\",\n", + " edge_definitions=[\n", + " {\n", + " \"edge_collection\": \"Ratings\",\n", + " \"from_vertex_collections\": [\"Users\"],\n", + " \"to_vertex_collections\": [\"Movies\"],\n", + " },\n", + " ],\n", + ")" ] }, { @@ -537,20 +555,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oG496kBeeG9A", - "outputId": "792a3ad2-3d04-4132-d878-a5e52c58dc17" + "outputId": "e5d8657f-a644-4493-ca16-16a300ac4a87" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[2022/05/25 17:23:34 +0000] [60] [INFO] - adbdgl_adapter: Instantiated ADBDGL_Adapter with database 'TUT56z6dbtgsoeu5cc6aixs7d'\n" + "[2022/08/05 20:33:59 +0000] [61] [INFO] - adbdgl_adapter: Instantiated ADBDGL_Adapter with database 'TUTk9nlikuz4zowwxfkusway'\n" ] } ], @@ -561,130 +579,134 @@ { "cell_type": "markdown", "metadata": { - "id": "uByvwf9feG9A" + "id": "bvzJXSHHTi3v" }, "source": [ - "# ArangoDB to DGL\n", - "\n" + "# DGL to ArangoDB" ] }, { "cell_type": "markdown", "metadata": { - "id": "ZrEDmtqCVD0W" + "id": "UafSB_3JZNwK" }, "source": [ - "#### Via ArangoDB Graph" + "#### Karate Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "H8nlvWCryPW0" + "id": "tx-tjPfx0U_h" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Graph\n", + "Data\n", + "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", "\n", - "Package methods used\n", - "* 
[`adbdgl_adapter.adapter.arangodb_graph_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L198-L213)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. " + "Notes\n", + "* The `name` parameter in this case is simply for naming your ArangoDB graph." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 577, + "referenced_widgets": [ + "61d2a0426c324309ab51111933276e3d", + "77c208846c1e4503bc22a5b5504f89ee", + "2d1fc41d509e481cb779603827359184", + "87d9c9de620847f48b4088e8577cd653" + ] }, - "id": "zZ-Hu3lLVHgd", - "outputId": "d1c38c22-eebb-456d-8e4c-140ddd9baed8" + "id": "eRVbiBy4ZdE4", + "outputId": "74ac6cb8-824b-443a-ad6e-9f36b23060a1" }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2022/05/25 17:23:40 +0000] [60] [INFO] - adbdgl_adapter: Created DGL 'fraud-detection' Graph\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "61d2a0426c324309ab51111933276e3d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "--------------------\n", - "Graph(num_nodes={'account': 54, 'customer': 17},\n", - " num_edges={('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", - " metagraph=[('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", - "['account', 'customer']\n", - "['accountHolder', 'transaction']\n" - ] - } - ], - "source": [ - "# Define graph name\n", - "graph_name = \"fraud-detection\"\n", - "\n", - "# Create DGL graph from ArangoDB graph\n", - 
"dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name)\n", - "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = aadbdgl_adapter.arangodb_graph_to_dgl(graph_name, ttl=1000, stream=True)\n", - "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", - "\n", - "# Show graph data\n", - "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RQ4CknYfUEuz" - }, - "source": [ - "#### Via ArangoDB Collections" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bRcCmqWGy1Kf" - }, - "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", - "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_collections_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L169-L196)\n", - "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2d1fc41d509e481cb779603827359184", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - "id": "i4XOpdRLUNlJ", - "outputId": "4d53a3d0-316b-40c2-d841-5fb29fa1358b" - }, - "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[2022/05/25 17:23:46 +0000] [60] [INFO] - adbdgl_adapter: Created DGL 'fraud-detection' Graph\n" + "[2022/08/05 20:34:04 +0000] [61] [INFO] - adbdgl_adapter: Created ArangoDB 'Karate' Graph\n" ] }, { @@ -693,467 +715,345 @@ "text": [ "\n", "--------------------\n", - "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n", - " num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", - " metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", - "['Class', 'account', 'customer']\n", - "['Relationship', 'accountHolder', 'transaction']\n" + "URL: https://tutorials.arangodb.cloud:8529\n", + "Username: TUT6h05us6483maimfr7o28jq\n", + "Password: TUTis4noysrzjeig2bqpdccaa\n", + "Database: TUTk9nlikuz4zowwxfkusway\n", + "--------------------\n", + "\n", + "View the created graph here: https://tutorials.arangodb.cloud:8529/_db/TUTk9nlikuz4zowwxfkusway/_admin/aardvark/index.html#graph/Karate\n", + "\n", + "View the original graph below:\n", + "\n" ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "# Define collection names\n", - "vertex_collections = {\"account\", \"Class\", \"customer\"}\n", - "edge_collections = {\"accountHolder\", \"Relationship\", \"transaction\"}\n", + "# Create the DGL graph & draw it\n", + "dgl_karate_graph = KarateClubDataset()[0]\n", + "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", "\n", - "# Create DGL from ArangoDB collections\n", - "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections)\n", + "name = \"Karate\"\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections, ttl=1000, stream=True)\n", - "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graph\n", + "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", + "\n", + "# You can also provide valid Python-Arango Import Bulk options to the command above, like such:\n", + "# adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph, batch_size=5, on_duplicate=\"replace\")\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk\n", "\n", - "# Show graph data\n", "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: 
{con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "qEH6OdSB23Ya" + "id": "CNj1xKhwoJoL" }, "source": [ - "#### Via ArangoDB Metagraph" + "\n", + "#### FakeHeterogeneous Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "PipFzJ0HzTMA" + "id": "CZ1UX9YX1Zzo" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph." 
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 408, + "referenced_widgets": [ + "3fc8b14d794a46118b328893bd216405", + "c7e222474ff445fe86e4e599848b2ae2", + "289a6e16c3d640c29d96edf09908bd0f", + "61f3832c906445a3ab7e7ba9b41c0127", + "99bbe81a24db49ff9352987fd97649cd", + "21e50aa61c3d4de19b5cc0bbe27d53c9", + "f9fdfe6ce44e4e1c8f513f82efca3e0d", + "9b2b3abbe2c04af0bc232c9b16bfd90d", + "8444e147be8f44aba06ec1f8a880104e", + "80e69b3aa98b44e295efe3940c1146c2", + "ec7b8b0b853f463fa079dda845891391", + "dd2376f84c794b4989f385a5bb147bd8" + ] }, - "id": "7Kz8lXXq23Yk", - "outputId": "7804e7ba-3760-4eb5-8669-f6fa20948262" + "id": "jbJsvMMaoJoT", + "outputId": "c1606984-c2ef-41c1-e8b1-78a4ae40d93c" }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "[2022/05/25 17:23:50 +0000] [60] [INFO] - adbdgl_adapter: Created DGL 'FraudDetection' Graph\n" + "Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},\n", + " num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "--------------\n", - "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n", - " num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", - " metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", - "\n", - "--------------\n", - "defaultdict(, {'concrete': {'Class': tensor([True, True, True, True])}, 'customer_id': {'account': tensor([10000009, 10000004, 10000004, 10000010, 10000002, 10000011, 10000015,\n", - " 10000006, 
10000010, 10810, 10000002, 10000014, 10000008, 0,\n", - " 10000002, 0, 10000008, 10000006, 10000012, 10000015, 10000001,\n", - " 10000010, 10000015, 10000005, 10000009, 10000008, 10000011, 10000014,\n", - " 10000010, 10000006, 10000002, 10000007, 10000006, 10000005, 0,\n", - " 10000010, 10810, 0, 10000009, 10000006, 10000002, 10000005,\n", - " 10000009, 10000012, 10000007, 10000002, 10000014, 0, 10810,\n", - " 10000016, 10000006, 10000016, 10000013, 10810])}, 'Balance': {'account': tensor([5331, 7630, 1433, 2201, 4837, 5817, 1689, 1042, 4104, 10, 2338, 10,\n", - " 3779, 0, 529, 0, 1992, 2912, 6367, 1819, 0, 221, 5062, 2372,\n", - " 841, 5393, 1138, 8414, 4064, 5686, 6294, 6540, 7358, 3452, 0, 3993,\n", - " 10, 0, 471, 8148, 5832, 1758, 1747, 1679, 6789, 1599, 8320, 0,\n", - " 10, 8626, 7199, 8644, 3879, 10])}, 'rank': {'account': tensor([0.0021, 0.0031, 0.0052, 0.0021, 0.0046, 0.0037, 0.0032, 0.0042, 0.0021,\n", - " 0.0021, 0.0030, 0.0037, 0.0040, 0.0037, 0.0021, 0.0046, 0.0040, 0.0030,\n", - " 0.0026, 0.0032, 0.0021, 0.0034, 0.0032, 0.0021, 0.0021, 0.0035, 0.0026,\n", - " 0.0026, 0.0046, 0.0021, 0.0021, 0.0035, 0.0036, 0.0036, 0.0038, 0.0055,\n", - " 0.0021, 0.0041, 0.0044, 0.0021, 0.0030, 0.0035, 0.0033, 0.0026, 0.0071,\n", - " 0.0036, 0.0032, 0.0059, 0.0021, 0.0090, 0.0057, 0.0032, 0.0026, 0.0021]), 'customer': tensor([0.0135, 0.0050, 0.0062, 0.0066, 0.0096, 0.0088, 0.0089, 0.0047, 0.0066,\n", - " 0.0045, 0.0062, 0.0103, 0.0081, 0.0039, 0.0054, 0.0044, 0.0093])}})\n", - "--------------\n", - "\n", - "defaultdict(, {'receiver_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000003, 10000000001, 10000000002, 10000000002,\n", - " 10000000003, 10000000001, 10000000003, 10000000001, 10000000003,\n", - " 10000000002, 10000000003, 0, 10000000003, 10000000003,\n", - " 0, 10000000001, 0, 10000000002, 10000000003,\n", - " 10000000003, 10000000003, 10000000001, 0, 10000000003,\n", - " 10000000002, 10000000003, 10000000003, 10000000001, 
10000000001,\n", - " 10000000003, 10000000003, 10000000003, 10000000003, 10000000001,\n", - " 10000000002, 0, 10000000001, 10000000001, 10000000002,\n", - " 10000000001, 10000000003, 10000000003, 10000000003, 10000000001,\n", - " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", - " 10000000003, 0, 10000000003, 10000000003, 0,\n", - " 10000000003, 10000000002, 10000000002, 10000000001, 10000000003,\n", - " 10000000003, 10000000003])}, 'sender_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000002, 10000000001, 10000000001, 10000000002,\n", - " 10000000003, 10000000003, 10000000002, 10000000002, 10000000003,\n", - " 10000000001, 10000000001, 0, 10000000003, 10000000003,\n", - " 0, 10000000002, 0, 10000000001, 10000000003,\n", - " 10000000001, 10000000003, 10000000002, 0, 10000000003,\n", - " 10000000003, 10000000003, 10000000003, 10000000001, 10000000001,\n", - " 10000000002, 10000000001, 10000000003, 10000000003, 10000000001,\n", - " 10000000001, 0, 10000000003, 10000000002, 10000000001,\n", - " 10000000002, 10000000003, 10000000003, 10000000003, 10000000002,\n", - " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", - " 10000000001, 0, 10000000003, 10000000003, 0,\n", - " 10000000003, 10000000003, 10000000001, 10000000001, 10000000003,\n", - " 10000000003, 10000000002])}, 'transaction_amt': {('account', 'transaction', 'account'): tensor([9000, 299, 498, 954, 756, 627, 142, 946, 920, 9000, 421, 343,\n", - " 9000, 457, 9000, 9000, 53, 9000, 284, 120, 441, 9000, 364, 901,\n", - " 9000, 279, 9000, 9000, 273, 127, 952, 354, 795, 9000, 835, 761,\n", - " 9000, 478, 172, 804, 665, 995, 9000, 9000, 670, 9000, 340, 9000,\n", - " 747, 347, 52, 911, 762, 9000, 0, 790, 619, 491, 954, 9000,\n", - " 9000, 843])}})\n" - ] - } - ], - "source": [ - "# Define Metagraph\n", - "fraud_detection_metagraph = {\n", - " \"vertexCollections\": {\n", - " \"account\": {\"rank\", \"Balance\", \"customer_id\"},\n", - " \"Class\": 
{\"concrete\"},\n", - " \"customer\": {\"rank\"},\n", - " },\n", - " \"edgeCollections\": {\n", - " \"accountHolder\": {},\n", - " \"Relationship\": {},\n", - " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\"},\n", - " },\n", - "}\n", - "\n", - "# Create DGL Graph from attributes\n", - "dgl_g = adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", - "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_to_dgl(graph_name = 'FraudDetection', fraud_detection_metagraph, ttl=1000, stream=True)\n", - "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", - "\n", - "# Show graph data\n", - "print('\\n--------------')\n", - "print(dgl_g)\n", - "print('\\n--------------')\n", - "print(dgl_g.ndata)\n", - "print('--------------\\n')\n", - "print(dgl_g.edata)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DqIKT1lO4ASw" - }, - "source": [ - "#### Via ArangoDB Metagraph with a custom controller and verbose logging" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PGkGh_KjzlYM" - }, - "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", - "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", - "* [`adbdgl_adapter.controller._adb_attribute_to_dgl_feature()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L21-L47)\n", - "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our ArangoDB vertex/edge attributes into DGL 
node/edge features. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3fc8b14d794a46118b328893bd216405", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - "id": "U4_vSdU_4AS4", - "outputId": "8af82665-9ae6-40d4-ada2-248edd993291" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2022/05/25 17:23:56 +0000] [60] [INFO] - adbdgl_adapter: Instantiated ADBDGL_Adapter with database 'TUT56z6dbtgsoeu5cc6aixs7d'\n", - "[2022/05/25 17:23:56 +0000] [60] [DEBUG] - adbdgl_adapter: Starting arangodb_to_dgl(FraudDetection, ...):\n", - "[2022/05/25 17:23:56 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'account' vertices\n", - "[2022/05/25 17:23:56 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'Class' vertices\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'customer' vertices\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'accountHolder' edges\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'Relationship' edges\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 'transaction' edges\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'FraudDetection' homogenous? 
False\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 54 'rank' features into 'account'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 17 'rank' features into 'customer'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 4 'name' features into 'Class'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 4 'concrete' features into 'Class'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 17 'Ssn' features into 'customer'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 17 'Sex' features into 'customer'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 62 'trans_time' features into 'transaction'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 62 'transaction_amt' features into 'transaction'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 62 'receiver_bank_id' features into 'transaction'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 62 'transaction_date' features into 'transaction'\n", - "[2022/05/25 17:23:57 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting 62 'sender_bank_id' features into 'transaction'\n", - "[2022/05/25 17:23:57 +0000] [60] [INFO] - adbdgl_adapter: Created DGL 'FraudDetection' Graph\n" - ] + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "\n",
-            "--------------\n",
-            "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n",
-            "      num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n",
-            "      metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n",
-            "\n",
-            "--------------\n",
-            "defaultdict(, {'name': {'Class': tensor([0, 1, 2, 3])}, 'concrete': {'Class': tensor([True, True, True, True])}, 'rank': {'account': tensor([0.0021, 0.0031, 0.0052, 0.0021, 0.0046, 0.0037, 0.0032, 0.0042, 0.0021,\n",
-            "        0.0021, 0.0030, 0.0037, 0.0040, 0.0037, 0.0021, 0.0046, 0.0040, 0.0030,\n",
-            "        0.0026, 0.0032, 0.0021, 0.0034, 0.0032, 0.0021, 0.0021, 0.0035, 0.0026,\n",
-            "        0.0026, 0.0046, 0.0021, 0.0021, 0.0035, 0.0036, 0.0036, 0.0038, 0.0055,\n",
-            "        0.0021, 0.0041, 0.0044, 0.0021, 0.0030, 0.0035, 0.0033, 0.0026, 0.0071,\n",
-            "        0.0036, 0.0032, 0.0059, 0.0021, 0.0090, 0.0057, 0.0032, 0.0026, 0.0021]), 'customer': tensor([0.0135, 0.0050, 0.0062, 0.0066, 0.0096, 0.0088, 0.0089, 0.0047, 0.0066,\n",
-            "        0.0045, 0.0062, 0.0103, 0.0081, 0.0039, 0.0054, 0.0044, 0.0093])}, 'Ssn': {'customer': tensor([123456786, 123456780, 123456780, 123456787, 123456780, 123456789,\n",
-            "        123456780, 123456785, 123456783, 123456784, 123456780, 123456788,\n",
-            "        123456782, 123456781, 123456780, 123456780, 111223333])}, 'Sex': {'customer': tensor([1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])}})\n",
-            "--------------\n",
-            "\n",
-            "defaultdict(, {'trans_time': {('account', 'transaction', 'account'): tensor([1136, 1516, 1340, 1030, 1552, 1116, 1450,  924, 1046, 1426, 1247, 1459,\n",
-            "           0, 1459, 1258,    0, 1758,    0, 1230, 1210, 1252, 1039, 1741,    0,\n",
-            "        1420, 1713, 1710, 1028, 1636, 1054, 1658, 1332, 1316,  955, 1629, 1642,\n",
-            "           0, 1710,  932, 1652, 1018, 1527, 1555, 1640, 1158, 1035, 1015, 1133,\n",
-            "        1320, 1514, 1213,    0, 1133, 1340,    0, 1026, 1312, 1027, 1745, 1342,\n",
-            "        1520, 1141])}, 'transaction_amt': {('account', 'transaction', 'account'): tensor([9000,  299,  498,  954,  756,  627,  142,  946,  920, 9000,  421,  343,\n",
-            "        9000,  457, 9000, 9000,   53, 9000,  284,  120,  441, 9000,  364,  901,\n",
-            "        9000,  279, 9000, 9000,  273,  127,  952,  354,  795, 9000,  835,  761,\n",
-            "        9000,  478,  172,  804,  665,  995, 9000, 9000,  670, 9000,  340, 9000,\n",
-            "         747,  347,   52,  911,  762, 9000,    0,  790,  619,  491,  954, 9000,\n",
-            "        9000,  843])}, 'receiver_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000003, 10000000001, 10000000002, 10000000002,\n",
-            "        10000000003, 10000000001, 10000000003, 10000000001, 10000000003,\n",
-            "        10000000002, 10000000003,           0, 10000000003, 10000000003,\n",
-            "                  0, 10000000001,           0, 10000000002, 10000000003,\n",
-            "        10000000003, 10000000003, 10000000001,           0, 10000000003,\n",
-            "        10000000002, 10000000003, 10000000003, 10000000001, 10000000001,\n",
-            "        10000000003, 10000000003, 10000000003, 10000000003, 10000000001,\n",
-            "        10000000002,           0, 10000000001, 10000000001, 10000000002,\n",
-            "        10000000001, 10000000003, 10000000003, 10000000003, 10000000001,\n",
-            "        10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n",
-            "        10000000003,           0, 10000000003, 10000000003,           0,\n",
-            "        10000000003, 10000000002, 10000000002, 10000000001, 10000000003,\n",
-            "        10000000003, 10000000003])}, 'transaction_date': {('account', 'transaction', 'account'): tensor([  201966,   201721,  2017528,  2018924,  2017516,  2018128,  2019213,\n",
-            "          201847,  2017914,   201966,  2017810, 20181020,        0,  2017724,\n",
-            "          201966,        0,  2019311,        0,  2018211,  2018125,   201932,\n",
-            "          201966,   201795,        0,   201966,  2017111,   201966,   201966,\n",
-            "         2019822,  2017317,  2019124,  2017121,  2017110,   201966,  2017717,\n",
-            "        20181012,        0, 20181023,  2019724,  2019611,  2019928,  2019117,\n",
-            "          201966,   201966,  2017328,   201966,  2019316,   201966,  2017914,\n",
-            "         2017521,   201713,        0,  2018124,   201966,        0,   201784,\n",
-            "          201713, 20171212,  2019413,   201966,   201966,   201887])}, 'sender_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000002, 10000000001, 10000000001, 10000000002,\n",
-            "        10000000003, 10000000003, 10000000002, 10000000002, 10000000003,\n",
-            "        10000000001, 10000000001,           0, 10000000003, 10000000003,\n",
-            "                  0, 10000000002,           0, 10000000001, 10000000003,\n",
-            "        10000000001, 10000000003, 10000000002,           0, 10000000003,\n",
-            "        10000000003, 10000000003, 10000000003, 10000000001, 10000000001,\n",
-            "        10000000002, 10000000001, 10000000003, 10000000003, 10000000001,\n",
-            "        10000000001,           0, 10000000003, 10000000002, 10000000001,\n",
-            "        10000000002, 10000000003, 10000000003, 10000000003, 10000000002,\n",
-            "        10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n",
-            "        10000000001,           0, 10000000003, 10000000003,           0,\n",
-            "        10000000003, 10000000003, 10000000001, 10000000001, 10000000003,\n",
-            "        10000000003, 10000000002])}})\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Define Metagraph\n",
-        "fraud_detection_metagraph = {\n",
-        "    \"vertexCollections\": {\n",
-        "        \"account\": {\"rank\"},\n",
-        "        \"Class\": {\"concrete\", \"name\"},\n",
-        "        \"customer\": {\"Sex\", \"Ssn\", \"rank\"},\n",
-        "    },\n",
-        "    \"edgeCollections\": {\n",
-        "        \"accountHolder\": {},\n",
-        "        \"Relationship\": {},\n",
-        "        \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\", \"transaction_date\", \"trans_time\"},\n",
-        "    },\n",
-        "}\n",
-        "\n",
-        "# A user-defined Controller class is REQUIRED when converting non-numerical\n",
-        "# ArangoDB attributes to DGL features.\n",
-        "class FraudDetection_ADBDGL_Controller(ADBDGL_Controller):\n",
-        "    \"\"\"ArangoDB-DGL controller.\n",
-        "\n",
-        "    Responsible for controlling how ArangoDB attributes\n",
-        "    are converted into DGL features, and vice-versa.\n",
-        "\n",
-        "    You can derive your own custom ADBDGL_Controller if you want to maintain\n",
-        "    consistency between your ArangoDB attributes & your DGL features.\n",
-        "    \"\"\"\n",
-        "\n",
-        "    def _adb_attribute_to_dgl_feature(self, key: str, col: str, val):\n",
-        "        \"\"\"\n",
-        "        Given an ArangoDB attribute key, its assigned value (for an arbitrary document),\n",
-        "        and the collection it belongs to, convert it to a valid\n",
-        "        DGL feature: https://docs.dgl.ai/en/0.6.x/guide/graph-feature.html.\n",
-        "\n",
-        "        NOTE: You must override this function if you want to transfer non-numerical\n",
-        "        ArangoDB attributes to DGL (DGL only accepts 'attributes' (a.k.a features)\n",
-        "        of numerical types). Read more about DGL features here:\n",
-        "        https://docs.dgl.ai/en/0.6.x/new-tutorial/2_dglgraph.html#assigning-node-and-edge-features-to-graph.\n",
-        "\n",
-        "        :param key: The ArangoDB attribute key name\n",
-        "        :type key: str\n",
-        "        :param col: The ArangoDB collection of the ArangoDB document.\n",
-        "        :type col: str\n",
-        "        :param val: The assigned attribute value of the ArangoDB document.\n",
-        "        :type val: Any\n",
-        "        :return: The attribute's representation as a DGL Feature\n",
-        "        :rtype: Any\n",
-        "        \"\"\"\n",
-        "        try:\n",
-        "          if col == \"transaction\":\n",
-        "            if key == \"transaction_date\":\n",
-        "              return int(str(val).replace(\"-\", \"\"))\n",
-        "    \n",
-        "            if key == \"trans_time\":\n",
-        "              return int(str(val).replace(\":\", \"\"))\n",
-        "    \n",
-        "          if col == \"customer\":\n",
-        "            if key == \"Sex\":\n",
-        "              return {\n",
-        "                  \"M\": 0,\n",
-        "                  \"F\": 1\n",
-        "              }.get(val, -1)\n",
-        "\n",
-        "            if key == \"Ssn\":\n",
-        "              return int(str(val).replace(\"-\", \"\"))\n",
-        "\n",
-        "          if col == \"Class\":\n",
-        "            if key == \"name\":\n",
-        "              return {\n",
-        "                  \"Bank\": 0,\n",
-        "                  \"Branch\": 1,\n",
-        "                  \"Account\": 2,\n",
-        "                  \"Customer\": 3\n",
-        "              }.get(val, -1)\n",
-        "\n",
-        "        except (ValueError, TypeError, SyntaxError):\n",
-        "          return 0\n",
-        "\n",
-        "        # Rely on the parent Controller as a final measure\n",
-        "        return super()._adb_attribute_to_dgl_feature(key, col, val)\n",
-        "\n",
-        "# Instantiate the new adapter\n",
-        "fraud_adbdgl_adapter = ADBDGL_Adapter(db, FraudDetection_ADBDGL_Controller())\n",
-        "\n",
-        "# You can also change the adapter's logging level for access to \n",
-        "# silent, regular, or verbose logging (logging.WARNING, logging.INFO, logging.DEBUG)\n",
-        "fraud_adbdgl_adapter.set_logging(logging.DEBUG) # verbose logging\n",
-        "\n",
-        "# Create DGL Graph from attributes\n",
-        "dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl('FraudDetection',  fraud_detection_metagraph)\n",
-        "\n",
-        "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n",
-        "# dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl(graph_name = 'FraudDetection',  fraud_detection_metagraph, ttl=1000, stream=True)\n",
-        "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n",
-        "\n",
-        "# Show graph data\n",
-        "print('\\n--------------')\n",
-        "print(dgl_g)\n",
-        "print('\\n--------------')\n",
-        "print(dgl_g.ndata)\n",
-        "print('--------------\\n')\n",
-        "print(dgl_g.edata)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "bvzJXSHHTi3v"
-      },
-      "source": [
-        "# DGL to ArangoDB"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "UafSB_3JZNwK"
-      },
-      "source": [
-        "#### Karate Graph"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "tx-tjPfx0U_h"
-      },
-      "source": [
-        "Data source\n",
-        "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n",
-        "\n",
-        "Package methods used\n",
-        "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n",
-        "\n",
-        "Important notes\n",
-        "* The `name` parameter in this case is simply for naming your ArangoDB graph."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 10,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 683
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "289a6e16c3d640c29d96edf09908bd0f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "99bbe81a24db49ff9352987fd97649cd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f9fdfe6ce44e4e1c8f513f82efca3e0d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8444e147be8f44aba06ec1f8a880104e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ec7b8b0b853f463fa079dda845891391", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - "id": "eRVbiBy4ZdE4", - "outputId": "c629be2d-1bc9-4539-c7f2-d3ae46676659" - }, - "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Starting dgl_to_arangodb(Karate, ...):\n", - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Karate' using default canonical_etypes? True\n", - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Karate' homogenous? True\n", - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 34 'Karate_N' DGL nodes\n", - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 156 'Karate_E' DGL edges\n", - "[2022/05/25 17:24:04 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 34 documents into 'Karate_N'\n", - "[2022/05/25 17:24:05 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 156 documents into 'Karate_E'\n", - "[2022/05/25 17:24:05 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Karate' Graph\n" + "[2022/08/05 20:35:24 +0000] [61] [INFO] - adbdgl_adapter: Created ArangoDB 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created ArangoDB 'FakeHetero' Graph\n" ] }, { @@ -1163,41 +1063,39 @@ "\n", "--------------------\n", "URL: https://tutorials.arangodb.cloud:8529\n", - "Username: TUTtj3263blez70kmqdi3ts\n", - "Password: TUTf6tursgxqogdo3ww3nplb\n", - "Database: TUT56z6dbtgsoeu5cc6aixs7d\n", + "Username: TUT6h05us6483maimfr7o28jq\n", + "Password: TUTis4noysrzjeig2bqpdccaa\n", + "Database: TUTk9nlikuz4zowwxfkusway\n", "--------------------\n", "\n", - "View the created graph here: https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Karate\n", - "\n", + "View the created graph here: https://tutorials.arangodb.cloud:8529/_db/TUTk9nlikuz4zowwxfkusway/_admin/aardvark/index.html#graph/FakeHetero\n", "\n", "View the original graph below:\n", "\n" 
] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ - "# Create the DGL graph & draw it\n", - "dgl_karate_graph = KarateClubDataset()[0]\n", - "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", "\n", - "name = \"Karate\"\n", + "print(hetero_graph)\n", + "\n", + "name = \"FakeHetero\"\n", "\n", "# Delete the graph if it already exists\n", "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", "\n", - "# Create the ArangoDB graph\n", - "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", + "# Create the ArangoDB graphs\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph)\n", "\n", "print('\\n--------------------')\n", "print(\"URL: \" + con[\"url\"])\n", @@ -1212,98 +1110,74 @@ { "cell_type": "markdown", "metadata": { - "id": "gshTlSX_ZZsS" + "id": "n08RC_GtkDrC" }, "source": [ "\n", - "#### MiniGCDataset Graphs" + "#### FakeHeterogeneous Graph with a DGL-ArangoDB metagraph" ] }, { "cell_type": "markdown", "metadata": { - "id": "KaExiE2x0-M6" + "id": "rUD_y0yxkDrK" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + 
"Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `metagraph` parameter is an optional object mapping the DGL keys of the node & edge data to strings, list of strings, or user-defined functions." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 408, + "referenced_widgets": [ + "345a5984959c4e57b7e2715fa8eeef8f", + "99e6613c4187459396eea503453934cb", + "968020b1388e4883843575d9198af1cd", + "f1a08470110e4099af2a3d4cf4d0f956", + "6744eb60dfa04a8598fca3b998ce3077", + "09d25097c75c4fa8a2c7376f1965afc5", + "cb8167f00277413eaaa2ad6e0e162fab", + "8128e6d80fcb4a8ca0a72097bb8b6521", + "575205f1a4e64c5d977e69d4939a5605", + "d20843bfa9064d56b37aaea011789a26", + "8bf075c6f7834d3fa905b7ddc37cf128", + "b080f26fe35241fb9cca48e97bc9ef0c" + ] }, - "id": "dADiexlAioGH", - "outputId": "9921ec34-b860-49e8-f8cb-0b403029ead4" + "id": "xAdjZiJ8kDrK", + "outputId": "2822ed4b-8199-48e2-a753-4b1f60d648a0" }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Starting dgl_to_arangodb(Lollipop, ...):\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Lollipop' using default canonical_etypes? True\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Lollipop' homogenous? 
True\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 7 'Lollipop_N' DGL nodes\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 24 'Lollipop_E' DGL edges\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 7 documents into 'Lollipop_N'\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 24 documents into 'Lollipop_E'\n", - "[2022/05/25 17:24:48 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Lollipop' Graph\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Starting dgl_to_arangodb(Hypercube, ...):\n", - "[2022/05/25 17:24:48 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Hypercube' using default canonical_etypes? True\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Hypercube' homogenous? True\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 8 'Hypercube_N' DGL nodes\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 24 'Hypercube_E' DGL edges\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 8 documents into 'Hypercube_N'\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 24 documents into 'Hypercube_E'\n", - "[2022/05/25 17:24:49 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Hypercube' Graph\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Starting dgl_to_arangodb(Clique, ...):\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Clique' using default canonical_etypes? True\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Is graph 'Clique' homogenous? 
True\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 6 'Clique_N' DGL nodes\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Preparing 30 'Clique_E' DGL edges\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 6 documents into 'Clique_N'\n", - "[2022/05/25 17:24:49 +0000] [60] [DEBUG] - adbdgl_adapter: Inserting last 30 documents into 'Clique_E'\n", - "[2022/05/25 17:24:49 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Clique' Graph\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "--------------------\n", - "URL: https://tutorials.arangodb.cloud:8529\n", - "Username: TUTtj3263blez70kmqdi3ts\n", - "Password: TUTf6tursgxqogdo3ww3nplb\n", - "Database: TUT56z6dbtgsoeu5cc6aixs7d\n", - "--------------------\n", - "\n", - "\\View the created graphs here:\n", - "\n", - "1) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Lollipop\n", - "2) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Hypercube\n", - "3) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Clique\n", - "\n", - "View the original graphs below:\n", - "\n" + "Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},\n", + " num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])\n" ] }, { "data": { - "image/png": "", + "application/vnd.jupyter.widget-view+json": { + "model_id": "345a5984959c4e57b7e2715fa8eeef8f", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "
" + "Output()" ] }, "metadata": {}, @@ -1311,55 +1185,299 @@ }, { "data": { - "image/png": "", - "text/plain": [ - "
" - ] + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
           },
           "metadata": {},
           "output_type": "display_data"
         },
         {
           "data": {
-            "image/png": "",
+            "text/html": [
+              "
\n",
+              "
\n" + ], "text/plain": [ - "
" + "\n" ] }, "metadata": {}, "output_type": "display_data" - } - ], + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "968020b1388e4883843575d9198af1cd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6744eb60dfa04a8598fca3b998ce3077", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cb8167f00277413eaaa2ad6e0e162fab", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "575205f1a4e64c5d977e69d4939a5605", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8bf075c6f7834d3fa905b7ddc37cf128", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:35:56 +0000] [61] [INFO] - adbdgl_adapter: Created ArangoDB 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created ArangoDB 'FakeHetero' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------------\n", + "URL: https://tutorials.arangodb.cloud:8529\n", + "Username: TUT6h05us6483maimfr7o28jq\n", + "Password: TUTis4noysrzjeig2bqpdccaa\n", + "Database: TUTk9nlikuz4zowwxfkusway\n", + "--------------------\n", + "\n", + "View the created graph here: https://tutorials.arangodb.cloud:8529/_db/TUTk9nlikuz4zowwxfkusway/_admin/aardvark/index.html#graph/FakeHetero\n", + "\n", + "View the original graph below:\n", + "\n" + ] + } + ], "source": [ - "# Load the dgl graphs & draw:\n", - "## 1) Lollipop Graph\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "plt.figure(1)\n", - "nx.draw(dgl_lollipop_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 2) Hypercube Graph\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "plt.figure(2)\n", - "nx.draw(dgl_hypercube_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 3) Clique Graph\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "plt.figure(3)\n", - "nx.draw(dgl_clique_graph.to_networkx(), with_labels=True)\n", - "\n", - "lollipop = \"Lollipop\"\n", - "hypercube = \"Hypercube\"\n", - "clique = \"Clique\"\n", - "\n", - "# Delete the graphs from ArangoDB if they already exist\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", 
\"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "print(hetero_graph)\n", + "\n", + "name = \"FakeHetero\"\n", + "\n", + "# Define the metagraph\n", + "def label_tensor_to_2_column_dataframe(dgl_tensor):\n", + " \"\"\"\n", + " A user-defined function to create two\n", + " ArangoDB attributes out of the 'user' label tensor\n", + "\n", + " NOTE: user-defined functions must return a Pandas Dataframe\n", + " \"\"\"\n", + " label_map = {0: \"Class A\", 1: \"Class B\", 2: \"Class C\"}\n", + "\n", + " df = pandas.DataFrame(columns=[\"label_num\", \"label_str\"])\n", + " df[\"label_num\"] = dgl_tensor.tolist()\n", + " df[\"label_str\"] = df[\"label_num\"].map(label_map)\n", + "\n", + " return df\n", + "\n", + "\n", + "metagraph = {\n", + " \"nodeTypes\": {\n", + " \"user\": {\n", + " \"features\": \"user_age\", # 1) you can specify a string value for attribute renaming\n", + " \"label\": label_tensor_to_2_column_dataframe, # 2) you can specify a function for user-defined handling, as long as the function returns a Pandas DataFrame\n", + " },\n", + " # 3) You can specify set of strings if you want to preserve the same DGL attribute names for the node/edge type\n", + " \"game\": {\"features\"} # this is equivalent to {\"features\": \"features\"}\n", + " },\n", + " \"edgeTypes\": {\n", + " (\"user\", \"plays\", \"game\"): {\n", + " # 4) you can specify a list of strings for tensor 
disassembly (if you know the number of node/edge features in advance)\n", + " \"features\": [\"hours_played\", \"is_satisfied_with_game\"]\n", + " },\n", + " },\n", + "}\n", + "\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", "\n", "# Create the ArangoDB graphs\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = adbdgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph)\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=False)\n", + "\n", + "# Create the ArangoDB graph with `explicit_metagraph=True`\n", + "# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.\n", + "# Only 'user', 'game', and ('user', 'plays', 'game') will be brought over (i.e. 'topic', ('user', 'follows', 'user'), ...
are ignored)\n", + "## adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=True)\n", "\n", "print('\\n--------------------')\n", "print(\"URL: \" + con[\"url\"])\n", @@ -1367,60 +1485,1907 @@ "print(\"Password: \" + con[\"password\"])\n", "print(\"Database: \" + con[\"dbName\"])\n", "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")\n", - "print(f\"View the original graphs below:\\n\")" + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mk6m0hBRkkkT" + }, + "source": [ + "\n", + "#### FakeHeterogeneous Graph with a user-defined ADBDGL Controller" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KG7kFoOUkkkb" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", + "\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", + "\n", + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `ADBDGL_Controller` is an optional user-defined class for controlling how nodes & edges are handled when transitioning from DGL to ArangoDB. 
**It is interpreted as the alternative to the `metagraph` parameter.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 443, + "referenced_widgets": [ + "ea5e9803c5de4d2bbb48782069b9829b", + "3f633be94c7d466ea40571e805a76948", + "96e57d98afce44cd8269204dd19ff6e0", + "da43ef4a8c6a41f9bda153a0cd14c2d7", + "3bc228aa98454dc59a604c8f7ff6b2a0", + "65138d18c9c449d1aaaad387293c5ede", + "3ea99b2a6b4246d3abf628ca743f9f24", + "841ce4f5d391457e858c3c48185e259d", + "987bf80aee4b4b97bfad1699f8384af8", + "4ab3c113235746cab5fde158756ab420", + "09e8c93741bf45acb69ba9e757107564", + "d7d06973b2984eb19fa050409bf62222" + ] + }, + "id": "A-DtrD2Ykkkb", + "outputId": "f2672554-16e4-4b88-e24b-f567ff13bb3f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},\n", + " num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:36:18 +0000] [61] [INFO] - adbdgl_adapter: Instantiated ADBDGL_Adapter with database 'TUTk9nlikuz4zowwxfkusway'\n", + "INFO:adbdgl_adapter:Instantiated ADBDGL_Adapter with database 'TUTk9nlikuz4zowwxfkusway'\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea5e9803c5de4d2bbb48782069b9829b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "96e57d98afce44cd8269204dd19ff6e0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3bc228aa98454dc59a604c8f7ff6b2a0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3ea99b2a6b4246d3abf628ca743f9f24", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "987bf80aee4b4b97bfad1699f8384af8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "09e8c93741bf45acb69ba9e757107564", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:36:20 +0000] [61] [INFO] - adbdgl_adapter: Created ArangoDB 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created ArangoDB 'FakeHetero' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------------\n", + "URL: https://tutorials.arangodb.cloud:8529\n", + "Username: TUT6h05us6483maimfr7o28jq\n", + "Password: TUTis4noysrzjeig2bqpdccaa\n", + "Database: TUTk9nlikuz4zowwxfkusway\n", + "--------------------\n", + "\n", + "View the created graph here: https://tutorials.arangodb.cloud:8529/_db/TUTk9nlikuz4zowwxfkusway/_admin/aardvark/index.html#graph/FakeHetero\n", + "\n", + "View the original graph below:\n", + "\n" + ] + } + ], + "source": [ + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "print(hetero_graph)\n", + "\n", + "name = \"FakeHetero\"\n", + "\n", + "# Create a custom ADBDGL_Controller\n", + "class Custom_ADBDGL_Controller(ADBDGL_Controller):\n", + " def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:\n", + " \"\"\"Optionally modify a DGL node object before it gets inserted into its designated ArangoDB collection.\n", + "\n", + " 
:param dgl_node: The DGL node object to (optionally) modify.\n", + " :param node_type: The DGL Node Type of the node.\n", + " :return: The DGL Node object\n", + " \"\"\"\n", + " dgl_node[\"foo\"] = \"bar\"\n", + " return dgl_node\n", + "\n", + " def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:\n", + " \"\"\"Optionally modify a DGL edge object before it gets inserted into its designated ArangoDB collection.\n", + "\n", + " :param dgl_edge: The DGL edge object to (optionally) modify.\n", + " :param edge_type: The Edge Type of the DGL edge. Formatted\n", + " as (from_collection, edge_collection, to_collection)\n", + " :return: The DGL Edge object\n", + " \"\"\"\n", + " dgl_edge[\"bar\"] = \"foo\"\n", + " return dgl_edge\n", + "\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb(name, hetero_graph)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uByvwf9feG9A" + }, + "source": [ + "# ArangoDB to DGL\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165, + "referenced_widgets": [ + "c6cffa0a64434e56879ba2a8c9de018a", + "0083494093574c50952dd066502a708d", + "1dea128bde204a8fa53e094e014183fe", + "50f8ff3637ee4fc7af8c811cd5d177be", + "6582a9d3fe044d5380d8e918f3bc5a6d", + 
"40da9dd52dd6443684b990f74b6cb876", + "80d19dc0d20842c3b5c7313c0ad23d24", + "0478c90ef8234f3a8987dbe9cd3030b2", + "c61e3997250d4f93a8e0494db674892d", + "97e7543f202749c197515a9c5c79adbe", + "88e83ddc1ca1464291e1631b8fced847", + "a9c14a3f339445338119631c8e56ff68" + ] + }, + "id": "rnMe3iMz2K7j", + "outputId": "b1485ec1-64bf-43d5-a5fe-7d6bd5fc2da1" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c6cffa0a64434e56879ba2a8c9de018a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1dea128bde204a8fa53e094e014183fe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6582a9d3fe044d5380d8e918f3bc5a6d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "80d19dc0d20842c3b5c7313c0ad23d24", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c61e3997250d4f93a8e0494db674892d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "88e83ddc1ca1464291e1631b8fced847", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:36:46 +0000] [61] [INFO] - adbdgl_adapter: Created ArangoDB 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created ArangoDB 'FakeHetero' Graph\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Start from scratch! (with the same DGL graph)\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "db.delete_graph(\"FakeHetero\", drop_collections=True, ignore_missing=True)\n", + "adbdgl_adapter.dgl_to_arangodb(\"FakeHetero\", hetero_graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZrEDmtqCVD0W" + }, + "source": [ + "#### Via ArangoDB Graph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H8nlvWCryPW0" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", + "\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_graph_to_dgl()`\n", + "\n", + "Notes\n", + "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance.\n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 184, + "referenced_widgets": [ + "9403e71c2bbe46bd9e6d49d555264554", + "34c4ef0c4aa5454893c0f0fa35902fbd", + "1690574b32cc4b48a8b87520458d5066", + "a9edf4f85a4a4504b155608bb740178a", + "fd2db543279f4a13ab6376b9c23160e0", + "5c310145af4f4c90b659dee771185ab6", + "31a9f782f36d407f8cc42b19679c5c2c", + "9fd8d07a43cd4c06a2d448047ede846c", + "2c2900512b5244d3a0fcaf7409446d0e", + "c5d064af7f4a49dca6716f98d052e951" + ] + }, + "id": "zZ-Hu3lLVHgd", + "outputId": "85729665-feb3-4382-e84b-4286162581c3" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9403e71c2bbe46bd9e6d49d555264554", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1690574b32cc4b48a8b87520458d5066", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fd2db543279f4a13ab6376b9c23160e0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "31a9f782f36d407f8cc42b19679c5c2c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2c2900512b5244d3a0fcaf7409446d0e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:37:12 +0000] [61] [INFO] - adbdgl_adapter: Created DGL 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created DGL 'FakeHetero' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------------\n", + "defaultdict(, {})\n" + ] + } + ], + "source": [ + "# Define graph name\n", + "name = \"FakeHetero\"\n", + "\n", + "# Create the DGL Graph from the ArangoDB graph\n", + "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(name)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name, ttl=1000, stream=True)\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print('\\n--------------------')\n", + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RQ4CknYfUEuz" + }, + "source": [ + "#### Via ArangoDB Collections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRcCmqWGy1Kf" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", + "\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_collections_to_dgl()`\n", + "\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance.\n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253, + "referenced_widgets": [ + "f01997b9b43d43368d632e26ba9732ad", + "14b29dc1f2b8454fa9acc1d79dcd4870", + "5f5c119141a24cab907ceb2da27e0244", + "46b88027e41a43578ebcc47513dd6911", + "7a43c4b816da4a40b0eed167a85eef22", + "eb376d5cf782424aaccbce31f0d3ede5", + "7a4db2b18c634bef932fb9b1157d4af1", + "b5be8c1e4ab3415c9fffbb61aeb0fff3", + "4e085418ce1b41e1bc24ad6acea92fc4", + "7b5dba3f4d50466eb2071cb13548ef1b" + ] + }, + "id": "i4XOpdRLUNlJ", + "outputId": "c0fa5973-3e46-4227-8b0c-48b4f14736e5" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f01997b9b43d43368d632e26ba9732ad", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f5c119141a24cab907ceb2da27e0244", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7a43c4b816da4a40b0eed167a85eef22", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7a4db2b18c634bef932fb9b1157d4af1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4e085418ce1b41e1bc24ad6acea92fc4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:37:50 +0000] [61] [INFO] - adbdgl_adapter: Created DGL 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created DGL 'FakeHetero' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------------\n", + "Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},\n", + " num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])\n", + "defaultdict(, {})\n", + "defaultdict(, {})\n" + ] + } + ], + "source": [ + "name = \"FakeHetero\"\n", + "\n", + "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\n", + " name,\n", + " v_cols={\"user\", \"game\"},\n", + " e_cols={\"plays\", \"follows\"}\n", + ")\n", + "\n", + "# Show graph data (notice that the \"topic\" data is skipped)\n", + "print('\\n--------------------')\n", + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEH6OdSB23Ya" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PipFzJ0HzTMA" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", + "\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", + "\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. It should contain collections & associated document attributes names that exist within your ArangoDB instance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 409, + "referenced_widgets": [ + "77b31c42e914410aaea93044f1390121", + "8349f1e6b1f34680bacd7de1a1937122", + "38aaa492d75c48f38de60ea0cc5fa93f", + "63845b04ecbc40de8bcc017d754ac907", + "4b7f5f21b98b4c5d8475929bf1f01a65", + "404a19cadaca4b85a957cad231b73cbb", + "bd8b6caa7d2d4df1a99b1870ecc0ae46", + "13d0f7da120b40b993ce3c0b257d5788", + "ea88ab86e9774ed78ea62daa6e338637", + "712770e675424d7eb0c8efd6c34f2012" + ] + }, + "id": "7Kz8lXXq23Yk", + "outputId": "b17433d7-d344-4748-ffe3-f0abca6fb112" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "77b31c42e914410aaea93044f1390121", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "38aaa492d75c48f38de60ea0cc5fa93f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4b7f5f21b98b4c5d8475929bf1f01a65", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bd8b6caa7d2d4df1a99b1870ecc0ae46", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea88ab86e9774ed78ea62daa6e338637", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:38:02 +0000] [61] [INFO] - adbdgl_adapter: Created DGL 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created DGL 'FakeHetero' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------\n", + "Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},\n", + " num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'), ('user', 'game', 'plays')])\n", + "\n", + "--------------\n", + "defaultdict(, {'dgl_game_features': {'game': tensor([[0, 0],\n", + " [0, 1],\n", + " [1, 0],\n", + " [1, 1],\n", + " [1, 1]])}, 'label': {'user': tensor([1, 2, 0, 1])}, 'features': {'user': tensor([21, 44, 16, 25])}})\n", + "--------------\n", + "\n", + "defaultdict(, {'dgl_plays_features': {('user', 'plays', 'game'): tensor([[ 6, 1],\n", + " [1000, 0]])}})\n" + ] + } + ], + "source": [ + "# Define the Metagraph that transfers ArangoDB attributes \"as is\",\n", + "# meaning the data is already formatted to DGL data standards\n", + "metagraph_v1 = {\n", + " \"vertexCollections\": {\n", + " # Move the \"features\" & \"label\" ArangoDB attributes to DGL as \"features\" & \"label\" Tensors\n", + " \"user\": {\"features\", \"label\"}, # equivalent to {\"features\": \"features\", \"label\": \"label\"}\n", + " \"game\": {\"dgl_game_features\": \"features\"},\n", + " \"topic\": {},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"dgl_plays_features\": \"features\"},\n", + " \"follows\": {}\n", + " },\n", + "}\n", + "\n", + "# Create the DGL graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v1)\n", + "\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", 
+ "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0806IB4o3WRz" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cnByWtpa3WR7" + }, + "source": [ + "Data\n", + "* [ArangoDB IMDB Movie Dataset](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)\n", + "\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", + "\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. In this example, we rely on user-defined encoders to build DGL-ready tensors (i.e. feature matrices) from ArangoDB attributes. See https://pytorch-geometric.readthedocs.io/en/latest/notes/load_csv.html for an example on using encoders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 499, + "referenced_widgets": [ + "2b13e46a722e4be384fad74e1b3e6461", + "848230df62434c77b5b18f9a43e2d14f", + "59405e2d0c164d5b965680cc9d9cd8f3", + "2a380fe111794c3a951cdafa4a2bf0b3", + "3d081c88cd2945fa9534de722669ada9", + "82f996185e8444ada5e18602e2f8e105" + ] + }, + "id": "cKqLoawE3WR7", + "outputId": "02a8bfed-44ae-4c76-9eea-ba7348738707" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2b13e46a722e4be384fad74e1b3e6461", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "59405e2d0c164d5b965680cc9d9cd8f3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3d081c88cd2945fa9534de722669ada9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2022/08/05 20:38:44 +0000] [61] [INFO] - adbdgl_adapter: Created DGL 'IMDB' Graph\n", + "INFO:adbdgl_adapter:Created DGL 'IMDB' Graph\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--------------\n", + "Graph(num_nodes={'Movies': 1682, 'Users': 943},\n", + " num_edges={('Users', 'Ratings', 'Movies'): 65499},\n", + " metagraph=[('Users', 'Movies', 'Ratings')])\n", + "\n", + "--------------\n", + "defaultdict(, {'features': {'Movies': tensor([[0, 0],\n", + " [1, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 1],\n", + " [0, 0],\n", + " [0, 1]]), 'Users': tensor([[ 0., 35.],\n", + " [ 1., 53.],\n", + " [ 0., 23.],\n", + " ...,\n", + " [ 0., 20.],\n", + " [ 1., 48.],\n", + " [ 0., 22.]])}, 'label': {'Movies': tensor([1, 0, 0, ..., 0, 1, 0])}})\n", + "--------------\n", + "\n", + "{'weight': tensor([4, 4, 3, ..., 4, 4, 4])}\n" + ] + } + ], + "source": [ + "# Define the Metagraph that transfers attributes via user-defined encoders\n", + "metagraph_v2 = {\n", + " \"vertexCollections\": {\n", + " \"Movies\": {\n", + " \"features\": { # Build a feature matrix from the \"Action\" & \"Drama\" document attributes\n", + " \"Action\": IdentityEncoder(dtype=torch.long),\n", + " \"Drama\": IdentityEncoder(dtype=torch.long),\n", + " },\n", + " \"label\": \"Comedy\",\n", + " },\n", + " \"Users\": {\n", + " \"features\": {\n", + " \"Gender\": CategoricalEncoder(), # CategoricalEncoder(mapping={\"M\": 0, \"F\": 1}),\n", + " \"Age\": IdentityEncoder(dtype=torch.long),\n", + " }\n", + " },\n", + " },\n", + " \"edgeCollections\": {\"Ratings\": {\"weight\": \"Rating\"}},\n", + "}\n", + "\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"IMDB\", metagraph_v2)\n", + "\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + 
"print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" ] }, { "cell_type": "markdown", "metadata": { - "id": "CNj1xKhwoJoL" + "id": "d5ijSCcY4bYs" }, "source": [ - "\n", - "#### MiniGCDataset Graphs with attributes" + "#### Via ArangoDB-DGL metagraph 3" ] }, { "cell_type": "markdown", "metadata": { - "id": "CZ1UX9YX1Zzo" + "id": "P1aKzxxZrUXJ" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", - "* [`adbdgl_adapter.controller._dgl_feature_to_adb_attribute()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L49-L70)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our DGL node/edge features into ArangoDB vertex/edge attributes. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. In this example, we rely on user-defined functions to handle ArangoDB attribute to DGL feature conversion." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 377, + "referenced_widgets": [ + "e4b7b35461e848f5819b9f38d67ee652", + "9968f928e28147f7a0956aff8412a608", + "54801c3c74494fe8bf9e2a7fb64bde48", + "903622e283524c7f89635599920c2b14", + "f0d4515c88a44775be59c4e1a0b3c60a", + "9e1eb071f0b24cb6a8d206477b10b831" + ] }, - "id": "jbJsvMMaoJoT", - "outputId": "6dba7563-84b8-4934-a07f-1525ef67bd5e" + "id": "t-lNli3d4bY0", + "outputId": "7bc48392-81a7-4232-aad2-931ff3c8ca48" }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e4b7b35461e848f5819b9f38d67ee652", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "54801c3c74494fe8bf9e2a7fb64bde48", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f0d4515c88a44775be59c4e1a0b3c60a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+            ],
+            "text/plain": []
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "
\n",
+              "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", "text": [ - "[2022/05/25 17:25:16 +0000] [60] [INFO] - adbdgl_adapter: Instantiated ADBDGL_Adapter with database 'TUT56z6dbtgsoeu5cc6aixs7d'\n", - "[2022/05/25 17:25:17 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Lollipop_With_Attributes' Graph\n", - "[2022/05/25 17:25:17 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Hypercube_With_Attributes' Graph\n", - "[2022/05/25 17:25:18 +0000] [60] [INFO] - adbdgl_adapter: Created ArangoDB 'Clique_With_Attributes' Graph\n" + "[2022/08/05 20:39:00 +0000] [61] [INFO] - adbdgl_adapter: Created DGL 'FakeHetero' Graph\n", + "INFO:adbdgl_adapter:Created DGL 'FakeHetero' Graph\n" ] }, { @@ -1428,128 +3393,83 @@ "output_type": "stream", "text": [ "\n", - "--------------------\n", - "URL: https://tutorials.arangodb.cloud:8529\n", - "Username: TUTtj3263blez70kmqdi3ts\n", - "Password: TUTf6tursgxqogdo3ww3nplb\n", - "Database: TUT56z6dbtgsoeu5cc6aixs7d\n", - "--------------------\n", + "--------------\n", + "Graph(num_nodes={'game': 5, 'user': 4},\n", + " num_edges={('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'game', 'plays')])\n", "\n", - "\\View the created graphs here:\n", + "--------------\n", + "defaultdict(, {'features': {'game': tensor([[0, 0],\n", + " [0, 1],\n", + " [1, 0],\n", + " [1, 1],\n", + " [1, 1]]), 'user': tensor([21, 44, 16, 25])}, 'label': {'user': tensor([1, 2, 0, 1])}})\n", + "--------------\n", "\n", - "1) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Lollipop_With_Attributes\n", - "2) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Hypercube_With_Attributes\n", - "3) https://tutorials.arangodb.cloud:8529/_db/TUT56z6dbtgsoeu5cc6aixs7d/_admin/aardvark/index.html#graph/Clique_With_Attributes\n", - "\n" + "{'features': tensor([[ 6, 
1],\n", + " [1000, 0]])}\n" ] } ], "source": [ - "# Load the dgl graphs\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "\n", - " # Add DGL Node & Edge Features to each graph\n", - "dgl_lollipop_graph.ndata[\"random_ndata\"] = torch.tensor(\n", - " [[i, i, i] for i in range(0, dgl_lollipop_graph.num_nodes())]\n", - ")\n", - "dgl_lollipop_graph.edata[\"random_edata\"] = torch.rand(dgl_lollipop_graph.num_edges())\n", - "\n", - "dgl_hypercube_graph.ndata[\"random_ndata\"] = torch.rand(dgl_hypercube_graph.num_nodes())\n", - "dgl_hypercube_graph.edata[\"random_edata\"] = torch.tensor(\n", - " [[[i], [i], [i]] for i in range(0, dgl_hypercube_graph.num_edges())]\n", - ")\n", - "\n", - "dgl_clique_graph.ndata['clique_ndata'] = torch.tensor([1,2,3,4,5,6])\n", - "dgl_clique_graph.edata['clique_edata'] = torch.tensor(\n", - " [1 if i % 2 == 0 else 0 for i in range(0, dgl_clique_graph.num_edges())]\n", - ")\n", - "\n", - "# A user-defined Controller class is OPTIONAL when converting DGL features\n", - "# to ArangoDB attributes. 
NOTE: A custom Controller is NOT needed if you want to\n", - "# keep the numerical-based values of your DGL features.\n", - "class Clique_ADBDGL_Controller(ADBDGL_Controller):\n", - " \"\"\"ArangoDB-DGL controller.\n", - "\n", - " Responsible for controlling how ArangoDB attributes\n", - " are converted into DGL features, and vice-versa.\n", + "# Define the metagraph that transfers attributes via user-defined functions\n", + "def udf_user_features(user_df):\n", + " # process the user_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # user_df[\"features\"] = ...\n", + " return torch.tensor(user_df[\"features\"].to_list())\n", "\n", - " You can derive your own custom ADBDGL_Controller if you want to maintain\n", - " consistency between your ArangoDB attributes & your DGL features.\n", - " \"\"\"\n", - "\n", - " def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor):\n", - " \"\"\"\n", - " Given a DGL feature key, its assigned value (for an arbitrary node or edge),\n", - " and the collection it belongs to, convert it to a valid ArangoDB attribute\n", - " (e.g string, list, number, ...).\n", - "\n", - " NOTE: No action is needed here if you want to keep the numerical-based values\n", - " of your DGL features.\n", - "\n", - " :param key: The DGL attribute key name\n", - " :type key: str\n", - " :param col: The ArangoDB collection of the (soon-to-be) ArangoDB document.\n", - " :type col: str\n", - " :param val: The assigned attribute value of the DGL node.\n", - " :type val: Tensor\n", - " :return: The feature's representation as an ArangoDB Attribute\n", - " :rtype: Any\n", - " \"\"\"\n", "\n", - " if key == \"clique_ndata\":\n", - " try:\n", - " return [\"Eins\", \"Zwei\", \"Drei\", \"Vier\", \"Fünf\", \"Sechs\"][key-1]\n", - " except:\n", - " return -1\n", + "def udf_game_features(game_df):\n", + " # process the game_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # game_df[\"features\"] = ...\n", + " return 
torch.tensor(game_df[\"features\"].to_list())\n", "\n", - " if key == \"clique_edata\":\n", - " return bool(val)\n", - "\n", - " return super()._dgl_feature_to_adb_attribute(key, col, val)\n", - "\n", - "# Re-instantiate a new adapter specifically for the Clique Graph Conversion\n", - "clique_adbgl_adapter = ADBDGL_Adapter(db, Clique_ADBDGL_Controller())\n", - "\n", - "# Create the ArangoDB graphs\n", - "lollipop = \"Lollipop_With_Attributes\"\n", - "hypercube = \"Hypercube_With_Attributes\"\n", - "clique = \"Clique_With_Attributes\"\n", "\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "metagraph_v3 = {\n", + " \"vertexCollections\": {\n", + " \"user\": {\n", + " \"features\": udf_user_features, # supports named functions\n", + " \"label\": lambda df: torch.tensor(df[\"label\"].to_list()), # also supports lambda functions\n", + " },\n", + " \"game\": {\"features\": udf_game_features},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"features\": (lambda df: torch.tensor(df[\"features\"].to_list()))},\n", + " },\n", + "}\n", "\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = clique_adbgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph) # Notice the new adapter here!\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v3)\n", "\n", - "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) 
{con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")" + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" ] } ], "metadata": { "colab": { "collapsed_sections": [ - "KS9c-vE5eG89", "ot1oJqn7m78n", "Oc__NAd1eG8-", "7y81WHO8eG8_", "QfE_tKxneG9A", + "bvzJXSHHTi3v", + "UafSB_3JZNwK", + "CNj1xKhwoJoL", + "n08RC_GtkDrC", + "mk6m0hBRkkkT", "uByvwf9feG9A", - "bvzJXSHHTi3v" + "ZrEDmtqCVD0W", + "RQ4CknYfUEuz", + "qEH6OdSB23Ya", + "0806IB4o3WRz", + "d5ijSCcY4bYs" ], - "name": "ArangoDB_DGL_Adapter_v2.ipynb", "provenance": [] }, "kernelspec": { @@ -1568,6 +3488,3723 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0083494093574c50952dd066502a708d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0478c90ef8234f3a8987dbe9cd3030b2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09d25097c75c4fa8a2c7376f1965afc5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09e8c93741bf45acb69ba9e757107564": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_d7d06973b2984eb19fa050409bf62222", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'plays', 'game') (2) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'plays', 'game') (2)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "13d0f7da120b40b993ce3c0b257d5788": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14b29dc1f2b8454fa9acc1d79dcd4870": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1690574b32cc4b48a8b87520458d5066": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_a9edf4f85a4a4504b155608bb740178a", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): topic ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): topic\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "1dea128bde204a8fa53e094e014183fe": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_50f8ff3637ee4fc7af8c811cd5d177be", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): topic (3) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): topic (3)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "21e50aa61c3d4de19b5cc0bbe27d53c9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "289a6e16c3d640c29d96edf09908bd0f": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_61f3832c906445a3ab7e7ba9b41c0127", + "msg_id": "", + "outputs": [ + { + "data": { 
+ "text/html": "
(DGL → ADB): topic (3) ▰▰▰▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): topic (3)\u001b[0m \u001b[38;2;153;70;2m▰▰▰▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "2a380fe111794c3a951cdafa4a2bf0b3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2b13e46a722e4be384fad74e1b3e6461": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_848230df62434c77b5b18f9a43e2d14f", + "msg_id": "", + "outputs": [ + { + "data": { 
+ "text/html": "
(ADB → DGL): Movies ▰▰▰▰▰▰▰ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): Movies\u001b[0m \u001b[38;2;252;253;252m▰▰▰▰▰▰▰\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "2c2900512b5244d3a0fcaf7409446d0e": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_c5d064af7f4a49dca6716f98d052e951", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): plays ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): plays\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "2d1fc41d509e481cb779603827359184": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_87d9c9de620847f48b4088e8577cd653", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('Karate_N', 'Karate_E', 'Karate_N') (156) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('Karate_N', 'Karate_E', 'Karate_N') (156)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "31a9f782f36d407f8cc42b19679c5c2c": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_9fd8d07a43cd4c06a2d448047ede846c", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): follows ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): follows\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "345a5984959c4e57b7e2715fa8eeef8f": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_99e6613c4187459396eea503453934cb", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): game (5) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): game (5)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "34c4ef0c4aa5454893c0f0fa35902fbd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38aaa492d75c48f38de60ea0cc5fa93f": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_63845b04ecbc40de8bcc017d754ac907", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(ADB → DGL): game ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): game\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "3bc228aa98454dc59a604c8f7ff6b2a0": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_65138d18c9c449d1aaaad387293c5ede", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): user (4) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): user (4)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "3d081c88cd2945fa9534de722669ada9": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_82f996185e8444ada5e18602e2f8e105", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): Ratings ▰▱▱▱▱▱▱ 0:00:06\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): Ratings\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:06\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "3ea99b2a6b4246d3abf628ca743f9f24": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_841ce4f5d391457e858c3c48185e259d", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'follows', 'topic') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'topic') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "3f633be94c7d466ea40571e805a76948": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3fc8b14d794a46118b328893bd216405": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_c7e222474ff445fe86e4e599848b2ae2", + "msg_id": "", + 
"outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): game (5) ▰▰▰▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): game (5)\u001b[0m \u001b[38;2;153;70;2m▰▰▰▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "404a19cadaca4b85a957cad231b73cbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "40da9dd52dd6443684b990f74b6cb876": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "46b88027e41a43578ebcc47513dd6911": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ab3c113235746cab5fde158756ab420": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b7f5f21b98b4c5d8475929bf1f01a65": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_404a19cadaca4b85a957cad231b73cbb", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): topic ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): topic\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "4e085418ce1b41e1bc24ad6acea92fc4": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_7b5dba3f4d50466eb2071cb13548ef1b", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): plays ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): plays\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "50f8ff3637ee4fc7af8c811cd5d177be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "54801c3c74494fe8bf9e2a7fb64bde48": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_903622e283524c7f89635599920c2b14", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(ADB → DGL): game ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): game\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "575205f1a4e64c5d977e69d4939a5605": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_d20843bfa9064d56b37aaea011789a26", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'follows', 'user') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'user') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "59405e2d0c164d5b965680cc9d9cd8f3": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_2a380fe111794c3a951cdafa4a2bf0b3", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): Users ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): Users\u001b[0m \u001b[38;2;252;253;252m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "5c310145af4f4c90b659dee771185ab6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f5c119141a24cab907ceb2da27e0244": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_46b88027e41a43578ebcc47513dd6911", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(ADB → DGL): topic ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): topic\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "61d2a0426c324309ab51111933276e3d": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_77c208846c1e4503bc22a5b5504f89ee", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): Karate_N (34) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): Karate_N (34)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "61f3832c906445a3ab7e7ba9b41c0127": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "63845b04ecbc40de8bcc017d754ac907": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "65138d18c9c449d1aaaad387293c5ede": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6582a9d3fe044d5380d8e918f3bc5a6d": { + 
"model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_40da9dd52dd6443684b990f74b6cb876", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): user (4) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): user (4)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "6744eb60dfa04a8598fca3b998ce3077": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_09d25097c75c4fa8a2c7376f1965afc5", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): user (4) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): user (4)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "712770e675424d7eb0c8efd6c34f2012": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "77b31c42e914410aaea93044f1390121": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_8349f1e6b1f34680bacd7de1a1937122", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(ADB → DGL): user ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): user\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "77c208846c1e4503bc22a5b5504f89ee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a43c4b816da4a40b0eed167a85eef22": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_eb376d5cf782424aaccbce31f0d3ede5", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(ADB → DGL): game ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): game\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "7a4db2b18c634bef932fb9b1157d4af1": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_b5be8c1e4ab3415c9fffbb61aeb0fff3", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): follows ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): follows\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "7b5dba3f4d50466eb2071cb13548ef1b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "80d19dc0d20842c3b5c7313c0ad23d24": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_0478c90ef8234f3a8987dbe9cd3030b2", + "msg_id": "", + "outputs": [ + { + "data": { 
+ "text/html": "
(DGL → ADB): ('user', 'follows', 'topic') (2) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'topic') (2)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "80e69b3aa98b44e295efe3940c1146c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8128e6d80fcb4a8ca0a72097bb8b6521": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "82f996185e8444ada5e18602e2f8e105": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"8349f1e6b1f34680bacd7de1a1937122": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "841ce4f5d391457e858c3c48185e259d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8444e147be8f44aba06ec1f8a880104e": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_80e69b3aa98b44e295efe3940c1146c2", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'follows', 'user') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'user') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "848230df62434c77b5b18f9a43e2d14f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "87d9c9de620847f48b4088e8577cd653": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88e83ddc1ca1464291e1631b8fced847": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_a9c14a3f339445338119631c8e56ff68", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'plays', 'game') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'plays', 'game') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "8bf075c6f7834d3fa905b7ddc37cf128": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_b080f26fe35241fb9cca48e97bc9ef0c", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): ('user', 'plays', 'game') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'plays', 'game') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "903622e283524c7f89635599920c2b14": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9403e71c2bbe46bd9e6d49d555264554": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_34c4ef0c4aa5454893c0f0fa35902fbd", + "msg_id": "", + 
"outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): game ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): game\u001b[0m \u001b[38;2;252;253;252m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "968020b1388e4883843575d9198af1cd": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_f1a08470110e4099af2a3d4cf4d0f956", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): topic (3) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): topic (3)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "96e57d98afce44cd8269204dd19ff6e0": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_da43ef4a8c6a41f9bda153a0cd14c2d7", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): topic (3) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): topic (3)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "97e7543f202749c197515a9c5c79adbe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "987bf80aee4b4b97bfad1699f8384af8": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_4ab3c113235746cab5fde158756ab420", + "msg_id": "", + "outputs": [ + { + "data": { 
+ "text/html": "
(DGL → ADB): ('user', 'follows', 'user') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'user') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "9968f928e28147f7a0956aff8412a608": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "99bbe81a24db49ff9352987fd97649cd": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_21e50aa61c3d4de19b5cc0bbe27d53c9", + "msg_id": "", + 
"outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): user (4) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): user (4)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "99e6613c4187459396eea503453934cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9b2b3abbe2c04af0bc232c9b16bfd90d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9e1eb071f0b24cb6a8d206477b10b831": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9fd8d07a43cd4c06a2d448047ede846c": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9c14a3f339445338119631c8e56ff68": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9edf4f85a4a4504b155608bb740178a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b080f26fe35241fb9cca48e97bc9ef0c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5be8c1e4ab3415c9fffbb61aeb0fff3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bd8b6caa7d2d4df1a99b1870ecc0ae46": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_13d0f7da120b40b993ce3c0b257d5788", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): plays ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): plays\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "c5d064af7f4a49dca6716f98d052e951": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c61e3997250d4f93a8e0494db674892d": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_97e7543f202749c197515a9c5c79adbe", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(DGL → ADB): ('user', 'follows', 'user') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'user') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "c6cffa0a64434e56879ba2a8c9de018a": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_0083494093574c50952dd066502a708d", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): game (5) ▰▰▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): game (5)\u001b[0m \u001b[38;2;153;70;2m▰▰▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "c7e222474ff445fe86e4e599848b2ae2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb8167f00277413eaaa2ad6e0e162fab": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_8128e6d80fcb4a8ca0a72097bb8b6521", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(DGL → ADB): ('user', 'follows', 'topic') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'topic') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "d20843bfa9064d56b37aaea011789a26": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d7d06973b2984eb19fa050409bf62222": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "da43ef4a8c6a41f9bda153a0cd14c2d7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"dd2376f84c794b4989f385a5bb147bd8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e4b7b35461e848f5819b9f38d67ee652": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_9968f928e28147f7a0956aff8412a608", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): user ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): user\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "ea5e9803c5de4d2bbb48782069b9829b": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_3f633be94c7d466ea40571e805a76948", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(DGL → ADB): game (5) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): game (5)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "ea88ab86e9774ed78ea62daa6e338637": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_712770e675424d7eb0c8efd6c34f2012", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): follows ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): follows\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "eb376d5cf782424aaccbce31f0d3ede5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ec7b8b0b853f463fa079dda845891391": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_dd2376f84c794b4989f385a5bb147bd8", + "msg_id": "", + "outputs": [ + { + "data": { 
+ "text/html": "
(DGL → ADB): ('user', 'plays', 'game') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'plays', 'game') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "f01997b9b43d43368d632e26ba9732ad": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_14b29dc1f2b8454fa9acc1d79dcd4870", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): user ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): user\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "f0d4515c88a44775be59c4e1a0b3c60a": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_9e1eb071f0b24cb6a8d206477b10b831", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): plays ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): plays\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "f1a08470110e4099af2a3d4cf4d0f956": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f9fdfe6ce44e4e1c8f513f82efca3e0d": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_9b2b3abbe2c04af0bc232c9b16bfd90d", + "msg_id": "", + "outputs": [ + { + "data": { + 
"text/html": "
(DGL → ADB): ('user', 'follows', 'topic') (2) ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;151;196;35m(DGL → ADB): ('user', 'follows', 'topic') (2)\u001b[0m \u001b[38;2;153;70;2m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "fd2db543279f4a13ab6376b9c23160e0": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_5c310145af4f4c90b659dee771185ab6", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
(ADB → DGL): user ▰▱▱▱▱▱▱ 0:00:00\n
\n", + "text/plain": "\u001b[38;2;49;155;245m(ADB → DGL): user\u001b[0m \u001b[38;2;252;253;252m▰▱▱▱▱▱▱\u001b[0m \u001b[33m0:00:00\u001b[0m\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + } + } } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg index 475af62..a91261b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ profile = black [flake8] max-line-length = 88 -extend-ignore = E203, E741, W503 +extend-ignore = E203, E741, W503, E731 exclude =.git .idea .*_cache dist venv [mypy] diff --git a/setup.py b/setup.py index 037c9c1..0b56dea 100644 --- a/setup.py +++ b/setup.py @@ -11,24 +11,26 @@ long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/arangoml/dgl-adapter", - keywords=["arangodb", "dgl", "adapter"], + keywords=["arangodb", "dgl", "deep graph library", "adapter"], packages=["adbdgl_adapter"], include_package_data=True, - python_requires=">=3.7", + python_requires=">=3.8", license="Apache Software License", install_requires=[ "requests>=2.27.1", - "dgl>=0.6.1", - "torch>=1.10.2", - "python-arango>=7.4.1", + "rich>=12.5.1", + "pandas>=1.3.5", + "dgl~=1.0", + "torch>=1.12.0", + "python-arango~=7.6", "setuptools>=45", ], extras_require={ "dev": [ - "black", - "flake8>=3.8.0", - "isort>=5.0.0", - "mypy>=0.790", + "black==23.3.0", + "flake8==6.0.0", + "isort==5.12.0", + "mypy==1.4.1", "pytest>=6.0.0", "pytest-cov>=2.0.0", "coveralls>=3.3.1", @@ -41,9 +43,10 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Utilities", "Typing :: Typed", ], diff --git a/tests/conftest.py b/tests/conftest.py index f31c304..0c8acaf 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,17 
+2,19 @@ import os import subprocess from pathlib import Path -from typing import Any +from typing import Any, Callable, Dict from arango import ArangoClient from arango.database import StandardDatabase -from dgl import DGLGraph, heterograph, remove_self_loop +from dgl import DGLGraph, DGLHeteroGraph, heterograph, remove_self_loop from dgl.data import KarateClubDataset, MiniGCDataset -from torch import ones, rand, tensor, zeros +from pandas import DataFrame +from torch import Tensor, rand, tensor -from adbdgl_adapter import ADBDGL_Adapter -from adbdgl_adapter.typings import Json +from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller +from adbdgl_adapter.typings import DGLCanonicalEType, Json +con: Json db: StandardDatabase adbdgl_adapter: ADBDGL_Adapter PROJECT_DIR = Path(__file__).parent.parent @@ -26,6 +28,7 @@ def pytest_addoption(parser: Any) -> None: def pytest_configure(config: Any) -> None: + global con con = { "url": config.getoption("url"), "username": config.getoption("username"), @@ -48,37 +51,34 @@ def pytest_configure(config: Any) -> None: global adbdgl_adapter adbdgl_adapter = ADBDGL_Adapter(db, logging_lvl=logging.DEBUG) - if db.has_graph("fraud-detection") is False: - arango_restore(con, "examples/data/fraud_dump") - db.create_graph( - "fraud-detection", - edge_definitions=[ - { - "edge_collection": "accountHolder", - "from_vertex_collections": ["customer"], - "to_vertex_collections": ["account"], - }, - { - "edge_collection": "transaction", - "from_vertex_collections": ["account"], - "to_vertex_collections": ["account"], - }, - ], - ) + +def pytest_exception_interact(node: Any, call: Any, report: Any) -> None: + try: + if report.failed: + params: Dict[str, Any] = node.callspec.params + + graph_name = params.get("name") + adapter = params.get("adapter") + if graph_name and adapter: + db: StandardDatabase = adapter.db + db.delete_graph(graph_name, drop_collections=True, ignore_missing=True) + except AttributeError: + print(node) + 
print(dir(node)) + print("Could not delete graph") def arango_restore(con: Json, path_to_data: str) -> None: - restore_prefix = "./assets/" if os.getenv("GITHUB_ACTIONS") else "" + restore_prefix = "./tools/" if os.getenv("GITHUB_ACTIONS") else "" protocol = "http+ssl://" if "https://" in con["url"] else "tcp://" url = protocol + con["url"].partition("://")[-1] - # A small hack to work around empty passwords - password = f"--server.password {con['password']}" if con["password"] else "" subprocess.check_call( - f'chmod -R 755 ./assets/arangorestore && {restore_prefix}arangorestore \ + f'chmod -R 755 ./tools/arangorestore && {restore_prefix}arangorestore \ -c none --server.endpoint {url} --server.database {con["dbName"]} \ - --server.username {con["username"]} {password} \ - --input-directory "{PROJECT_DIR}/{path_to_data}"', + --server.username {con["username"]} \ + --server.password "{con["password"]}" \ + --input-directory "{PROJECT_DIR}/{path_to_data}"', cwd=f"{PROJECT_DIR}/tests", shell=True, ) @@ -88,41 +88,90 @@ def get_karate_graph() -> DGLGraph: return KarateClubDataset()[0] -def get_lollipop_graph() -> DGLGraph: - dgl_g = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0]) - dgl_g.ndata["random_ndata"] = tensor( - [[i, i, i] for i in range(0, dgl_g.num_nodes())] - ) - dgl_g.edata["random_edata"] = rand(dgl_g.num_edges()) - return dgl_g - - def get_hypercube_graph() -> DGLGraph: dgl_g = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0]) - dgl_g.ndata["random_ndata"] = rand(dgl_g.num_nodes()) - dgl_g.edata["random_edata"] = tensor( + dgl_g.ndata["node_features"] = rand(dgl_g.num_nodes()) + dgl_g.edata["edge_features"] = tensor( [[[i], [i], [i]] for i in range(0, dgl_g.num_edges())] ) return dgl_g -def get_clique_graph() -> DGLGraph: - dgl_g = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0]) - dgl_g.ndata["random_ndata"] = ones(dgl_g.num_nodes()) - dgl_g.edata["random_edata"] = zeros(dgl_g.num_edges()) +def get_fake_hetero_dataset() -> DGLHeteroGraph: + data_dict = { + 
("v0", "e0", "v0"): (tensor([0, 1, 2, 3, 4, 5]), tensor([5, 4, 3, 2, 1, 0])), + ("v0", "e0", "v1"): (tensor([0, 1, 2, 3, 4, 5]), tensor([0, 5, 1, 4, 2, 3])), + ("v0", "e0", "v2"): (tensor([0, 1, 2, 3, 4, 5]), tensor([1, 1, 1, 5, 5, 5])), + ("v1", "e0", "v1"): (tensor([0, 1, 2, 3, 4, 5]), tensor([3, 3, 3, 3, 3, 3])), + ("v1", "e0", "v2"): (tensor([0, 1, 2, 3, 4, 5]), tensor([0, 1, 2, 3, 4, 5])), + ("v2", "e0", "v2"): (tensor([0, 1, 2, 3, 4, 5]), tensor([5, 4, 3, 2, 1, 0])), + } + + dgl_g: DGLHeteroGraph = heterograph(data_dict) + dgl_g.nodes["v0"].data["features"] = rand(6) + dgl_g.nodes["v0"].data["label"] = tensor([1, 3, 2, 1, 3, 2]) + dgl_g.nodes["v1"].data["features"] = rand(6, 1) + dgl_g.nodes["v2"].data["features"] = rand(6, 2) + dgl_g.edata["features"] = {("v0", "e0", "v0"): rand(6, 3)} + return dgl_g -def get_social_graph() -> DGLGraph: +def get_social_graph() -> DGLHeteroGraph: dgl_g = heterograph( { ("user", "follows", "user"): (tensor([0, 1]), tensor([1, 2])), - ("user", "follows", "game"): (tensor([0, 1, 2]), tensor([0, 1, 2])), - ("user", "plays", "game"): (tensor([3, 3]), tensor([1, 2])), + ("user", "follows", "topic"): (tensor([1, 1]), tensor([1, 2])), + ("user", "plays", "game"): (tensor([0, 3]), tensor([3, 4])), } ) - dgl_g.nodes["user"].data["age"] = tensor([21, 16, 38, 64]) - dgl_g.edges["plays"].data["hours_played"] = tensor([3, 5]) + dgl_g.nodes["user"].data["features"] = tensor([21, 44, 16, 25]) + dgl_g.nodes["user"].data["label"] = tensor([1, 2, 0, 1]) + dgl_g.nodes["game"].data["features"] = tensor( + [[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]] + ) + dgl_g.edges[("user", "plays", "game")].data["features"] = tensor( + [[6, 1], [1000, 0]] + ) return dgl_g + + +# For DGL to ArangoDB testing purposes +def udf_users_features_tensor_to_df(t: Tensor, adb_df: DataFrame) -> DataFrame: + adb_df[["age", "gender"]] = t.tolist() + adb_df["gender"] = adb_df["gender"].map({0: "Male", 1: "Female"}) + return adb_df + + +# For ArangoDB to DGL testing purposes 
+def udf_features_df_to_tensor(df: DataFrame) -> Tensor: + return tensor(df["features"].to_list()) + + +# For ArangoDB to DGL testing purposes +def udf_key_df_to_tensor(key: str) -> Callable[[DataFrame], Tensor]: + def f(df: DataFrame) -> Tensor: + return tensor(df[key].to_list()) + + return f + + +def label_tensor_to_2_column_dataframe(dgl_tensor: Tensor, df: DataFrame) -> DataFrame: + label_map = {0: "Class A", 1: "Class B", 2: "Class C"} + + df["label_num"] = dgl_tensor.tolist() + df["label_str"] = df["label_num"].map(label_map) + + return df + + +class Custom_ADBDGL_Controller(ADBDGL_Controller): + def _prepare_dgl_node(self, dgl_node: Json, node_type: str) -> Json: + dgl_node["foo"] = "bar" + return dgl_node + + def _prepare_dgl_edge(self, dgl_edge: Json, edge_type: DGLCanonicalEType) -> Json: + dgl_edge["bar"] = "foo" + return dgl_edge diff --git a/tests/data/adb/imdb_dump/ENCRYPTION b/tests/data/adb/imdb_dump/ENCRYPTION new file mode 100644 index 0000000..c86c3f3 --- /dev/null +++ b/tests/data/adb/imdb_dump/ENCRYPTION @@ -0,0 +1 @@ +none \ No newline at end of file diff --git a/tests/data/adb/imdb_dump/Movies.structure.json b/tests/data/adb/imdb_dump/Movies.structure.json new file mode 100644 index 0000000..eb9d80c --- /dev/null +++ b/tests/data/adb/imdb_dump/Movies.structure.json @@ -0,0 +1 @@ +{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2730595280/","id":"2730595280","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Movies","numberOfShards":1,"planId":"2730595280","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2730595281":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":2,"waitForSync":false,"writeConcern":1},"planVersion":10402} \ No newline at end of file diff --git 
a/tests/data/adb/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz b/tests/data/adb/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz new file mode 100644 index 0000000..b838d29 Binary files /dev/null and b/tests/data/adb/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz differ diff --git a/tests/data/adb/imdb_dump/Ratings.structure.json b/tests/data/adb/imdb_dump/Ratings.structure.json new file mode 100644 index 0000000..8571f0d --- /dev/null +++ b/tests/data/adb/imdb_dump/Ratings.structure.json @@ -0,0 +1 @@ +{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2728580616/","id":"2728580616","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Ratings","numberOfShards":1,"planId":"2728580616","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2728580617":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":3,"waitForSync":false,"writeConcern":1},"planVersion":10408} \ No newline at end of file diff --git a/tests/data/adb/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz b/tests/data/adb/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz new file mode 100644 index 0000000..b604626 Binary files /dev/null and b/tests/data/adb/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz differ diff --git a/tests/data/adb/imdb_dump/Users.structure.json b/tests/data/adb/imdb_dump/Users.structure.json new file mode 100644 index 0000000..e5420b3 --- /dev/null +++ b/tests/data/adb/imdb_dump/Users.structure.json @@ -0,0 +1 @@ 
+{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2728580582/","id":"2728580582","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Users","numberOfShards":1,"planId":"2728580582","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2728580583":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":2,"waitForSync":false,"writeConcern":1},"planVersion":10405} \ No newline at end of file diff --git a/tests/data/adb/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz b/tests/data/adb/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz new file mode 100644 index 0000000..4eb3a4c Binary files /dev/null and b/tests/data/adb/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz differ diff --git a/tests/data/adb/imdb_dump/dump.json b/tests/data/adb/imdb_dump/dump.json new file mode 100644 index 0000000..b2a69d9 --- /dev/null +++ b/tests/data/adb/imdb_dump/dump.json @@ -0,0 +1 @@ +{"database":"TUTdit9ohpgz1ntnbetsjstwi","lastTickAtDumpStart":"2732644865","properties":{"id":"2728554641","name":"TUTdit9ohpgz1ntnbetsjstwi","isSystem":false,"sharding":"","replicationFactor":1,"writeConcern":1,"path":""}} \ No newline at end of file diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 84cbd9f..4d913e4 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -1,23 +1,39 @@ -from typing import Any, Dict, Set, Union +from collections import defaultdict +from typing import Any, Dict, List, Optional, Set, Union import pytest -from arango.database import StandardDatabase -from arango.graph import Graph as ArangoGraph -from dgl import DGLGraph -from dgl.heterograph import DGLHeteroGraph -from torch import Tensor +from dgl import DGLGraph, DGLHeteroGraph +from dgl.view 
import EdgeSpace, NodeSpace +from pandas import DataFrame +from torch import Tensor, cat, long, tensor from adbdgl_adapter import ADBDGL_Adapter -from adbdgl_adapter.typings import ArangoMetagraph +from adbdgl_adapter.encoders import CategoricalEncoder, IdentityEncoder +from adbdgl_adapter.exceptions import ADBMetagraphError, DGLMetagraphError +from adbdgl_adapter.typings import ( + ADBMap, + ADBMetagraph, + ADBMetagraphValues, + DGLCanonicalEType, + DGLMetagraph, + DGLMetagraphValues, +) +from adbdgl_adapter.utils import validate_adb_metagraph, validate_dgl_metagraph from .conftest import ( + Custom_ADBDGL_Controller, adbdgl_adapter, + arango_restore, + con, db, - get_clique_graph, + get_fake_hetero_dataset, get_hypercube_graph, get_karate_graph, - get_lollipop_graph, get_social_graph, + label_tensor_to_2_column_dataframe, + udf_features_df_to_tensor, + udf_key_df_to_tensor, + udf_users_features_tensor_to_df, ) @@ -28,210 +44,866 @@ class Bad_ADBDGL_Controller: pass with pytest.raises(TypeError): - ADBDGL_Adapter(bad_db) + ADBDGL_Adapter(bad_db) # type: ignore with pytest.raises(TypeError): ADBDGL_Adapter(db, Bad_ADBDGL_Controller()) # type: ignore @pytest.mark.parametrize( - "adapter, name, metagraph", - [ + "bad_metagraph", + [ # empty metagraph + ({}), + # missing required parent key + ({"vertexCollections": {}}), + # empty sub-metagraph + ({"vertexCollections": {}, "edgeCollections": {}}), + # bad collection name ( - adbdgl_adapter, - "fraud-detection", { "vertexCollections": { - "account": {"Balance", "rank"}, - "customer": {"rank"}, - "Class": {}, + 1: {}, + # other examples include: + # True: {}, + # ('a'): {} }, - "edgeCollections": { - "transaction": { - "transaction_amt", - "sender_bank_id", - "receiver_bank_id", - }, - "accountHolder": {}, - "Relationship": {}, + "edgeCollections": {}, + } + ), + # bad collection metagraph + ( + { + "vertexCollections": { + "vcol_a": None, + # other examples include: + # "vcol_a": 1, + # "vcol_a": 'foo', }, - }, + 
"edgeCollections": {}, + } + ), + # bad collection metagraph 2 + ( + { + "vertexCollections": { + "vcol_a": {"a", "b", 3}, + # other examples include: + # "vcol_a": 1, + # "vcol_a": 'foo', + }, + "edgeCollections": {}, + } + ), + # bad meta_key + ( + { + "vertexCollections": { + "vcol_a": { + 1: {}, + # other example include: + # True: {}, + # ("x"): {}, + } + }, + "edgeCollections": {}, + } + ), + # bad meta_val + ( + { + "vertexCollections": { + "vcol_a": { + "x": True, + # other example include: + # 'x': ('a'), + # 'x': ['a'], + # 'x': 5 + } + }, + "edgeCollections": {}, + } + ), + # bad meta_val encoder key + ( + { + "vertexCollections": {"vcol_a": {"x": {1: IdentityEncoder()}}}, + "edgeCollections": {}, + } + ), + # bad meta_val encoder value + ( + { + "vertexCollections": { + "vcol_a": { + "x": { + "Action": True, + # other examples include: + # 'Action': {} + # 'Action': (lambda : 1)() + } + } + }, + "edgeCollections": {}, + } ), ], ) -def test_adb_to_dgl( - adapter: ADBDGL_Adapter, name: str, metagraph: ArangoMetagraph -) -> None: - dgl_g = adapter.arangodb_to_dgl(name, metagraph) - assert_dgl_data(db, dgl_g, metagraph) +def test_validate_adb_metagraph(bad_metagraph: Dict[Any, Any]) -> None: + with pytest.raises(ADBMetagraphError): + validate_adb_metagraph(bad_metagraph) + + +@pytest.mark.parametrize( + "bad_metagraph", + [ + # bad node type + ( + { + "nodeTypes": { + ("a", "b", "c"): {}, + # other examples include: + # 1: {}, + # True: {} + } + } + ), + # bad edge type + ( + { + "edgeTypes": { + "b": {}, + # other examples include: + # 1: {}, + # True: {} + } + } + ), + # bad edge type 2 + ( + { + "edgeTypes": { + ("a", "b", 3): {}, + # other examples include: + # 1: {}, + # True: {} + } + } + ), + # bad data type metagraph + ( + { + "nodeTypes": { + "ntype_a": None, + # other examples include: + # "ntype_a": 1, + # "ntype_a": 'foo', + } + } + ), + # bad data type metagraph 2 + ({"nodeTypes": {"ntype_a": {"a", "b", 3}}}), + # bad meta_val + ( + { + 
"nodeTypes": { + "ntype_a'": { + "x": True, + # other example include: + # 'x': ('a'), + # 'x': (lambda: 1)(), + } + } + } + ), + # bad meta_val list + ( + { + "nodeTypes": { + "ntype_a'": { + "x": ["a", 3], + # other example include: + # 'x': ('a'), + # 'x': (lambda: 1)(), + } + } + } + ), + ], +) +def test_validate_dgl_metagraph(bad_metagraph: Dict[Any, Any]) -> None: + with pytest.raises(DGLMetagraphError): + validate_dgl_metagraph(bad_metagraph) @pytest.mark.parametrize( - "adapter, name, v_cols, e_cols", + "adapter, name, dgl_g, metagraph, \ + explicit_metagraph, overwrite_graph, batch_size, adb_import_kwargs", [ ( adbdgl_adapter, - "fraud-detection", - {"account", "Class", "customer"}, - {"accountHolder", "Relationship", "transaction"}, - ) + "Karate_1", + get_karate_graph(), + {"nodeTypes": {"Karate_1_N": {"label": "node_label"}}}, + False, + False, + 33, + {}, + ), + ( + adbdgl_adapter, + "Karate_2", + get_karate_graph(), + {"nodeTypes": {"Karate_2_N": {}}}, + True, + False, + 1000, + {}, + ), + ( + adbdgl_adapter, + "Social_1", + get_social_graph(), + { + "nodeTypes": { + "user": { + "features": "user_age", + "label": label_tensor_to_2_column_dataframe, + }, + "game": {"features": ["is_multiplayer", "is_free_to_play"]}, + }, + "edgeTypes": { + ("user", "plays", "game"): { + "features": ["hours_played", "is_satisfied_with_game"] + }, + }, + }, + True, + False, + 1, + {}, + ), + ( + adbdgl_adapter, + "Social_2", + get_social_graph(), + { + "edgeTypes": { + ("user", "plays", "game"): { + "features": ["hours_played", "is_satisfied_with_game"] + }, + }, + }, + True, + False, + 1000, + {}, + ), + ( + adbdgl_adapter, + "Social_3", + get_social_graph(), + {}, + False, + False, + None, + {}, + ), + ( + adbdgl_adapter, + "FakeHeterogeneous_1", + get_fake_hetero_dataset(), + { + "nodeTypes": { + "v0": {"features": "adb_node_features", "label": "adb_node_label"} + }, + "edgeTypes": {("v0", "e0", "v0"): {"features": "adb_edge_features"}}, + }, + True, + False, + None, 
+ {}, + ), + ( + adbdgl_adapter, + "FakeHeterogeneous_2", + get_fake_hetero_dataset(), + {}, + False, + False, + None, + {}, + ), + ( + adbdgl_adapter, + "FakeHeterogeneous_3", + get_fake_hetero_dataset(), + { + "nodeTypes": {"v0": {"features", "label"}}, + "edgeTypes": {("v0", "e0", "v0"): {"features"}}, + }, + True, + True, + None, + {}, + ), ], ) -def test_adb_collections_to_dgl( - adapter: ADBDGL_Adapter, name: str, v_cols: Set[str], e_cols: Set[str] +def test_dgl_to_adb( + adapter: ADBDGL_Adapter, + name: str, + dgl_g: Union[DGLGraph, DGLHeteroGraph], + metagraph: DGLMetagraph, + explicit_metagraph: bool, + overwrite_graph: bool, + batch_size: Optional[int], + adb_import_kwargs: Any, ) -> None: - dgl_g = adapter.arangodb_collections_to_dgl( + db.delete_graph(name, drop_collections=True, ignore_missing=True) + adapter.dgl_to_arangodb( name, - v_cols, - e_cols, - ) - assert_dgl_data( - db, dgl_g, - metagraph={ - "vertexCollections": {col: set() for col in v_cols}, - "edgeCollections": {col: set() for col in e_cols}, - }, + metagraph, + explicit_metagraph, + overwrite_graph, + batch_size, + **adb_import_kwargs ) + assert_dgl_to_adb(name, dgl_g, metagraph, explicit_metagraph) + db.delete_graph(name, drop_collections=True) -@pytest.mark.parametrize( - "adapter, name", - [(adbdgl_adapter, "fraud-detection")], -) -def test_adb_graph_to_dgl(adapter: ADBDGL_Adapter, name: str) -> None: - arango_graph = db.graph(name) - v_cols = arango_graph.vertex_collections() - e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()} - - dgl_g: DGLGraph = adapter.arangodb_graph_to_dgl(name) - assert_dgl_data( - db, - dgl_g, - metagraph={ - "vertexCollections": {col: set() for col in v_cols}, - "edgeCollections": {col: set() for col in e_cols}, - }, - ) +def test_dgl_to_adb_with_controller() -> None: + name = "Karate_3" + data = get_karate_graph() + db.delete_graph(name, drop_collections=True, ignore_missing=True) + + ADBDGL_Adapter(db, 
Custom_ADBDGL_Controller()).dgl_to_arangodb(name, data) + + for doc in db.collection(name + "_N"): # type: ignore + assert "foo" in doc + assert doc["foo"] == "bar" + + for edge in db.collection(name + "_E"): # type: ignore + assert "bar" in edge + assert edge["bar"] == "foo" + + db.delete_graph(name, drop_collections=True) @pytest.mark.parametrize( - "adapter, name, dgl_g, overwrite_graph, import_options", + "adapter, name, metagraph, dgl_g_old, batch_size", [ ( adbdgl_adapter, - "Clique", - get_clique_graph(), - False, - {"batch_size": 3, "on_duplicate": "replace"}, + "Karate", + { + "vertexCollections": { + "Karate_N": {"karate_label": "label"}, + }, + "edgeCollections": { + "Karate_E": {}, + }, + }, + get_karate_graph(), + 1, ), - (adbdgl_adapter, "Lollipop", get_lollipop_graph(), False, {"overwrite": True}), ( adbdgl_adapter, - "Hypercube", - get_hypercube_graph(), - False, - {"batch_size": 1000, "on_duplicate": "replace"}, + "Karate_2", + { + "vertexCollections": { + "Karate_2_N": {"karate_label": "label"}, + }, + "edgeCollections": { + "Karate_2_E": {}, + }, + }, + get_karate_graph(), + 33, ), ( adbdgl_adapter, "Hypercube", + { + "vertexCollections": { + "Hypercube_N": {"node_features": "node_features"}, + }, + "edgeCollections": { + "Hypercube_E": {"edge_features": "edge_features"}, + }, + }, get_hypercube_graph(), - False, - {"overwrite": True}, + 1000, ), - (adbdgl_adapter, "Karate", get_karate_graph(), False, {"overwrite": True}), ( adbdgl_adapter, "Social", + { + "vertexCollections": { + "user": {"node_features": "features", "label": "label"}, + "game": {"node_features": "features"}, + "topic": {}, + }, + "edgeCollections": { + "plays": {"edge_features": "features"}, + "follows": {}, + }, + }, get_social_graph(), - True, - {"on_duplicate": "replace"}, + 1, + ), + ( + adbdgl_adapter, + "Heterogeneous", + { + "vertexCollections": { + "v0": {"features": "features", "label": "label"}, + "v1": {"features": "features"}, + "v2": {"features": "features"}, + }, 
+ "edgeCollections": { + "e0": {}, + }, + }, + get_fake_hetero_dataset(), + 1000, + ), + ( + adbdgl_adapter, + "HeterogeneousSimpleMetagraph", + { + "vertexCollections": { + "v0": {"features", "label"}, + "v1": {"features"}, + "v2": {"features"}, + }, + "edgeCollections": { + "e0": {}, + }, + }, + get_fake_hetero_dataset(), + None, + ), + ( + adbdgl_adapter, + "HeterogeneousOverComplicatedMetagraph", + { + "vertexCollections": { + "v0": {"features": {"features": None}, "label": {"label": None}}, + "v1": {"features": "features"}, + "v2": {"features": {"features": None}}, + }, + "edgeCollections": { + "e0": {}, + }, + }, + get_fake_hetero_dataset(), + None, + ), + ( + adbdgl_adapter, + "HeterogeneousUserDefinedFunctions", + { + "vertexCollections": { + "v0": { + "features": (lambda df: tensor(df["features"].to_list())), + "label": (lambda df: tensor(df["label"].to_list())), + }, + "v1": {"features": udf_features_df_to_tensor}, + "v2": {"features": udf_key_df_to_tensor("features")}, + }, + "edgeCollections": { + "e0": {}, + }, + }, + get_fake_hetero_dataset(), + None, ), ], ) -def test_dgl_to_adb( +def test_adb_to_dgl( adapter: ADBDGL_Adapter, name: str, - dgl_g: Union[DGLGraph, DGLHeteroGraph], - overwrite_graph: bool, - import_options: Any, + metagraph: ADBMetagraph, + dgl_g_old: Optional[Union[DGLGraph, DGLHeteroGraph]], + batch_size: Optional[None], +) -> None: + if dgl_g_old: + db.delete_graph(name, drop_collections=True, ignore_missing=True) + adapter.dgl_to_arangodb(name, dgl_g_old) + + dgl_g_new = adapter.arangodb_to_dgl(name, metagraph, batch_size=batch_size) + assert_adb_to_dgl(dgl_g_new, metagraph) + + if dgl_g_old: + db.delete_graph(name, drop_collections=True) + + +def test_adb_partial_to_dgl() -> None: + dgl_g = get_social_graph() + + name = "Social" + db.delete_graph(name, drop_collections=True, ignore_missing=True) + adbdgl_adapter.dgl_to_arangodb(name, dgl_g) + + metagraph: ADBMetagraph + + # Case 1: Partial edge collection import turns the graph 
homogeneous + metagraph = { + "vertexCollections": { + "user": {"features": "features", "label": "label"}, + }, + "edgeCollections": { + "follows": {}, + }, + } + + dgl_g_new = adbdgl_adapter.arangodb_to_dgl( + "HeterogeneousTurnedHomogeneous", metagraph + ) + + assert dgl_g_new.is_homogeneous + assert ( + dgl_g.ndata["features"]["user"].tolist() == dgl_g_new.ndata["features"].tolist() + ) + assert dgl_g.ndata["label"]["user"].tolist() == dgl_g_new.ndata["label"].tolist() + + # Grab the nodes from the Heterogeneous graph + from_nodes, to_nodes = dgl_g.edges(etype=("user", "follows", "user")) + # Grab the same nodes from the Homogeneous graph + from_nodes_new, to_nodes_new = dgl_g_new.edges(etype=None) + + assert from_nodes.tolist() == from_nodes_new.tolist() + assert to_nodes.tolist() == to_nodes_new.tolist() + + # Case 2: Partial edge collection import keeps the graph heterogeneous + metagraph = { + "vertexCollections": { + "user": {"features": "features", "label": "label"}, + "game": {"features": "features"}, + }, + "edgeCollections": {"follows": {}, "plays": {"features": "features"}}, + } + + dgl_g_new = adbdgl_adapter.arangodb_to_dgl( + "HeterogeneousWithOneLessNodeType", metagraph + ) + + assert type(dgl_g_new) is DGLHeteroGraph + assert set(dgl_g_new.ntypes) == {"user", "game"} + for n_type in dgl_g_new.ntypes: + for k, v in dgl_g_new.nodes[n_type].data.items(): + assert v.tolist() == dgl_g.nodes[n_type].data[k].tolist() + + for e_type in dgl_g_new.canonical_etypes: + for k, v in dgl_g_new.edges[e_type].data.items(): + assert v.tolist() == dgl_g.edges[e_type].data[k].tolist() + + db.delete_graph(name, drop_collections=True) + + +@pytest.mark.parametrize( + "adapter, name, v_cols, e_cols, dgl_g_old", + [ + ( + adbdgl_adapter, + "SocialGraph", + {"user", "game"}, + {"plays", "follows"}, + get_social_graph(), + ) + ], +) +def test_adb_collections_to_dgl( + adapter: ADBDGL_Adapter, + name: str, + v_cols: Set[str], + e_cols: Set[str], + dgl_g_old: Union[DGLGraph, 
DGLHeteroGraph], ) -> None: - adb_g = adapter.dgl_to_arangodb(name, dgl_g, overwrite_graph, **import_options) - assert_arangodb_data(name, dgl_g, adb_g) + if dgl_g_old: + db.delete_graph(name, drop_collections=True, ignore_missing=True) + adapter.dgl_to_arangodb(name, dgl_g_old) + + dgl_g_new = adapter.arangodb_collections_to_dgl( + name, + v_cols, + e_cols, + ) + + assert_adb_to_dgl( + dgl_g_new, + metagraph={ + "vertexCollections": {col: {} for col in v_cols}, + "edgeCollections": {col: {} for col in e_cols}, + }, + ) + + if dgl_g_old: + db.delete_graph(name, drop_collections=True) + + +@pytest.mark.parametrize( + "adapter, name, dgl_g_old", + [ + (adbdgl_adapter, "Heterogeneous", get_fake_hetero_dataset()), + ], +) +def test_adb_graph_to_dgl( + adapter: ADBDGL_Adapter, name: str, dgl_g_old: Union[DGLGraph, DGLHeteroGraph] +) -> None: + if dgl_g_old: + db.delete_graph(name, drop_collections=True, ignore_missing=True) + adapter.dgl_to_arangodb(name, dgl_g_old) + + dgl_g_new = adapter.arangodb_graph_to_dgl(name) + + graph = db.graph(name) + v_cols: Set[str] = graph.vertex_collections() # type: ignore + edge_definitions: List[Dict[str, Any]] = graph.edge_definitions() # type: ignore + e_cols: Set[str] = {c["edge_collection"] for c in edge_definitions} + + assert_adb_to_dgl( + dgl_g_new, + metagraph={ + "vertexCollections": {col: {} for col in v_cols}, + "edgeCollections": {col: {} for col in e_cols}, + }, + ) + + if dgl_g_old: + db.delete_graph(name, drop_collections=True) -def assert_dgl_data( - db: StandardDatabase, dgl_g: DGLGraph, metagraph: ArangoMetagraph +def test_full_cycle_imdb() -> None: + name = "imdb" + db.delete_graph(name, drop_collections=True, ignore_missing=True) + arango_restore(con, "tests/data/adb/imdb_dump") + db.create_graph( + name, + edge_definitions=[ + { + "edge_collection": "Ratings", + "from_vertex_collections": ["Users"], + "to_vertex_collections": ["Movies"], + }, + ], + ) + + adb_to_dgl_metagraph: ADBMetagraph = { + 
"vertexCollections": { + "Movies": { + "label": "Comedy", + "features": { + "Action": IdentityEncoder(dtype=long), + "Drama": IdentityEncoder(dtype=long), + # etc.... + }, + }, + "Users": { + "features": { + "Age": IdentityEncoder(dtype=long), + "Gender": CategoricalEncoder(), + } + }, + }, + "edgeCollections": {"Ratings": {"weight": "Rating"}}, + } + + dgl_g = adbdgl_adapter.arangodb_to_dgl(name, adb_to_dgl_metagraph) + assert_adb_to_dgl(dgl_g, adb_to_dgl_metagraph) + + dgl_to_adb_metagraph: DGLMetagraph = { + "nodeTypes": { + "Movies": { + "label": "comedy", + "features": ["action", "drama"], + }, + "Users": {"features": udf_users_features_tensor_to_df}, + }, + "edgeTypes": {("Users", "Ratings", "Movies"): {"weight": "rating"}}, + } + adbdgl_adapter.dgl_to_arangodb(name, dgl_g, dgl_to_adb_metagraph, overwrite=True) + assert_dgl_to_adb(name, dgl_g, dgl_to_adb_metagraph) + + db.delete_graph(name, drop_collections=True) + + +def assert_adb_to_dgl( + dgl_g: Union[DGLGraph, DGLHeteroGraph], metagraph: ADBMetagraph ) -> None: - has_one_ntype = len(metagraph["vertexCollections"]) == 1 - has_one_etype = len(metagraph["edgeCollections"]) == 1 - - for col, atribs in metagraph["vertexCollections"].items(): - num_nodes = dgl_g.num_nodes(col) - assert num_nodes == db.collection(col).count() - - for atrib in atribs: - assert atrib in dgl_g.ndata - if has_one_ntype: - assert len(dgl_g.ndata[atrib]) == num_nodes - else: - assert col in dgl_g.ndata[atrib] - assert len(dgl_g.ndata[atrib][col]) == num_nodes - - for col, atribs in metagraph["edgeCollections"].items(): - num_edges = dgl_g.num_edges(col) - assert num_edges == db.collection(col).count() - - canon_etype = dgl_g.to_canonical_etype(col) - for atrib in atribs: - assert atrib in dgl_g.edata - if has_one_etype: - assert len(dgl_g.edata[atrib]) == num_edges - else: - assert canon_etype in dgl_g.edata[atrib] - assert len(dgl_g.edata[atrib][canon_etype]) == num_edges - - -def assert_arangodb_data( + has_one_ntype = 
len(dgl_g.ntypes) == 1 + has_one_etype = len(dgl_g.canonical_etypes) == 1 + + # Maps ArangoDB Vertex _keys to DGL Node ids + adb_map: ADBMap = defaultdict(dict) + + for v_col, meta in metagraph["vertexCollections"].items(): + n_key = None if has_one_ntype else v_col + collection = db.collection(v_col) + assert collection.count() == dgl_g.num_nodes(n_key) + + df = DataFrame(collection.all()) + adb_map[v_col] = {adb_id: dgl_id for dgl_id, adb_id in enumerate(df["_key"])} + + assert_adb_to_dgl_meta(meta, df, dgl_g.nodes[n_key].data) + + et_df: DataFrame + v_cols: List[str] = list(metagraph["vertexCollections"].keys()) + for e_col, meta in metagraph["edgeCollections"].items(): + collection = db.collection(e_col) + assert collection.count() <= dgl_g.num_edges(None) + + df = DataFrame(collection.all()) + df[["from_col", "from_key"]] = df["_from"].str.split(pat="/", n=1, expand=True) + df[["to_col", "to_key"]] = df["_to"].str.split(pat="/", n=1, expand=True) + + for (from_col, to_col), count in ( + df[["from_col", "to_col"]].value_counts().items() + ): + edge_type = (from_col, e_col, to_col) + if from_col not in v_cols or to_col not in v_cols: + continue + + e_key = None if has_one_etype else edge_type + assert count == dgl_g.num_edges(e_key) + + et_df = df[(df["from_col"] == from_col) & (df["to_col"] == to_col)] + from_nodes = et_df["from_key"].map(adb_map[from_col]).tolist() + to_nodes = et_df["to_key"].map(adb_map[to_col]).tolist() + + assert from_nodes == dgl_g.edges(etype=e_key)[0].tolist() + assert to_nodes == dgl_g.edges(etype=e_key)[1].tolist() + + assert_adb_to_dgl_meta(meta, et_df, dgl_g.edges[e_key].data) + + +def assert_adb_to_dgl_meta( + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + df: DataFrame, + dgl_data: Union[NodeSpace, EdgeSpace], +) -> None: + valid_meta: Dict[str, ADBMetagraphValues] + valid_meta = meta if type(meta) is dict else {m: m for m in meta} + + for k, v in valid_meta.items(): + assert k in dgl_data + assert type(dgl_data[k]) is 
Tensor + + t = dgl_data[k].tolist() + if type(v) is str: + data = df[v].tolist() + assert len(data) == len(t) + assert data == t + + if type(v) is dict: + data = [] + for attr, encoder in v.items(): + if encoder is None: + data.append(tensor(df[attr].to_list())) + if callable(encoder): + data.append(encoder(df[attr])) + + cat_data = cat(data, dim=-1).tolist() + assert len(cat_data) == len(t) + assert cat_data == t + + if callable(v): + data = v(df).tolist() + assert len(data) == len(t) + assert data == t + + +def assert_dgl_to_adb( name: str, dgl_g: Union[DGLGraph, DGLHeteroGraph], - adb_g: ArangoGraph, + metagraph: DGLMetagraph, + explicit_metagraph: bool = False, ) -> None: - is_default_type = dgl_g.canonical_etypes == adbdgl_adapter.DEFAULT_CANONICAL_ETYPE - - node: Tensor - for ntype in dgl_g.ntypes: - adb_v_col = f"{name}_N" if is_default_type else ntype - attributes = dgl_g.node_attr_schemes(ntype).keys() - col = adb_g.vertex_collection(adb_v_col) - - for node in dgl_g.nodes(ntype): - vertex = col.get(str(node.item())) - assert vertex - for atrib in attributes: - assert atrib in vertex - - from_node: Tensor - to_node: Tensor - for c_etype in dgl_g.canonical_etypes: - dgl_from_col, dgl_e_col, dgl_to_col = c_etype - attributes = dgl_g.edge_attr_schemes(c_etype).keys() - - adb_e_col = f"{name}_E" if is_default_type else dgl_e_col - adb_from_col = f"{name}_N" if is_default_type else dgl_from_col - adb_to_col = f"{name}_N" if is_default_type else dgl_to_col - - col = adb_g.edge_collection(adb_e_col) - - from_nodes, to_nodes = dgl_g.edges(etype=c_etype) - for from_node, to_node in zip(from_nodes, to_nodes): - edge = col.find( - { - "_from": f"{adb_from_col}/{str(from_node.item())}", - "_to": f"{adb_to_col}/{str(to_node.item())}", - } - ).next() - assert edge - for atrib in attributes: - assert atrib in edge + has_one_ntype = len(dgl_g.ntypes) == 1 + has_one_etype = len(dgl_g.canonical_etypes) == 1 + has_default_canonical_etypes = dgl_g.canonical_etypes == [("_N", 
"_E", "_N")] + + node_types: List[str] + edge_types: List[DGLCanonicalEType] + explicit_metagraph = metagraph != {} and explicit_metagraph + if explicit_metagraph: + node_types = metagraph.get("nodeTypes", {}).keys() # type: ignore + edge_types = metagraph.get("edgeTypes", {}).keys() # type: ignore + + elif has_default_canonical_etypes: + n_type = name + "_N" + node_types = [n_type] + edge_types = [(n_type, name + "_E", n_type)] + + else: + node_types = dgl_g.ntypes + edge_types = dgl_g.canonical_etypes + + n_meta = metagraph.get("nodeTypes", {}) + for n_type in node_types: + n_key = None if has_one_ntype else n_type + collection = db.collection(n_type) + assert collection.count() == dgl_g.num_nodes(n_key) + + df = DataFrame(collection.all()) + meta = n_meta.get(n_type, {}) + assert_dgl_to_adb_meta(df, meta, dgl_g.nodes[n_key].data, explicit_metagraph) + + e_meta = metagraph.get("edgeTypes", {}) + for e_type in edge_types: + e_key = None if has_one_etype else e_type + from_col, e_col, to_col = e_type + collection = db.collection(e_col) + + df = DataFrame(collection.all()) + df[["from_col", "from_key"]] = df["_from"].str.split(pat="/", n=1, expand=True) + df[["to_col", "to_key"]] = df["_to"].str.split(pat="/", n=1, expand=True) + + et_df = df[(df["from_col"] == from_col) & (df["to_col"] == to_col)] + assert len(et_df) == dgl_g.num_edges(e_key) + + from_nodes = dgl_g.edges(etype=e_key)[0].tolist() + to_nodes = dgl_g.edges(etype=e_key)[1].tolist() + + assert from_nodes == et_df["from_key"].astype(int).tolist() + assert to_nodes == et_df["to_key"].astype(int).tolist() + + meta = e_meta.get(e_type, {}) + assert_dgl_to_adb_meta(et_df, meta, dgl_g.edges[e_key].data, explicit_metagraph) + + +def assert_dgl_to_adb_meta( + df: DataFrame, + meta: Union[Set[str], Dict[Any, DGLMetagraphValues]], + dgl_data: Union[NodeSpace, EdgeSpace], + explicit_metagraph: bool, +) -> None: + valid_meta: Dict[Any, DGLMetagraphValues] + valid_meta = meta if type(meta) is dict else {m: m for m 
in meta} + + if explicit_metagraph: + dgl_keys = set(valid_meta.keys()) + else: + dgl_keys = dgl_data.keys() + + for k in dgl_keys: + data = dgl_data[k] + meta_val = valid_meta.get(k, str(k)) + + assert len(data) == len(df) + + if type(data) is Tensor: + if type(meta_val) is str: + assert meta_val in df + assert df[meta_val].tolist() == data.tolist() + + if type(meta_val) is list: + assert all([e in df for e in meta_val]) + assert df[meta_val].values.tolist() == data.tolist() + + if callable(meta_val): + udf_df = meta_val(data, DataFrame(index=range(len(data)))) + assert all([column in df for column in udf_df.columns]) + for column in udf_df.columns: + assert df[column].tolist() == udf_df[column].tolist() diff --git a/tests/assets/arangorestore b/tests/tools/arangorestore similarity index 100% rename from tests/assets/arangorestore rename to tests/tools/arangorestore