From 582da57cad8aa8352034f45fa18dbbbf872c4b13 Mon Sep 17 00:00:00 2001 From: aMahanna <anthony.mahanna@gmail.com> Date: Fri, 5 Aug 2022 16:45:12 -0400 Subject: [PATCH] new: adbdgl 3.0.0 notebook --- examples/ArangoDB_DGL_Adapter.ipynb | 1030 +++++++++++++++++---------- 1 file changed, 638 insertions(+), 392 deletions(-) diff --git a/examples/ArangoDB_DGL_Adapter.ipynb b/examples/ArangoDB_DGL_Adapter.ipynb index 918fecd..93039c4 100644 --- a/examples/ArangoDB_DGL_Adapter.ipynb +++ b/examples/ArangoDB_DGL_Adapter.ipynb @@ -15,7 +15,7 @@ "id": "U1d45V4OeG89" }, "source": [ - "<a href=\"https://colab.research.google.com/github/arangoml/dgl-adapter/blob/2.1.0/examples/ArangoDB_DGL_Adapter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + "<a href=\"https://colab.research.google.com/github/arangoml/dgl-adapter/blob/3.0.0/examples/ArangoDB_DGL_Adapter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { @@ -34,7 +34,7 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 2.1.0\n", + "Version: 3.0.0\n", "\n", "Objective: Export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, to [Deep Graph Library](https://www.dgl.ai/) (DGL), a python package for graph neural networks, and vice-versa." 
] @@ -57,9 +57,9 @@ "outputs": [], "source": [ "%%capture\n", - "!pip install adbdgl-adapter==2.1.0\n", + "!pip install adbdgl-adapter==3.0.0\n", "!pip install adb-cloud-connector\n", - "!git clone -b 2.1.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", + "!git clone -b 3.0.0 --single-branch https://github.com/arangoml/dgl-adapter.git\n", "\n", "## For drawing purposes \n", "!pip install matplotlib\n", @@ -70,26 +70,27 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "niijQHqBM6zp" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "niijQHqBM6zp", + "outputId": "77df8f72-4000-44e8-9dd6-c56bbf33c07d" }, "outputs": [], "source": [ "# All imports\n", "\n", + "import pandas\n", + "import torch\n", "import dgl\n", - "from dgl import remove_self_loop\n", - "from dgl.data import MiniGCDataset\n", "from dgl.data import KarateClubDataset\n", "\n", - "import torch\n", - "from torch import Tensor\n", - "\n", - "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", - "from adbdgl_adapter.typings import Json, ArangoMetagraph, DGLCanonicalEType, DGLDataDict\n", - "\n", "from arango import ArangoClient\n", "from adb_cloud_connector import get_temp_credentials\n", "\n", + "from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller\n", + "from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder\n", + "\n", "import json\n", "import logging\n", "\n", @@ -130,7 +131,7 @@ "base_uri": "https://localhost:8080/" }, "id": "vf0350qvj8up", - "outputId": "fbf300df-5dcd-44e8-a746-cb554eba1dd8" + "outputId": "bb473200-893d-4d4e-ed6d-239ec497d0e3" }, "outputs": [], "source": [ @@ -163,7 +164,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oOS3AVAnkQEV", - "outputId": "3a7403db-d11b-4f7a-a0b7-6e8220186273" + "outputId": "5b5feaaa-2a6f-4e0e-ef89-68b9e365a6db" }, "outputs": [], "source": [ @@ -199,7 +200,7 @@ "base_uri": "https://localhost:8080/" }, "id": "meLon-KgkU4h", - "outputId": 
"fa57e121-5294-45f9-b3d0-3a2cfa212da7" + "outputId": "7517b39b-adfa-426d-ccae-89254cf642b5" }, "outputs": [], "source": [ @@ -237,7 +238,7 @@ "base_uri": "https://localhost:8080/" }, "id": "zTebQ0LOlsGA", - "outputId": "f5c06fec-a3e3-41fb-b478-42e492af07de" + "outputId": "c871096b-b06e-4cd8-ad56-06758090600d" }, "outputs": [], "source": [ @@ -280,7 +281,7 @@ "base_uri": "https://localhost:8080/" }, "id": "KsxNujb0mSqZ", - "outputId": "0cf12da9-c754-41a3-9496-5aea0a0faac9" + "outputId": "0b7b4106-7385-4489-e49a-399efbef0cb8" }, "outputs": [], "source": [ @@ -323,7 +324,7 @@ "base_uri": "https://localhost:8080/" }, "id": "2ekGwnJDeG8-", - "outputId": "02cf35c6-9416-44fb-be44-5c0f517e0f78" + "outputId": "84a1c36b-3dc1-47e2-dadf-8a4ebefd98c0" }, "outputs": [], "source": [ @@ -359,7 +360,7 @@ "id": "BM0iRYPDeG8_" }, "source": [ - "For demo purposes, we will be using the [ArangoDB Fraud Detection example graph](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." + "For demo purposes, we will be using the [ArangoDB IMDB example graph](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)." 
] }, { @@ -370,12 +371,38 @@ "base_uri": "https://localhost:8080/" }, "id": "7bgGJ3QkeG8_", - "outputId": "15b25959-5a2f-4d1c-852e-5019845716a4" + "outputId": "1f490370-72f3-4d1b-8950-ef1d0f690218" }, "outputs": [], "source": [ "!chmod -R 755 dgl-adapter/\n", - "!./dgl-adapter/tests/assets/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/examples/data/fraud_dump\" --include-system-collections true" + "!./dgl-adapter/tests/tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/tests/data/adb/imdb_dump\" --include-system-collections true" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XLiXYJPRlVYZ", + "outputId": "2666c5b3-1f62-4bfc-c9af-53bc53f0ffd8" + }, + "outputs": [], + "source": [ + "# Create the IMDB graph\n", + "db.delete_graph(\"imdb\", ignore_missing=True)\n", + "db.create_graph(\n", + " \"imdb\",\n", + " edge_definitions=[\n", + " {\n", + " \"edge_collection\": \"Ratings\",\n", + " \"from_vertex_collections\": [\"Users\"],\n", + " \"to_vertex_collections\": [\"Movies\"],\n", + " },\n", + " ],\n", + ")" ] }, { @@ -404,7 +431,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oG496kBeeG9A", - "outputId": "792a3ad2-3d04-4132-d878-a5e52c58dc17" + "outputId": "e5d8657f-a644-4493-ca16-16a300ac4a87" }, "outputs": [], "source": [ @@ -414,36 +441,35 @@ { "cell_type": "markdown", "metadata": { - "id": "uByvwf9feG9A" + "id": "bvzJXSHHTi3v" }, "source": [ - "# <u>ArangoDB to DGL</u>\n", - "\n" + "# <u>DGL to ArangoDB</u>" ] }, { "cell_type": "markdown", "metadata": { 
- "id": "ZrEDmtqCVD0W" + "id": "UafSB_3JZNwK" }, "source": [ - "#### Via ArangoDB Graph" + "#### Karate Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "H8nlvWCryPW0" + "id": "tx-tjPfx0U_h" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Graph\n", + "Data\n", + "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_graph_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L198-L213)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. " + "Notes\n", + "* The `name` parameter in this case is simply for naming your ArangoDB graph." ] }, { @@ -451,54 +477,70 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 577, + "referenced_widgets": [ + "61d2a0426c324309ab51111933276e3d", + "77c208846c1e4503bc22a5b5504f89ee", + "2d1fc41d509e481cb779603827359184", + "87d9c9de620847f48b4088e8577cd653" + ] }, - "id": "zZ-Hu3lLVHgd", - "outputId": "d1c38c22-eebb-456d-8e4c-140ddd9baed8" + "id": "eRVbiBy4ZdE4", + "outputId": "74ac6cb8-824b-443a-ad6e-9f36b23060a1" }, "outputs": [], "source": [ - "# Define graph name\n", - "graph_name = \"fraud-detection\"\n", + "# Create the DGL graph & draw it\n", + "dgl_karate_graph = KarateClubDataset()[0]\n", + "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", "\n", - "# Create DGL graph from ArangoDB graph\n", - "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name)\n", + "name = \"Karate\"\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = aadbdgl_adapter.arangodb_graph_to_dgl(graph_name, ttl=1000, stream=True)\n", - "# See the full 
parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graph\n", + "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", + "\n", + "# You can also provide valid Python-Arango Import Bulk options to the command above, like such:\n", + "# adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph, batch_size=5, on_duplicate=\"replace\")\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk\n", "\n", - "# Show graph data\n", "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "RQ4CknYfUEuz" + "id": "CNj1xKhwoJoL" }, "source": [ - "#### Via ArangoDB Collections" + "\n", + "#### FakeHeterogeneous Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "bRcCmqWGy1Kf" + "id": "CZ1UX9YX1Zzo" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_collections_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L169-L196)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL 
graph.\n", - "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph." ] }, { @@ -506,55 +548,84 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 408, + "referenced_widgets": [ + "3fc8b14d794a46118b328893bd216405", + "c7e222474ff445fe86e4e599848b2ae2", + "289a6e16c3d640c29d96edf09908bd0f", + "61f3832c906445a3ab7e7ba9b41c0127", + "99bbe81a24db49ff9352987fd97649cd", + "21e50aa61c3d4de19b5cc0bbe27d53c9", + "f9fdfe6ce44e4e1c8f513f82efca3e0d", + "9b2b3abbe2c04af0bc232c9b16bfd90d", + "8444e147be8f44aba06ec1f8a880104e", + "80e69b3aa98b44e295efe3940c1146c2", + "ec7b8b0b853f463fa079dda845891391", + "dd2376f84c794b4989f385a5bb147bd8" + ] }, - "id": "i4XOpdRLUNlJ", - "outputId": "4d53a3d0-316b-40c2-d841-5fb29fa1358b" + "id": "jbJsvMMaoJoT", + "outputId": "c1606984-c2ef-41c1-e8b1-78a4ae40d93c" }, "outputs": [], "source": [ - "# Define collection names\n", - "vertex_collections = {\"account\", \"Class\", \"customer\"}\n", - "edge_collections = {\"accountHolder\", \"Relationship\", \"transaction\"}\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n",
"\n", - "# Create DGL from ArangoDB collections\n", - "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections)\n", + "print(hetero_graph)\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections, ttl=1000, stream=True)\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "name = \"FakeHetero\"\n", + "\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph)\n", "\n", - "# Show graph data\n", "print('\\n--------------------')\n", - "print(dgl_g)\n", - "print(dgl_g.ntypes)\n", - "print(dgl_g.etypes)" + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "qEH6OdSB23Ya" + "id": "n08RC_GtkDrC" }, "source": [ - "#### Via ArangoDB Metagraph" + "\n", + "#### FakeHeterogeneous Graph with a DGL-ArangoDB metagraph" ] }, { "cell_type": "markdown", "metadata": { - "id": "PipFzJ0HzTMA" + "id": "rUD_y0yxkDrK" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", + "API\n", + "* 
`adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance." + "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `metagraph` parameter is an optional object mapping the DGL keys of the node & edge data to strings, list of strings, or user-defined functions." ] }, { @@ -562,69 +633,123 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 408, + "referenced_widgets": [ + "345a5984959c4e57b7e2715fa8eeef8f", + "99e6613c4187459396eea503453934cb", + "968020b1388e4883843575d9198af1cd", + "f1a08470110e4099af2a3d4cf4d0f956", + "6744eb60dfa04a8598fca3b998ce3077", + "09d25097c75c4fa8a2c7376f1965afc5", + "cb8167f00277413eaaa2ad6e0e162fab", + "8128e6d80fcb4a8ca0a72097bb8b6521", + "575205f1a4e64c5d977e69d4939a5605", + "d20843bfa9064d56b37aaea011789a26", + "8bf075c6f7834d3fa905b7ddc37cf128", + "b080f26fe35241fb9cca48e97bc9ef0c" + ] }, - "id": "7Kz8lXXq23Yk", - "outputId": "7804e7ba-3760-4eb5-8669-f6fa20948262" + "id": "xAdjZiJ8kDrK", + "outputId": "2822ed4b-8199-48e2-a753-4b1f60d648a0" }, "outputs": [], "source": [ - "# Define Metagraph\n", - "fraud_detection_metagraph = {\n", - " \"vertexCollections\": {\n", - " \"account\": {\"rank\", \"Balance\", \"customer_id\"},\n", - " \"Class\": {\"concrete\"},\n", - " \"customer\": {\"rank\"},\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", +
"hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "print(hetero_graph)\n", + "\n", + "name = \"FakeHetero\"\n", + "\n", + "# Define the metagraph\n", + "def label_tensor_to_2_column_dataframe(dgl_tensor):\n", + " \"\"\"\n", + " A user-defined function to create two\n", + " ArangoDB attributes out of the 'user' label tensor\n", + "\n", + " NOTE: user-defined functions must return a Pandas DataFrame\n", + " \"\"\"\n", + " label_map = {0: \"Class A\", 1: \"Class B\", 2: \"Class C\"}\n", + "\n", + " df = pandas.DataFrame(columns=[\"label_num\", \"label_str\"])\n", + " df[\"label_num\"] = dgl_tensor.tolist()\n", + " df[\"label_str\"] = df[\"label_num\"].map(label_map)\n", + "\n", + " return df\n", + "\n", + "\n", + "metagraph = {\n", + " \"nodeTypes\": {\n", + " \"user\": {\n", + " \"features\": \"user_age\", # 1) you can specify a string value for attribute renaming\n", + " \"label\": label_tensor_to_2_column_dataframe, # 2) you can specify a function for user-defined handling, as long as the function returns a Pandas DataFrame\n", + " },\n", + " # 3) You can specify a set of strings if you want to preserve the same DGL attribute names for the node/edge type\n", + " \"game\": {\"features\"} # this is equivalent to {\"features\": \"features\"}\n", " },\n", - " \"edgeCollections\": {\n", - " \"accountHolder\": {},\n", - " \"Relationship\": {},\n", - " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\"},\n", + " \"edgeTypes\": {\n", + " (\"user\", \"plays\", \"game\"): {\n", + " # 4) you can specify a list of strings for tensor disassembly (if you know the number of node/edge features in
advance)\n", + " \"features\": [\"hours_played\", \"is_satisfied_with_game\"]\n", + " },\n", " },\n", "}\n", "\n", - "# Create DGL Graph from attributes\n", - "dgl_g = adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", "\n", - "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", - "# dgl_g = adbdgl_adapter.arangodb_to_dgl(graph_name = 'FraudDetection', fraud_detection_metagraph, ttl=1000, stream=True)\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "# Create the ArangoDB graphs\n", + "adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=False)\n", "\n", - "# Show graph data\n", - "print('\\n--------------')\n", - "print(dgl_g)\n", - "print('\\n--------------')\n", - "print(dgl_g.ndata)\n", - "print('--------------\\n')\n", - "print(dgl_g.edata)" + "# Create the ArangoDB graph with `explicit_metagraph=True`\n", + "# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.\n", + "# Only 'user', 'game', and ('user', 'plays', 'game') will be brought over (i.e 'topic', ('user', 'follows', 'user'), ... 
are ignored)\n", + "## adb_hetero_graph = adbdgl_adapter.dgl_to_arangodb(name, hetero_graph, metagraph, explicit_metagraph=True)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "DqIKT1lO4ASw" + "id": "mk6m0hBRkkkT" }, "source": [ - "#### Via ArangoDB Metagraph with a custom controller and verbose logging" + "\n", + "#### FakeHeterogeneous Graph with a user-defined ADBDGL Controller" ] }, { "cell_type": "markdown", "metadata": { - "id": "PGkGh_KjzlYM" + "id": "KG7kFoOUkkkb" }, "source": [ - "Data source\n", - "* ArangoDB Fraud-Detection Collections\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L70-L167)\n", - "* [`adbdgl_adapter.controller._adb_attribute_to_dgl_feature()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L21-L47)\n", + "API\n", + "* `adbdgl_adapter.adapter.dgl_to_arangodb()`\n", "\n", - "Important notes\n", - "* The `name` parameter in this case is simply for naming your DGL graph.\n", - "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our ArangoDB vertex/edge attributes into DGL node/edge features. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." 
+ "Notes\n", + "* The `name` parameter is used to name your ArangoDB graph.\n", + "* The `ADBDGL_Controller` is an optional user-defined class for controlling how nodes & edges are handled when transitioning from DGL to ArangoDB. **It is interpreted as the alternative to the `metagraph` parameter.**" ] }, { @@ -632,143 +757,158 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 443, + "referenced_widgets": [ + "ea5e9803c5de4d2bbb48782069b9829b", + "3f633be94c7d466ea40571e805a76948", + "96e57d98afce44cd8269204dd19ff6e0", + "da43ef4a8c6a41f9bda153a0cd14c2d7", + "3bc228aa98454dc59a604c8f7ff6b2a0", + "65138d18c9c449d1aaaad387293c5ede", + "3ea99b2a6b4246d3abf628ca743f9f24", + "841ce4f5d391457e858c3c48185e259d", + "987bf80aee4b4b97bfad1699f8384af8", + "4ab3c113235746cab5fde158756ab420", + "09e8c93741bf45acb69ba9e757107564", + "d7d06973b2984eb19fa050409bf62222" + ] }, - "id": "U4_vSdU_4AS4", - "outputId": "8af82665-9ae6-40d4-ada2-248edd993291" + "id": "A-DtrD2Ykkkb", + "outputId": "f2672554-16e4-4b88-e24b-f567ff13bb3f" }, "outputs": [], "source": [ - "# Define Metagraph\n", - "fraud_detection_metagraph = {\n", - " \"vertexCollections\": {\n", - " \"account\": {\"rank\"},\n", - " \"Class\": {\"concrete\", \"name\"},\n", - " \"customer\": {\"Sex\", \"Ssn\", \"rank\"},\n", - " },\n", - " \"edgeCollections\": {\n", - " \"accountHolder\": {},\n", - " \"Relationship\": {},\n", - " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\", \"transaction_date\", \"trans_time\"},\n", - " },\n", - "}\n", + "# Create the DGL graph\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", +
"hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", "\n", - "# A user-defined Controller class is REQUIRED when converting non-numerical\n", - "# ArangoDB attributes to DGL features.\n", - "class FraudDetection_ADBDGL_Controller(ADBDGL_Controller):\n", - " \"\"\"ArangoDB-DGL controller.\n", + "print(hetero_graph)\n", "\n", - " Responsible for controlling how ArangoDB attributes\n", - " are converted into DGL features, and vice-versa.\n", + "name = \"FakeHetero\"\n", "\n", - " You can derive your own custom ADBDGL_Controller if you want to maintain\n", - " consistency between your ArangoDB attributes & your DGL features.\n", - " \"\"\"\n", + "# Create a custom ADBDGL_Controller\n", + "class Custom_ADBDGL_Controller(ADBDGL_Controller):\n", + " def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:\n", + " \"\"\"Optionally modify a DGL node object before it gets inserted into its designated ArangoDB collection.\n", "\n", - " def _adb_attribute_to_dgl_feature(self, key: str, col: str, val):\n", + " :param dgl_node: The DGL node object to (optionally) modify.\n", + " :param node_type: The DGL Node Type of the node.\n", + " :return: The DGL Node object\n", " \"\"\"\n", - " Given an ArangoDB attribute key, its assigned value (for an arbitrary document),\n", - " and the collection it belongs to, convert it to a valid\n", - " DGL feature: https://docs.dgl.ai/en/0.6.x/guide/graph-feature.html.\n", - "\n", - " NOTE: You must override this function if you want to transfer non-numerical\n", - " ArangoDB attributes to DGL (DGL only accepts 'attributes' (a.k.a features)\n", - " of numerical types). 
Read more about DGL features here:\n", - " https://docs.dgl.ai/en/0.6.x/new-tutorial/2_dglgraph.html#assigning-node-and-edge-features-to-graph.\n", - "\n", - " :param key: The ArangoDB attribute key name\n", - " :type key: str\n", - " :param col: The ArangoDB collection of the ArangoDB document.\n", - " :type col: str\n", - " :param val: The assigned attribute value of the ArangoDB document.\n", - " :type val: Any\n", - " :return: The attribute's representation as a DGL Feature\n", - " :rtype: Any\n", + " dgl_node[\"foo\"] = \"bar\"\n", + " return dgl_node\n", + "\n", + " def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:\n", + " \"\"\"Optionally modify a DGL edge object before it gets inserted into its designated ArangoDB collection.\n", + "\n", + " :param dgl_edge: The DGL edge object to (optionally) modify.\n", + " :param edge_type: The Edge Type of the DGL edge. Formatted\n", + " as (from_collection, edge_collection, to_collection)\n", + " :return: The DGL Edge object\n", " \"\"\"\n", - " try:\n", - " if col == \"transaction\":\n", - " if key == \"transaction_date\":\n", - " return int(str(val).replace(\"-\", \"\"))\n", - " \n", - " if key == \"trans_time\":\n", - " return int(str(val).replace(\":\", \"\"))\n", - " \n", - " if col == \"customer\":\n", - " if key == \"Sex\":\n", - " return {\n", - " \"M\": 0,\n", - " \"F\": 1\n", - " }.get(val, -1)\n", - "\n", - " if key == \"Ssn\":\n", - " return int(str(val).replace(\"-\", \"\"))\n", - "\n", - " if col == \"Class\":\n", - " if key == \"name\":\n", - " return {\n", - " \"Bank\": 0,\n", - " \"Branch\": 1,\n", - " \"Account\": 2,\n", - " \"Customer\": 3\n", - " }.get(val, -1)\n", - "\n", - " except (ValueError, TypeError, SyntaxError):\n", - " return 0\n", - "\n", - " # Rely on the parent Controller as a final measure\n", - " return super()._adb_attribute_to_dgl_feature(key, col, val)\n", - "\n", - "# Instantiate the new adapter\n", - "fraud_adbdgl_adapter = ADBDGL_Adapter(db, 
FraudDetection_ADBDGL_Controller())\n", - "\n", - "# You can also change the adapter's logging level for access to \n", - "# silent, regular, or verbose logging (logging.WARNING, logging.INFO, logging.DEBUG)\n", - "fraud_adbdgl_adapter.set_logging(logging.DEBUG) # verbose logging\n", - "\n", - "# Create DGL Graph from attributes\n", - "dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + " dgl_edge[\"bar\"] = \"foo\"\n", + " return dgl_edge\n", "\n", - "# Show graph data\n", - "print('\\n--------------')\n", - "print(dgl_g)\n", - "print('\\n--------------')\n", - "print(dgl_g.ndata)\n", - "print('--------------\\n')\n", - "print(dgl_g.edata)" + "# Delete the graph if it already exists\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb(name, hetero_graph)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"URL: \" + con[\"url\"])\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\\n\")" ] }, { "cell_type": "markdown", "metadata": { - "id": "bvzJXSHHTi3v" + "id": "uByvwf9feG9A" }, "source": [ - "# <u>DGL to ArangoDB</u>" + "# <u>ArangoDB to DGL</u>\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165, + "referenced_widgets": [ + "c6cffa0a64434e56879ba2a8c9de018a", + "0083494093574c50952dd066502a708d", + "1dea128bde204a8fa53e094e014183fe", + "50f8ff3637ee4fc7af8c811cd5d177be", + "6582a9d3fe044d5380d8e918f3bc5a6d", + "40da9dd52dd6443684b990f74b6cb876", + 
"80d19dc0d20842c3b5c7313c0ad23d24", + "0478c90ef8234f3a8987dbe9cd3030b2", + "c61e3997250d4f93a8e0494db674892d", + "97e7543f202749c197515a9c5c79adbe", + "88e83ddc1ca1464291e1631b8fced847", + "a9c14a3f339445338119631c8e56ff68" + ] + }, + "id": "rnMe3iMz2K7j", + "outputId": "b1485ec1-64bf-43d5-a5fe-7d6bd5fc2da1" + }, + "outputs": [], + "source": [ + "# Start from scratch! (with the same DGL graph)\n", + "hetero_graph = dgl.heterograph({\n", + " (\"user\", \"follows\", \"user\"): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"follows\", \"topic\"): (torch.tensor([1, 1]), torch.tensor([1, 2])),\n", + " (\"user\", \"plays\", \"game\"): (torch.tensor([0, 3]), torch.tensor([3, 4])),\n", + "})\n", + "hetero_graph.nodes[\"user\"].data[\"features\"] = torch.tensor([21, 44, 16, 25])\n", + "hetero_graph.nodes[\"user\"].data[\"label\"] = torch.tensor([1, 2, 0, 1])\n", + "hetero_graph.nodes[\"game\"].data[\"features\"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])\n", + "hetero_graph.edges[(\"user\", \"plays\", \"game\")].data[\"features\"] = torch.tensor([[6, 1], [1000, 0]])\n", + "\n", + "db.delete_graph(\"FakeHetero\", drop_collections=True, ignore_missing=True)\n", + "adbdgl_adapter.dgl_to_arangodb(\"FakeHetero\", hetero_graph)" ] }, { "cell_type": "markdown", "metadata": { - "id": "UafSB_3JZNwK" + "id": "ZrEDmtqCVD0W" }, "source": [ - "#### Karate Graph" + "#### Via ArangoDB Graph" ] }, { "cell_type": "markdown", "metadata": { - "id": "tx-tjPfx0U_h" + "id": "H8nlvWCryPW0" }, "source": [ - "Data source\n", - "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_graph_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` 
parameter in this case is simply for naming your ArangoDB graph." + "Notes\n", + "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. \n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." ] }, { @@ -777,63 +917,67 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 683 + "height": 184, + "referenced_widgets": [ + "9403e71c2bbe46bd9e6d49d555264554", + "34c4ef0c4aa5454893c0f0fa35902fbd", + "1690574b32cc4b48a8b87520458d5066", + "a9edf4f85a4a4504b155608bb740178a", + "fd2db543279f4a13ab6376b9c23160e0", + "5c310145af4f4c90b659dee771185ab6", + "31a9f782f36d407f8cc42b19679c5c2c", + "9fd8d07a43cd4c06a2d448047ede846c", + "2c2900512b5244d3a0fcaf7409446d0e", + "c5d064af7f4a49dca6716f98d052e951" + ] }, - "id": "eRVbiBy4ZdE4", - "outputId": "c629be2d-1bc9-4539-c7f2-d3ae46676659" + "id": "zZ-Hu3lLVHgd", + "outputId": "85729665-feb3-4382-e84b-4286162581c3" }, "outputs": [], "source": [ - "# Create the DGL graph & draw it\n", - "dgl_karate_graph = KarateClubDataset()[0]\n", - "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", - "\n", - "name = \"Karate\"\n", + "# Define graph name\n", + "name = \"FakeHetero\"\n", "\n", - "# Delete the graph if it already exists\n", - "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "# Create DGL graph from the ArangoDB graph\n", + "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(name)\n", "\n", - "# Create the ArangoDB graph\n", - "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", - "\n", - "# You can also provide valid Python-Arango Import Bulk options to the command above, like such:\n", - "# adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph, batch_size=5, on_duplicate=\"replace\")\n", - "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.collection.Collection.import_bulk\n", + "# You can also provide 
valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(name, ttl=1000, stream=True)\n", + "# See the full parameter list at https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", "\n", + "# Show graph data\n", "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(f\"View the created graph here: {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", - "print(f\"View the original graph below:\\n\")" + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" ] }, { "cell_type": "markdown", "metadata": { - "id": "gshTlSX_ZZsS" + "id": "RQ4CknYfUEuz" }, "source": [ - "\n", - "#### MiniGCDataset Graphs" + "#### Via ArangoDB Collections" ] }, { "cell_type": "markdown", "metadata": { - "id": "KaExiE2x0-M6" + "id": "bRcCmqWGy1Kf" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_collections_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph."
+ "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `v_cols` & `e_cols` parameters must point to existing ArangoDB collections within your ArangoDB instance.\n", + "* Due to risk of ambiguity, this method does **not** carry over ArangoDB attributes to DGL." ] }, { @@ -842,82 +986,64 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 253, + "referenced_widgets": [ + "f01997b9b43d43368d632e26ba9732ad", + "14b29dc1f2b8454fa9acc1d79dcd4870", + "5f5c119141a24cab907ceb2da27e0244", + "46b88027e41a43578ebcc47513dd6911", + "7a43c4b816da4a40b0eed167a85eef22", + "eb376d5cf782424aaccbce31f0d3ede5", + "7a4db2b18c634bef932fb9b1157d4af1", + "b5be8c1e4ab3415c9fffbb61aeb0fff3", + "4e085418ce1b41e1bc24ad6acea92fc4", + "7b5dba3f4d50466eb2071cb13548ef1b" + ] }, - "id": "dADiexlAioGH", - "outputId": "9921ec34-b860-49e8-f8cb-0b403029ead4" + "id": "i4XOpdRLUNlJ", + "outputId": "c0fa5973-3e46-4227-8b0c-48b4f14736e5" }, "outputs": [], "source": [ - "# Load the dgl graphs & draw:\n", - "## 1) Lollipop Graph\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "plt.figure(1)\n", - "nx.draw(dgl_lollipop_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 2) Hypercube Graph\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "plt.figure(2)\n", - "nx.draw(dgl_hypercube_graph.to_networkx(), with_labels=True)\n", - "\n", - "## 3) Clique Graph\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "plt.figure(3)\n", - "nx.draw(dgl_clique_graph.to_networkx(), with_labels=True)\n", - "\n", - "lollipop = \"Lollipop\"\n", - "hypercube = \"Hypercube\"\n", - "clique = \"Clique\"\n", - "\n", - "# Delete the graphs from ArangoDB if they already exist\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True,
ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "name = \"FakeHetero\"\n", "\n", - "# Create the ArangoDB graphs\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = adbdgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph)\n", + "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\n", + " name, \n", + " v_cols={\"user\", \"game\"},\n", + " e_cols={\"plays\", \"follows\"}\n", + ")\n", "\n", + "# Show graph data (notice that the \"topic\" data is skipped)\n", "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")\n", - "print(f\"View the original graphs below:\\n\")" + "print(dgl_g)\n", + "print(dgl_g.ndata) # note how this is empty\n", + "print(dgl_g.edata) # note how this is empty" ] }, { "cell_type": "markdown", "metadata": { - "id": "CNj1xKhwoJoL" + "id": "qEH6OdSB23Ya" }, "source": [ - "\n", - "#### MiniGCDataset Graphs with attributes" + "#### Via ArangoDB-DGL metagraph 1" ] }, { "cell_type": "markdown", "metadata": { - "id": "CZ1UX9YX1Zzo" + "id": "PipFzJ0HzTMA" }, "source": [ - "Data source\n", - "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - "Package methods 
used\n", - "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/adapter.py#L215-L311)\n", - "* [`adbdgl_adapter.controller._dgl_feature_to_adb_attribute()`](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L49-L70)\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - "Important notes\n", - "* The `name` parameters in this case are simply for naming your ArangoDB graph.\n", - "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our DGL node/edge features into ArangoDB vertex/edge attributes. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/2.0.0/adbdgl_adapter/controller.py#L11)." + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. It should contain collections & associated document attribute names that exist within your ArangoDB instance."
] }, { @@ -925,118 +1051,238 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 409, + "referenced_widgets": [ + "77b31c42e914410aaea93044f1390121", + "8349f1e6b1f34680bacd7de1a1937122", + "38aaa492d75c48f38de60ea0cc5fa93f", + "63845b04ecbc40de8bcc017d754ac907", + "4b7f5f21b98b4c5d8475929bf1f01a65", + "404a19cadaca4b85a957cad231b73cbb", + "bd8b6caa7d2d4df1a99b1870ecc0ae46", + "13d0f7da120b40b993ce3c0b257d5788", + "ea88ab86e9774ed78ea62daa6e338637", + "712770e675424d7eb0c8efd6c34f2012" + ] }, - "id": "jbJsvMMaoJoT", - "outputId": "6dba7563-84b8-4934-a07f-1525ef67bd5e" + "id": "7Kz8lXXq23Yk", + "outputId": "b17433d7-d344-4748-ffe3-f0abca6fb112" }, "outputs": [], "source": [ - "# Load the dgl graphs\n", - "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", - "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", - "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", - "\n", - " # Add DGL Node & Edge Features to each graph\n", - "dgl_lollipop_graph.ndata[\"random_ndata\"] = torch.tensor(\n", - " [[i, i, i] for i in range(0, dgl_lollipop_graph.num_nodes())]\n", - ")\n", - "dgl_lollipop_graph.edata[\"random_edata\"] = torch.rand(dgl_lollipop_graph.num_edges())\n", + "# Define the Metagraph that transfers ArangoDB attributes \"as is\",\n", + "# meaning the data is already formatted to DGL data standards\n", + "metagraph_v1 = {\n", + " \"vertexCollections\": {\n", + " # Move the \"features\" & \"label\" ArangoDB attributes to DGL as \"features\" & \"label\" Tensors\n", + " \"user\": {\"features\", \"label\"}, # equivalent to {\"features\": \"features\", \"label\": \"label\"}\n", + " \"game\": {\"dgl_game_features\": \"features\"},\n", + " \"topic\": {},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"dgl_plays_features\": \"features\"}, \n", + " \"follows\": {}\n", + " },\n", + "}\n", "\n", -
"dgl_hypercube_graph.ndata[\"random_ndata\"] = torch.rand(dgl_hypercube_graph.num_nodes())\n", - "dgl_hypercube_graph.edata[\"random_edata\"] = torch.tensor(\n", - " [[[i], [i], [i]] for i in range(0, dgl_hypercube_graph.num_edges())]\n", - ")\n", + "# Create the DGL graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v1)\n", "\n", - "dgl_clique_graph.ndata['clique_ndata'] = torch.tensor([1,2,3,4,5,6])\n", - "dgl_clique_graph.edata['clique_edata'] = torch.tensor(\n", - " [1 if i % 2 == 0 else 0 for i in range(0, dgl_clique_graph.num_edges())]\n", - ")\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0806IB4o3WRz" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cnByWtpa3WR7" + }, + "source": [ + "Data\n", + "* [ArangoDB IMDB Movie Dataset](https://www.arangodb.com/docs/stable/arangosearch-example-datasets.html#imdb-movie-dataset)\n", "\n", - "# A user-defined Controller class is OPTIONAL when converting DGL features\n", - "# to ArangoDB attributes. 
NOTE: A custom Controller is NOT needed if you want to\n", - "# keep the numerical-based values of your DGL features.\n", - "class Clique_ADBDGL_Controller(ADBDGL_Controller):\n", - " \"\"\"ArangoDB-DGL controller.\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - " Responsible for controlling how ArangoDB attributes\n", - " are converted into DGL features, and vice-versa.\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels.
In this example, we rely on user-defined encoders to build DGL-ready tensors (i.e. feature matrices) from ArangoDB attributes. See https://pytorch-geometric.readthedocs.io/en/latest/notes/load_csv.html for an example on using encoders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 499, + "referenced_widgets": [ + "2b13e46a722e4be384fad74e1b3e6461", + "848230df62434c77b5b18f9a43e2d14f", + "59405e2d0c164d5b965680cc9d9cd8f3", + "2a380fe111794c3a951cdafa4a2bf0b3", + "3d081c88cd2945fa9534de722669ada9", + "82f996185e8444ada5e18602e2f8e105" + ] + }, + "id": "cKqLoawE3WR7", + "outputId": "02a8bfed-44ae-4c76-9eea-ba7348738707" + }, + "outputs": [], + "source": [ + "# Define the Metagraph that transfers attributes via user-defined encoders\n", + "metagraph_v2 = {\n", + " \"vertexCollections\": {\n", + " \"Movies\": {\n", + " \"features\": { # Build a feature matrix from the \"Action\" & \"Drama\" document attributes\n", + " \"Action\": IdentityEncoder(dtype=torch.long),\n", + " \"Drama\": IdentityEncoder(dtype=torch.long),\n", + " },\n", + " \"label\": \"Comedy\",\n", + " },\n", + " \"Users\": {\n", + " \"features\": {\n", + " \"Gender\": CategoricalEncoder(), # CategoricalEncoder(mapping={\"M\": 0, \"F\": 1}),\n", + " \"Age\": IdentityEncoder(dtype=torch.long),\n", + " }\n", + " },\n", + " },\n", + " \"edgeCollections\": {\"Ratings\": {\"weight\": \"Rating\"}},\n", + "}\n", "\n", - " You can derive your own custom ADBDGL_Controller if you want to maintain\n", - " consistency between your ArangoDB attributes & your DGL features.\n", - " \"\"\"\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"IMDB\", metagraph_v2)\n", "\n", - " def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor):\n", - " \"\"\"\n", - " Given a DGL feature key, its assigned value (for an arbitrary node or edge),\n", - " and the collection it belongs to, convert it
to a valid ArangoDB attribute\n", - " (e.g string, list, number, ...).\n", - "\n", - " NOTE: No action is needed here if you want to keep the numerical-based values\n", - " of your DGL features.\n", - "\n", - " :param key: The DGL attribute key name\n", - " :type key: str\n", - " :param col: The ArangoDB collection of the (soon-to-be) ArangoDB document.\n", - " :type col: str\n", - " :param val: The assigned attribute value of the DGL node.\n", - " :type val: Tensor\n", - " :return: The feature's representation as an ArangoDB Attribute\n", - " :rtype: Any\n", - " \"\"\"\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d5ijSCcY4bYs" + }, + "source": [ + "#### Via ArangoDB-DGL metagraph 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aKzxxZrUXJ" + }, + "source": [ + "Data\n", + "* A fake DGL Heterogeneous graph\n", "\n", - " if key == \"clique_ndata\":\n", - " try:\n", - " return [\"Eins\", \"Zwei\", \"Drei\", \"Vier\", \"Fünf\", \"Sechs\"][val-1]\n", - " except:\n", - " return -1\n", + "API\n", + "* `adbdgl_adapter.adapter.arangodb_to_dgl()`\n", "\n", - " if key == \"clique_edata\":\n", - " return bool(val)\n", + "Notes\n", + "* The `name` parameter is purely for documentation purposes in this case.\n", + "* The `metagraph` parameter is an object defining vertex & edge collections to import to DGL, along with collection-level specifications to indicate which ArangoDB attributes will become DGL features/labels. In this example, we rely on user-defined functions to handle ArangoDB attribute to DGL feature conversion." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 377, + "referenced_widgets": [ + "e4b7b35461e848f5819b9f38d67ee652", + "9968f928e28147f7a0956aff8412a608", + "54801c3c74494fe8bf9e2a7fb64bde48", + "903622e283524c7f89635599920c2b14", + "f0d4515c88a44775be59c4e1a0b3c60a", + "9e1eb071f0b24cb6a8d206477b10b831" + ] + }, + "id": "t-lNli3d4bY0", + "outputId": "7bc48392-81a7-4232-aad2-931ff3c8ca48" + }, + "outputs": [], + "source": [ + "# Define the metagraph that transfers attributes via user-defined functions\n", + "def udf_user_features(user_df):\n", + " # process the user_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # user_df[\"features\"] = ...\n", + " return torch.tensor(user_df[\"features\"].to_list())\n", "\n", - " return super()._dgl_feature_to_adb_attribute(key, col, val)\n", "\n", - "# Re-instantiate a new adapter specifically for the Clique Graph Conversion\n", - "clique_adbgl_adapter = ADBDGL_Adapter(db, Clique_ADBDGL_Controller())\n", + "def udf_game_features(game_df):\n", + " # process the game_df Pandas DataFrame to return a feature matrix in a tensor\n", + " # game_df[\"features\"] = ...\n", + " return torch.tensor(game_df[\"features\"].to_list())\n", "\n", - "# Create the ArangoDB graphs\n", - "lollipop = \"Lollipop_With_Attributes\"\n", - "hypercube = \"Hypercube_With_Attributes\"\n", - "clique = \"Clique_With_Attributes\"\n", "\n", - "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "metagraph_v3 = {\n", + " \"vertexCollections\": {\n", + " \"user\": {\n", + " \"features\": udf_user_features, # supports named functions\n", + " \"label\": lambda df: torch.tensor(df[\"label\"].to_list()), # also supports lambda functions\n", + " },\n", + " \"game\": {\"features\": 
udf_game_features},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"plays\": {\"features\": (lambda df: torch.tensor(df[\"features\"].to_list()))},\n", + " },\n", + "}\n", "\n", - "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", - "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", - "adb_clique_graph = clique_adbgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph) # Notice the new adapter here!\n", + "# Create the DGL Graph\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl(\"FakeHetero\", metagraph_v3)\n", "\n", - "print('\\n--------------------')\n", - "print(\"URL: \" + con[\"url\"])\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])\n", - "print('--------------------\\n')\n", - "print(\"View the created graphs here:\\n\")\n", - "print(f\"1) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", - "print(f\"2) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", - "print(f\"3) {con['url']}/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")" + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" ] } ], "metadata": { "colab": { "collapsed_sections": [ - "KS9c-vE5eG89", "ot1oJqn7m78n", "Oc__NAd1eG8-", "7y81WHO8eG8_", "QfE_tKxneG9A", + "bvzJXSHHTi3v", + "UafSB_3JZNwK", + "CNj1xKhwoJoL", + "n08RC_GtkDrC", + "mk6m0hBRkkkT", "uByvwf9feG9A", - "bvzJXSHHTi3v" + "ZrEDmtqCVD0W", + "RQ4CknYfUEuz", + "qEH6OdSB23Ya", + "0806IB4o3WRz", + "d5ijSCcY4bYs" ], - "name": "ArangoDB_DGL_Adapter_v2.ipynb", + "name": "ArangoDB_DGL_Adapter_v3.ipynb", "provenance": [] }, "kernelspec": {