From 2834772afc155f03820e63954f3793dc3161373f Mon Sep 17 00:00:00 2001
From: niklas <niklas.finken@aleph-alpha.com>
Date: Tue, 31 Oct 2023 16:32:28 +0100
Subject: [PATCH] Fix document_index notebook

---
 src/examples/document_index.ipynb | 39 ++++++++++++-------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/src/examples/document_index.ipynb b/src/examples/document_index.ipynb
index 6c27a55f8..ae4bbf4e8 100644
--- a/src/examples/document_index.ipynb
+++ b/src/examples/document_index.ipynb
@@ -73,9 +73,17 @@
    "outputs": [],
    "source": [
+    "from intelligence_layer.connectors.document_index.document_index import CollectionPath\n",
+    "\n",
+    "\n",
     "# change this value if you want to use a collection of a different name\n",
     "COLLECTION = \"demo\"\n",
     "\n",
-    "document_index.create_collection(namespace=NAMESPACE, collection=COLLECTION)"
+    "collection_path = CollectionPath(\n",
+    "    namespace=NAMESPACE,\n",
+    "    collection=COLLECTION\n",
+    ")\n",
+    "\n",
+    "document_index.create_collection(collection_path)"
    ]
   },
   {
@@ -170,8 +178,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from intelligence_layer.connectors.document_index.document_index import DocumentContents, DocumentPath\n",
+    "\n",
+    "\n",
     "for doc in documents:\n",
-    "    document_index.add_document(namespace=NAMESPACE, collection=COLLECTION, name=doc[\"name\"], content=doc[\"content\"])"
+    "    document_path = DocumentPath(collection_path=collection_path, document_name=doc[\"name\"])\n",
+    "    document_index.add_document(document_path, contents=DocumentContents.from_text(doc[\"content\"]))"
    ]
   },
   {
@@ -187,28 +199,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "document_index.list_documents(namespace=NAMESPACE, collection=COLLECTION)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Once the documents are uploaded, they are split into chunks.\n",
-    "Chunks are the subparts of the original texts, usually a few hundred tokens long.\n",
-    "You can think of a chunk as a paragraph of text.\n",
-    "While uploading, the DI splits each document into chunks and generates one embedding each.\n",
-    "\n",
-    "Let's see what a chunked document looks like:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "document_index.get_document(namespace=NAMESPACE, collection=COLLECTION, name=\"robert_moses\", get_chunks=True)"
+    "document_index.list_documents(collection_path)"
    ]
   },
   {