From 2834772afc155f03820e63954f3793dc3161373f Mon Sep 17 00:00:00 2001 From: niklas Date: Tue, 31 Oct 2023 16:32:28 +0100 Subject: [PATCH] Fix document_index notebook --- src/examples/document_index.ipynb | 39 ++++++++++++------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/src/examples/document_index.ipynb b/src/examples/document_index.ipynb index 6c27a55f8..ae4bbf4e8 100644 --- a/src/examples/document_index.ipynb +++ b/src/examples/document_index.ipynb @@ -73,9 +73,17 @@ "outputs": [], "source": [ "# change this value if you want to use a collection of a different name\n", + "from intelligence_layer.connectors.document_index.document_index import CollectionPath\n", + "\n", + "\n", "COLLECTION = \"demo\"\n", "\n", - "document_index.create_collection(namespace=NAMESPACE, collection=COLLECTION)" + "collection_path = CollectionPath(\n", + " namespace=NAMESPACE,\n", + " collection=COLLECTION\n", + ")\n", + "\n", + "document_index.create_collection(collection_path)" ] }, { @@ -170,8 +178,12 @@ "metadata": {}, "outputs": [], "source": [ + "from intelligence_layer.connectors.document_index.document_index import DocumentContents, DocumentPath\n", + "\n", + "\n", "for doc in documents:\n", - " document_index.add_document(namespace=NAMESPACE, collection=COLLECTION, name=doc[\"name\"], content=doc[\"content\"])" + " document_path = DocumentPath(collection_path=collection_path, document_name=doc[\"name\"])\n", + " document_index.add_document(document_path, contents=DocumentContents.from_text(doc[\"content\"]))" ] }, { @@ -187,28 +199,7 @@ "metadata": {}, "outputs": [], "source": [ - "document_index.list_documents(namespace=NAMESPACE, collection=COLLECTION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the documents are uploaded, they are split into chunks.\n", - "Chunks are the subparts of the original texts, usually a few hundred tokens long.\n", - "You can think of a chunk as a paragraph of text.\n", - "While uploading, the DI splits each document into chunks and generates one embedding each.\n", - "\n", - "Let's see what a chunked document looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "document_index.get_document(namespace=NAMESPACE, collection=COLLECTION, name=\"robert_moses\", get_chunks=True)" + "document_index.list_documents(collection_path)" ] }, {