From 63aea62762dcd17fadad69cd7c5c1903d99d395f Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 22:23:23 +0530 Subject: [PATCH] Remove comparison, since the ranking is identical even with varying embedding --- docs/examples/Retrieval_with_FastEmbed.ipynb | 76 ++++++-------------- 1 file changed, 23 insertions(+), 53 deletions(-) diff --git a/docs/examples/Retrieval_with_FastEmbed.ipynb b/docs/examples/Retrieval_with_FastEmbed.ipynb index 33ba9e97..4d3ed1c3 100644 --- a/docs/examples/Retrieval_with_FastEmbed.ipynb +++ b/docs/examples/Retrieval_with_FastEmbed.ipynb @@ -39,11 +39,19 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-02-07 22:20:57.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mfastembed.embedding\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[33m\u001b[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated. Use TextEmbedding instead.\u001b[0m\n" + ] + } + ], "source": [ "from typing import List\n", "import numpy as np\n", - "from fastembed.embedding import FlagEmbedding as Embedding" + "from fastembed import TextEmbedding" ] }, { @@ -84,7 +92,7 @@ " \"His life has been depicted in various films, TV shows, and books\",\n", "]\n", "# Initialize the DefaultEmbedding class with the desired parameters\n", - "embedding_model = Embedding(model_name=\"BAAI/bge-small-en\", max_length=512)\n", + "embedding_model = TextEmbedding(model_name=\"BAAI/bge-small-en\", max_length=512)\n", "\n", "# We'll use the passage_embed method to get the embeddings for the documents\n", "embeddings: List[np.ndarray] = list(\n", @@ -124,65 +132,27 @@ " print(f\"Rank {i+1}: {documents[sorted_scores[i]]}\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running and Comparing Queries\n", - "Finally, we run our sample query using the `print_top_k` function.\n", - "\n", - "The differences between using query embeddings and plain embeddings can be observed in the retrieved ranks:\n", - "\n", - "Using query embeddings (from `query_embed` method):" - ] - }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank 1: Maharana Pratap was a Rajput warrior king from Mewar\n", - "Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule\n", - "Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments\n", - "Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar\n", - "Rank 5: He fought against the Mughal Empire led by Akbar\n" - ] - } - ], - "source": [ - "print_top_k(query_embedding, embeddings, documents)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using plain embeddings (from `embed` method):" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Rank 1: Maharana Pratap was a Rajput warrior king from Mewar\n", - "Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule\n", - "Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments\n", - "Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar\n", - "Rank 5: He fought against the Mughal Empire led by Akbar\n" - ] + "data": { + "text/plain": [ + "(array([-0.04393955, 0.04452892, -0.00760788, -0.03399807, 0.01951348],\n", + " dtype=float32),\n", + " array([-0.06002192, 0.04322132, -0.00545516, -0.04419701, -0.00542277],\n", + " dtype=float32))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print_top_k(plain_query_embedding, embeddings, documents)" + "query_embedding[:5], plain_query_embedding[:5]" ] }, {