From 7ff857c2b247a0bda0993539a5797d0019f6355c Mon Sep 17 00:00:00 2001 From: Anush008 Date: Wed, 31 Jan 2024 05:57:48 +0530 Subject: [PATCH] docs: supported models update --- docs/examples/Supported_Models.ipynb | 142 ++++++++++++++++----------- fastembed/embedding.py | 4 +- 2 files changed, 89 insertions(+), 57 deletions(-) diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb index 18b64899..44f51db3 100644 --- a/docs/examples/Supported_Models.ipynb +++ b/docs/examples/Supported_Models.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -45,47 +45,47 @@ " \n", " \n", " 0\n", + " BAAI/bge-base-en\n", + " 768\n", + " Base English model\n", + " 0.50\n", + " \n", + " \n", + " 1\n", + " BAAI/bge-base-en-v1.5\n", + " 768\n", + " Base English model, v1.5\n", + " 0.44\n", + " \n", + " \n", + " 2\n", + " BAAI/bge-large-en-v1.5\n", + " 1024\n", + " Large English model, v1.5\n", + " 1.34\n", + " \n", + " \n", + " 3\n", " BAAI/bge-small-en\n", " 384\n", " Fast English model\n", " 0.20\n", " \n", " \n", - " 1\n", + " 4\n", " BAAI/bge-small-en-v1.5\n", " 384\n", " Fast and Default English model\n", " 0.13\n", " \n", " \n", - " 2\n", + " 5\n", " BAAI/bge-small-zh-v1.5\n", " 512\n", " Fast and recommended Chinese model\n", " 0.10\n", " \n", " \n", - " 3\n", - " BAAI/bge-base-en\n", - " 768\n", - " Base English model\n", - " 0.50\n", - " \n", - " \n", - " 4\n", - " BAAI/bge-base-en-v1.5\n", - " 768\n", - " Base English model, v1.5\n", - " 0.44\n", - " \n", - " \n", - " 5\n", - " sentence-transformers/all-MiniLM-L6-v2\n", - " 384\n", - " Sentence Transformer model, MiniLM-L6-v2\n", - " 0.09\n", - " \n", - " \n", " 6\n", " intfloat/multilingual-e5-large\n", " 1024\n", @@ -106,46 +106,76 @@ " English embedding model supporting 8192 sequence length\n", " 0.13\n", " \n", + " \n", + " 9\n", + " sentence-transformers/all-MiniLM-L6-v2\n", + " 384\n", + " Sentence Transformer model, MiniLM-L6-v2\n", + " 0.09\n", + " \n", + " \n", + " 10\n", + " xenova/multilingual-e5-large\n", + " 1024\n", + " Multilingual model. Recommended for non-English languages\n", + " 2.24\n", + " \n", + " \n", + " 11\n", + " xenova/paraphrase-multilingual-mpnet-base-v2\n", + " 768\n", + " Sentence-transformers model for tasks like clustering or semantic search\n", + " 1.11\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " model dim \\\n", - "0 BAAI/bge-small-en 384 \n", - "1 BAAI/bge-small-en-v1.5 384 \n", - "2 BAAI/bge-small-zh-v1.5 512 \n", - "3 BAAI/bge-base-en 768 \n", - "4 BAAI/bge-base-en-v1.5 768 \n", - "5 sentence-transformers/all-MiniLM-L6-v2 384 \n", - "6 intfloat/multilingual-e5-large 1024 \n", - "7 jinaai/jina-embeddings-v2-base-en 768 \n", - "8 jinaai/jina-embeddings-v2-small-en 512 \n", + " model dim \\\n", + "0 BAAI/bge-base-en 768 \n", + "1 BAAI/bge-base-en-v1.5 768 \n", + "2 BAAI/bge-large-en-v1.5 1024 \n", + "3 BAAI/bge-small-en 384 \n", + "4 BAAI/bge-small-en-v1.5 384 \n", + "5 BAAI/bge-small-zh-v1.5 512 \n", + "6 intfloat/multilingual-e5-large 1024 \n", + "7 jinaai/jina-embeddings-v2-base-en 768 \n", + "8 jinaai/jina-embeddings-v2-small-en 512 \n", + "9 sentence-transformers/all-MiniLM-L6-v2 384 \n", + "10 xenova/multilingual-e5-large 1024 \n", + "11 xenova/paraphrase-multilingual-mpnet-base-v2 768 \n", "\n", - " description \\\n", - "0 Fast English model \n", - "1 Fast and Default English model \n", - "2 Fast and recommended Chinese model \n", - "3 Base English model \n", - "4 Base English model, v1.5 \n", - "5 Sentence Transformer model, MiniLM-L6-v2 \n", - "6 Multilingual model, e5-large. Recommend using this model for non-English languages \n", - "7 English embedding model supporting 8192 sequence length \n", - "8 English embedding model supporting 8192 sequence length \n", + " description \\\n", + "0 Base English model \n", + "1 Base English model, v1.5 \n", + "2 Large English model, v1.5 \n", + "3 Fast English model \n", + "4 Fast and Default English model \n", + "5 Fast and recommended Chinese model \n", + "6 Multilingual model, e5-large. Recommend using this model for non-English languages \n", + "7 English embedding model supporting 8192 sequence length \n", + "8 English embedding model supporting 8192 sequence length \n", + "9 Sentence Transformer model, MiniLM-L6-v2 \n", + "10 Multilingual model. Recommended for non-English languages \n", + "11 Sentence-transformers model for tasks like clustering or semantic search \n", "\n", - " size_in_GB \n", - "0 0.20 \n", - "1 0.13 \n", - "2 0.10 \n", - "3 0.50 \n", - "4 0.44 \n", - "5 0.09 \n", - "6 2.24 \n", - "7 0.55 \n", - "8 0.13 " + " size_in_GB \n", + "0 0.50 \n", + "1 0.44 \n", + "2 1.34 \n", + "3 0.20 \n", + "4 0.13 \n", + "5 0.10 \n", + "6 2.24 \n", + "7 0.55 \n", + "8 0.13 \n", + "9 0.09 \n", + "10 2.24 \n", + "11 1.11 " ] }, - "execution_count": 1, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -175,7 +205,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.7" }, "orig_nbformat": 4 }, diff --git a/fastembed/embedding.py b/fastembed/embedding.py index c4abfe51..1330eb27 100644 --- a/fastembed/embedding.py +++ b/fastembed/embedding.py @@ -213,7 +213,9 @@ def embed(self, texts: Iterable[str], batch_size: int = 256, parallel: int = Non raise NotImplementedError @classmethod - def list_supported_models(cls, exclude: List[str] = []) -> List[Dict[str, Any]]: + def list_supported_models( + cls, exclude: List[str] = ["compressed_url_sources", "hf_sources"] + ) -> List[Dict[str, Any]]: """Lists the supported models. Args: