diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb
index 18b64899..44f51db3 100644
--- a/docs/examples/Supported_Models.ipynb
+++ b/docs/examples/Supported_Models.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -45,47 +45,47 @@
"
\n",
" \n",
" 0 | \n",
+ " BAAI/bge-base-en | \n",
+ " 768 | \n",
+ " Base English model | \n",
+ " 0.50 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " BAAI/bge-base-en-v1.5 | \n",
+ " 768 | \n",
+ " Base English model, v1.5 | \n",
+ " 0.44 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " BAAI/bge-large-en-v1.5 | \n",
+ " 1024 | \n",
+ " Large English model, v1.5 | \n",
+ " 1.34 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
" BAAI/bge-small-en | \n",
" 384 | \n",
" Fast English model | \n",
" 0.20 | \n",
"
\n",
" \n",
- " 1 | \n",
+ " 4 | \n",
" BAAI/bge-small-en-v1.5 | \n",
" 384 | \n",
" Fast and Default English model | \n",
" 0.13 | \n",
"
\n",
" \n",
- " 2 | \n",
+ " 5 | \n",
" BAAI/bge-small-zh-v1.5 | \n",
" 512 | \n",
" Fast and recommended Chinese model | \n",
" 0.10 | \n",
"
\n",
" \n",
- " 3 | \n",
- " BAAI/bge-base-en | \n",
- " 768 | \n",
- " Base English model | \n",
- " 0.50 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " BAAI/bge-base-en-v1.5 | \n",
- " 768 | \n",
- " Base English model, v1.5 | \n",
- " 0.44 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " sentence-transformers/all-MiniLM-L6-v2 | \n",
- " 384 | \n",
- " Sentence Transformer model, MiniLM-L6-v2 | \n",
- " 0.09 | \n",
- "
\n",
- " \n",
" 6 | \n",
" intfloat/multilingual-e5-large | \n",
" 1024 | \n",
@@ -106,46 +106,76 @@
" English embedding model supporting 8192 sequence length | \n",
" 0.13 | \n",
"
\n",
+ " \n",
+ " 9 | \n",
+ " sentence-transformers/all-MiniLM-L6-v2 | \n",
+ " 384 | \n",
+ " Sentence Transformer model, MiniLM-L6-v2 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " xenova/multilingual-e5-large | \n",
+ " 1024 | \n",
+ " Multilingual model. Recommended for non-English languages | \n",
+ " 2.24 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " xenova/paraphrase-multilingual-mpnet-base-v2 | \n",
+ " 768 | \n",
+ " Sentence-transformers model for tasks like clustering or semantic search | \n",
+ " 1.11 | \n",
+ "
\n",
" \n",
"\n",
""
],
"text/plain": [
- " model dim \\\n",
- "0 BAAI/bge-small-en 384 \n",
- "1 BAAI/bge-small-en-v1.5 384 \n",
- "2 BAAI/bge-small-zh-v1.5 512 \n",
- "3 BAAI/bge-base-en 768 \n",
- "4 BAAI/bge-base-en-v1.5 768 \n",
- "5 sentence-transformers/all-MiniLM-L6-v2 384 \n",
- "6 intfloat/multilingual-e5-large 1024 \n",
- "7 jinaai/jina-embeddings-v2-base-en 768 \n",
- "8 jinaai/jina-embeddings-v2-small-en 512 \n",
+ " model dim \\\n",
+ "0 BAAI/bge-base-en 768 \n",
+ "1 BAAI/bge-base-en-v1.5 768 \n",
+ "2 BAAI/bge-large-en-v1.5 1024 \n",
+ "3 BAAI/bge-small-en 384 \n",
+ "4 BAAI/bge-small-en-v1.5 384 \n",
+ "5 BAAI/bge-small-zh-v1.5 512 \n",
+ "6 intfloat/multilingual-e5-large 1024 \n",
+ "7 jinaai/jina-embeddings-v2-base-en 768 \n",
+ "8 jinaai/jina-embeddings-v2-small-en 512 \n",
+ "9 sentence-transformers/all-MiniLM-L6-v2 384 \n",
+ "10 xenova/multilingual-e5-large 1024 \n",
+ "11 xenova/paraphrase-multilingual-mpnet-base-v2 768 \n",
"\n",
- " description \\\n",
- "0 Fast English model \n",
- "1 Fast and Default English model \n",
- "2 Fast and recommended Chinese model \n",
- "3 Base English model \n",
- "4 Base English model, v1.5 \n",
- "5 Sentence Transformer model, MiniLM-L6-v2 \n",
- "6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
- "7 English embedding model supporting 8192 sequence length \n",
- "8 English embedding model supporting 8192 sequence length \n",
+ " description \\\n",
+ "0 Base English model \n",
+ "1 Base English model, v1.5 \n",
+ "2 Large English model, v1.5 \n",
+ "3 Fast English model \n",
+ "4 Fast and Default English model \n",
+ "5 Fast and recommended Chinese model \n",
+ "6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
+ "7 English embedding model supporting 8192 sequence length \n",
+ "8 English embedding model supporting 8192 sequence length \n",
+ "9 Sentence Transformer model, MiniLM-L6-v2 \n",
+ "10 Multilingual model. Recommended for non-English languages \n",
+ "11 Sentence-transformers model for tasks like clustering or semantic search \n",
"\n",
- " size_in_GB \n",
- "0 0.20 \n",
- "1 0.13 \n",
- "2 0.10 \n",
- "3 0.50 \n",
- "4 0.44 \n",
- "5 0.09 \n",
- "6 2.24 \n",
- "7 0.55 \n",
- "8 0.13 "
+ " size_in_GB \n",
+ "0 0.50 \n",
+ "1 0.44 \n",
+ "2 1.34 \n",
+ "3 0.20 \n",
+ "4 0.13 \n",
+ "5 0.10 \n",
+ "6 2.24 \n",
+ "7 0.55 \n",
+ "8 0.13 \n",
+ "9 0.09 \n",
+ "10 2.24 \n",
+ "11 1.11 "
]
},
- "execution_count": 1,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -175,7 +205,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.17"
+ "version": "3.11.7"
},
"orig_nbformat": 4
},
diff --git a/fastembed/embedding.py b/fastembed/embedding.py
index c4abfe51..1330eb27 100644
--- a/fastembed/embedding.py
+++ b/fastembed/embedding.py
@@ -213,7 +213,9 @@ def embed(self, texts: Iterable[str], batch_size: int = 256, parallel: int = Non
raise NotImplementedError
@classmethod
- def list_supported_models(cls, exclude: List[str] = []) -> List[Dict[str, Any]]:
+ def list_supported_models(
+ cls, exclude: List[str] = ["compressed_url_sources", "hf_sources"]
+ ) -> List[Dict[str, Any]]:
"""Lists the supported models.
Args: