diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb
index 47cd9ab3..d52a98fc 100644
--- a/docs/examples/Supported_Models.ipynb
+++ b/docs/examples/Supported_Models.ipynb
@@ -1,15 +1,5 @@
{
"cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
{
"cell_type": "code",
"execution_count": 4,
@@ -46,14 +36,6 @@
"
\n",
" \n",
" 0 | \n",
- " BAAI/bge-base-en | \n",
- " 768 | \n",
- " Base English model | \n",
- " 0.50 | \n",
- " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} | \n",
- "
\n",
- " \n",
- " 1 | \n",
" BAAI/bge-base-en-v1.5 | \n",
" 768 | \n",
" Base English model, v1.5 | \n",
@@ -61,31 +43,15 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} | \n",
"
\n",
" \n",
- " 2 | \n",
- " BAAI/bge-large-en-v1.5-quantized | \n",
- " 1024 | \n",
- " Large English model, v1.5 | \n",
- " 1.34 | \n",
- " {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} | \n",
- "
\n",
- " \n",
- " 3 | \n",
+ " 1 | \n",
" BAAI/bge-large-en-v1.5 | \n",
" 1024 | \n",
" Large English model, v1.5 | \n",
" 1.34 | \n",
- " {'hf': 'qdrant/bge-large-en-v1.5-onnx'} | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " BAAI/bge-small-en | \n",
- " 384 | \n",
- " Fast English model | \n",
- " 0.20 | \n",
- " {'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'} | \n",
+ " {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} | \n",
"
\n",
" \n",
- " 5 | \n",
+ " 2 | \n",
" BAAI/bge-small-en-v1.5 | \n",
" 384 | \n",
" Fast and Default English model | \n",
@@ -93,7 +59,7 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} | \n",
"
\n",
" \n",
- " 6 | \n",
+ " 3 | \n",
" BAAI/bge-small-zh-v1.5 | \n",
" 512 | \n",
" Fast and recommended Chinese model | \n",
@@ -101,7 +67,7 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} | \n",
"
\n",
" \n",
- " 7 | \n",
+ " 4 | \n",
" sentence-transformers/all-MiniLM-L6-v2 | \n",
" 384 | \n",
" Sentence Transformer model, MiniLM-L6-v2 | \n",
@@ -109,7 +75,7 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} | \n",
"
\n",
" \n",
- " 8 | \n",
+ " 5 | \n",
" intfloat/multilingual-e5-large | \n",
" 1024 | \n",
" Multilingual model, e5-large. Recommend using this model for non-English languages | \n",
@@ -117,7 +83,7 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} | \n",
"
\n",
" \n",
- " 9 | \n",
+ " 6 | \n",
" sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | \n",
" 768 | \n",
" Sentence-transformers model for tasks like clustering or semantic search | \n",
@@ -125,7 +91,7 @@
" {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} | \n",
"
\n",
" \n",
- " 10 | \n",
+ " 7 | \n",
" jinaai/jina-embeddings-v2-base-en | \n",
" 768 | \n",
" English embedding model supporting 8192 sequence length | \n",
@@ -133,7 +99,7 @@
" {'hf': 'xenova/jina-embeddings-v2-base-en'} | \n",
"
\n",
" \n",
- " 11 | \n",
+ " 8 | \n",
" jinaai/jina-embeddings-v2-small-en | \n",
" 512 | \n",
" English embedding model supporting 8192 sequence length | \n",
@@ -145,61 +111,49 @@
""
],
"text/plain": [
- " model dim \\\n",
- "0 BAAI/bge-base-en 768 \n",
- "1 BAAI/bge-base-en-v1.5 768 \n",
- "2 BAAI/bge-large-en-v1.5-quantized 1024 \n",
- "3 BAAI/bge-large-en-v1.5 1024 \n",
- "4 BAAI/bge-small-en 384 \n",
- "5 BAAI/bge-small-en-v1.5 384 \n",
- "6 BAAI/bge-small-zh-v1.5 512 \n",
- "7 sentence-transformers/all-MiniLM-L6-v2 384 \n",
- "8 intfloat/multilingual-e5-large 1024 \n",
- "9 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
- "10 jinaai/jina-embeddings-v2-base-en 768 \n",
- "11 jinaai/jina-embeddings-v2-small-en 512 \n",
+ " model dim \\\n",
+ "0 BAAI/bge-base-en-v1.5 768 \n",
+ "1 BAAI/bge-large-en-v1.5 1024 \n",
+ "2 BAAI/bge-small-en-v1.5 384 \n",
+ "3 BAAI/bge-small-zh-v1.5 512 \n",
+ "4 sentence-transformers/all-MiniLM-L6-v2 384 \n",
+ "5 intfloat/multilingual-e5-large 1024 \n",
+ "6 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
+ "7 jinaai/jina-embeddings-v2-base-en 768 \n",
+ "8 jinaai/jina-embeddings-v2-small-en 512 \n",
"\n",
- " description \\\n",
- "0 Base English model \n",
- "1 Base English model, v1.5 \n",
- "2 Large English model, v1.5 \n",
- "3 Large English model, v1.5 \n",
- "4 Fast English model \n",
- "5 Fast and Default English model \n",
- "6 Fast and recommended Chinese model \n",
- "7 Sentence Transformer model, MiniLM-L6-v2 \n",
- "8 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
- "9 Sentence-transformers model for tasks like clustering or semantic search \n",
- "10 English embedding model supporting 8192 sequence length \n",
- "11 English embedding model supporting 8192 sequence length \n",
+ " description \\\n",
+ "0 Base English model, v1.5 \n",
+ "1 Large English model, v1.5 \n",
+ "2 Fast and Default English model \n",
+ "3 Fast and recommended Chinese model \n",
+ "4 Sentence Transformer model, MiniLM-L6-v2 \n",
+ "5 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
+ "6 Sentence-transformers model for tasks like clustering or semantic search \n",
+ "7 English embedding model supporting 8192 sequence length \n",
+ "8 English embedding model supporting 8192 sequence length \n",
"\n",
- " size_in_GB \\\n",
- "0 0.50 \n",
- "1 0.44 \n",
- "2 1.34 \n",
- "3 1.34 \n",
- "4 0.20 \n",
- "5 0.13 \n",
- "6 0.10 \n",
- "7 0.09 \n",
- "8 2.24 \n",
- "9 1.11 \n",
- "10 0.55 \n",
- "11 0.13 \n",
+ " size_in_GB \\\n",
+ "0 0.44 \n",
+ "1 1.34 \n",
+ "2 0.13 \n",
+ "3 0.10 \n",
+ "4 0.09 \n",
+ "5 2.24 \n",
+ "6 1.11 \n",
+ "7 0.55 \n",
+ "8 0.13 \n",
"\n",
- " sources \n",
- "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n",
- "1 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n",
- "2 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n",
- "3 {'hf': 'qdrant/bge-large-en-v1.5-onnx'} \n",
- "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'} \n",
- "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
- "6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
- "7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
- "8 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
- "9 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
- "10 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
- "11 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
+ " sources \n",
+ "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n",
+ "1 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n",
+ "2 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
+ "3 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
+ "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
+ "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
+ "6 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
+ "7 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
+ "8 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
]
},
"execution_count": 4,
@@ -232,7 +186,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.10.13"
},
"orig_nbformat": 4
},
diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index a60a2912..c0d87303 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -1,6 +1,6 @@
import os
from multiprocessing import get_all_start_methods
-from typing import List, Dict, Any, Tuple, Union, Iterable, Type
+from typing import Any, Dict, Iterable, List, Tuple, Type, Union
import numpy as np
import onnxruntime as ort
@@ -12,15 +12,6 @@
from fastembed.text.text_embedding_base import TextEmbeddingBase
supported_onnx_models = [
- {
- "model": "BAAI/bge-base-en",
- "dim": 768,
- "description": "Base English model",
- "size_in_GB": 0.5,
- "sources": {
- "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz",
- },
- },
{
"model": "BAAI/bge-base-en-v1.5",
"dim": 768,
@@ -31,44 +22,15 @@
"hf": "qdrant/bge-base-en-v1.5-onnx-q",
},
},
- {
- "model": "BAAI/bge-large-en-v1.5-quantized",
- "dim": 1024,
- "description": "Large English model, v1.5",
- "size_in_GB": 1.34,
- "sources": {
- "hf": "qdrant/bge-large-en-v1.5-onnx-q",
- },
- },
{
"model": "BAAI/bge-large-en-v1.5",
"dim": 1024,
"description": "Large English model, v1.5",
"size_in_GB": 1.34,
"sources": {
- "hf": "qdrant/bge-large-en-v1.5-onnx",
- },
- },
- {
- "model": "BAAI/bge-small-en",
- "dim": 384,
- "description": "Fast English model",
- "size_in_GB": 0.2,
- "sources": {
- "url": "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz",
+ "hf": "qdrant/bge-large-en-v1.5-onnx-q",
},
},
- # {
- # "model": "BAAI/bge-small-en",
- # "dim": 384,
- # "description": "Fast English model",
- # "size_in_GB": 0.2,
- # "hf_sources": [],
- # "compressed_url_sources": [
- # "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en.tar.gz",
- # "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz"
- # ]
- # },
{
"model": "BAAI/bge-small-en-v1.5",
"dim": 384,
@@ -98,19 +60,6 @@
"hf": "qdrant/all-MiniLM-L6-v2-onnx",
},
},
- # {
- # "model": "sentence-transformers/all-MiniLM-L6-v2",
- # "dim": 384,
- # "description": "Sentence Transformer model, MiniLM-L6-v2",
- # "size_in_GB": 0.09,
- # "hf_sources": [
- # "qdrant/all-MiniLM-L6-v2-onnx"
- # ],
- # "compressed_url_sources": [
- # "https://storage.googleapis.com/qdrant-fastembed/fast-all-MiniLM-L6-v2.tar.gz",
- # "https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz"
- # ]
- # }
]
diff --git a/tests/test_onnx_embeddings.py b/tests/test_onnx_embeddings.py
index 6fcc64ae..4d7082fd 100644
--- a/tests/test_onnx_embeddings.py
+++ b/tests/test_onnx_embeddings.py
@@ -6,10 +6,8 @@
from fastembed.embedding import DefaultEmbedding, JinaEmbedding
CANONICAL_VECTOR_VALUES = {
- "BAAI/bge-small-en": np.array([-0.0232, -0.0255, 0.0174, -0.0639, -0.0006]),
"BAAI/bge-small-en-v1.5": np.array([0.01522374, -0.02271799, 0.00860278, -0.07424029, 0.00386434]),
"BAAI/bge-small-zh-v1.5": np.array([-0.01023294, 0.07634465, 0.0691722, -0.04458365, -0.03160762]),
- "BAAI/bge-base-en": np.array([0.0115, 0.0372, 0.0295, 0.0121, 0.0346]),
"BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]),
"BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
"sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]),
diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py
index f610fd86..4947f0bc 100644
--- a/tests/test_text_onnx_embeddings.py
+++ b/tests/test_text_onnx_embeddings.py
@@ -6,13 +6,10 @@
from fastembed.text.text_embedding import TextEmbedding
CANONICAL_VECTOR_VALUES = {
- "BAAI/bge-small-en": np.array([-0.0232, -0.0255, 0.0174, -0.0639, -0.0006]),
"BAAI/bge-small-en-v1.5": np.array([0.01522374, -0.02271799, 0.00860278, -0.07424029, 0.00386434]),
"BAAI/bge-small-zh-v1.5": np.array([-0.01023294, 0.07634465, 0.0691722, -0.04458365, -0.03160762]),
- "BAAI/bge-base-en": np.array([0.0115, 0.0372, 0.0295, 0.0121, 0.0346]),
"BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]),
"BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
- "BAAI/bge-large-en-v1.5-quantized": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
"sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]),
"intfloat/multilingual-e5-large": np.array([0.0098, 0.0045, 0.0066, -0.0354, 0.0070]),
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": np.array(