From 06ebcd1ed41cc8323cdf4ecb47cac3aac01894a6 Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Thu, 15 Feb 2024 11:40:09 +0530 Subject: [PATCH] Remove unused models from supported_onnx_models --- docs/examples/Supported_Models.ipynb | 146 +++++++++------------------ fastembed/text/onnx_embedding.py | 55 +--------- tests/test_onnx_embeddings.py | 2 - tests/test_text_onnx_embeddings.py | 3 - 4 files changed, 52 insertions(+), 154 deletions(-) diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb index 47cd9ab3..d52a98fc 100644 --- a/docs/examples/Supported_Models.ipynb +++ b/docs/examples/Supported_Models.ipynb @@ -1,15 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, { "cell_type": "code", "execution_count": 4, @@ -46,14 +36,6 @@ " \n", " \n", " 0\n", - " BAAI/bge-base-en\n", - " 768\n", - " Base English model\n", - " 0.50\n", - " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'}\n", - " \n", - " \n", - " 1\n", " BAAI/bge-base-en-v1.5\n", " 768\n", " Base English model, v1.5\n", @@ -61,31 +43,15 @@ " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'}\n", " \n", " \n", - " 2\n", - " BAAI/bge-large-en-v1.5-quantized\n", - " 1024\n", - " Large English model, v1.5\n", - " 1.34\n", - " {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'}\n", - " \n", - " \n", - " 3\n", + " 1\n", " BAAI/bge-large-en-v1.5\n", " 1024\n", " Large English model, v1.5\n", " 1.34\n", - " {'hf': 'qdrant/bge-large-en-v1.5-onnx'}\n", - " \n", - " \n", - " 4\n", - " BAAI/bge-small-en\n", - " 384\n", - " Fast English model\n", - " 0.20\n", - " {'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'}\n", + " {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'}\n", " \n", " \n", - " 5\n", + " 2\n", " BAAI/bge-small-en-v1.5\n", " 384\n", " Fast and Default English model\n", @@ -93,7 +59,7 @@ " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'}\n", " \n", " \n", - " 6\n", + " 3\n", " BAAI/bge-small-zh-v1.5\n", " 512\n", " Fast and recommended Chinese model\n", @@ -101,7 +67,7 @@ " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'}\n", " \n", " \n", - " 7\n", + " 4\n", " sentence-transformers/all-MiniLM-L6-v2\n", " 384\n", " Sentence Transformer model, MiniLM-L6-v2\n", @@ -109,7 +75,7 @@ " {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'}\n", " \n", " \n", - " 8\n", + " 5\n", " intfloat/multilingual-e5-large\n", " 1024\n", " Multilingual model, e5-large. Recommend using this model for non-English languages\n", @@ -117,7 +83,7 @@ " {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'}\n", " \n", " \n", - " 9\n", + " 6\n", " sentence-transformers/paraphrase-multilingual-mpnet-base-v2\n", " 768\n", " Sentence-transformers model for tasks like clustering or semantic search\n", @@ -125,7 +91,7 @@ " {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'}\n", " \n", " \n", - " 10\n", + " 7\n", " jinaai/jina-embeddings-v2-base-en\n", " 768\n", " English embedding model supporting 8192 sequence length\n", @@ -133,7 +99,7 @@ " {'hf': 'xenova/jina-embeddings-v2-base-en'}\n", " \n", " \n", - " 11\n", + " 8\n", " jinaai/jina-embeddings-v2-small-en\n", " 512\n", " English embedding model supporting 8192 sequence length\n", @@ -145,61 +111,49 @@ "" ], "text/plain": [ - " model dim \\\n", - "0 BAAI/bge-base-en 768 \n", - "1 BAAI/bge-base-en-v1.5 768 \n", - "2 BAAI/bge-large-en-v1.5-quantized 1024 \n", - "3 BAAI/bge-large-en-v1.5 1024 \n", - "4 BAAI/bge-small-en 384 \n", - "5 BAAI/bge-small-en-v1.5 384 \n", - "6 BAAI/bge-small-zh-v1.5 512 \n", - "7 sentence-transformers/all-MiniLM-L6-v2 384 \n", - "8 intfloat/multilingual-e5-large 1024 \n", - "9 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n", - "10 jinaai/jina-embeddings-v2-base-en 768 \n", - "11 jinaai/jina-embeddings-v2-small-en 512 \n", + " model dim \\\n", + "0 BAAI/bge-base-en-v1.5 768 \n", + "1 BAAI/bge-large-en-v1.5 1024 \n", + "2 BAAI/bge-small-en-v1.5 384 \n", + "3 BAAI/bge-small-zh-v1.5 512 \n", + "4 sentence-transformers/all-MiniLM-L6-v2 384 \n", + "5 intfloat/multilingual-e5-large 1024 \n", + "6 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n", + "7 jinaai/jina-embeddings-v2-base-en 768 \n", + "8 jinaai/jina-embeddings-v2-small-en 512 \n", "\n", - " description \\\n", - "0 Base English model \n", - "1 Base English model, v1.5 \n", - "2 Large English model, v1.5 \n", - "3 Large English model, v1.5 \n", - "4 Fast English model \n", - "5 Fast and Default English model \n", - "6 Fast and recommended Chinese model \n", - "7 Sentence Transformer model, MiniLM-L6-v2 \n", - "8 Multilingual model, e5-large. Recommend using this model for non-English languages \n", - "9 Sentence-transformers model for tasks like clustering or semantic search \n", - "10 English embedding model supporting 8192 sequence length \n", - "11 English embedding model supporting 8192 sequence length \n", + " description \\\n", + "0 Base English model, v1.5 \n", + "1 Large English model, v1.5 \n", + "2 Fast and Default English model \n", + "3 Fast and recommended Chinese model \n", + "4 Sentence Transformer model, MiniLM-L6-v2 \n", + "5 Multilingual model, e5-large. Recommend using this model for non-English languages \n", + "6 Sentence-transformers model for tasks like clustering or semantic search \n", + "7 English embedding model supporting 8192 sequence length \n", + "8 English embedding model supporting 8192 sequence length \n", "\n", - " size_in_GB \\\n", - "0 0.50 \n", - "1 0.44 \n", - "2 1.34 \n", - "3 1.34 \n", - "4 0.20 \n", - "5 0.13 \n", - "6 0.10 \n", - "7 0.09 \n", - "8 2.24 \n", - "9 1.11 \n", - "10 0.55 \n", - "11 0.13 \n", + " size_in_GB \\\n", + "0 0.44 \n", + "1 1.34 \n", + "2 0.13 \n", + "3 0.10 \n", + "4 0.09 \n", + "5 2.24 \n", + "6 1.11 \n", + "7 0.55 \n", + "8 0.13 \n", "\n", - " sources \n", - "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n", - "1 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n", - "2 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n", - "3 {'hf': 'qdrant/bge-large-en-v1.5-onnx'} \n", - "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'} \n", - "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n", - "6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n", - "7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n", - "8 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n", - "9 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n", - "10 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n", - "11 {'hf': 'xenova/jina-embeddings-v2-small-en'} " + " sources \n", + "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n", + "1 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n", + "2 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n", + "3 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n", + "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n", + "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n", + "6 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n", + "7 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n", + "8 {'hf': 'xenova/jina-embeddings-v2-small-en'} " ] }, "execution_count": 4, @@ -232,7 +186,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" }, "orig_nbformat": 4 }, diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py index a60a2912..c0d87303 100644 --- a/fastembed/text/onnx_embedding.py +++ b/fastembed/text/onnx_embedding.py @@ -1,6 +1,6 @@ import os from multiprocessing import get_all_start_methods -from typing import List, Dict, Any, Tuple, Union, Iterable, Type +from typing import Any, Dict, Iterable, List, Tuple, Type, Union import numpy as np import onnxruntime as ort @@ -12,15 +12,6 @@ from fastembed.text.text_embedding_base import TextEmbeddingBase supported_onnx_models = [ - { - "model": "BAAI/bge-base-en", - "dim": 768, - "description": "Base English model", - "size_in_GB": 0.5, - "sources": { - "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz", - }, - }, { "model": "BAAI/bge-base-en-v1.5", "dim": 768, @@ -31,44 +22,15 @@ "hf": "qdrant/bge-base-en-v1.5-onnx-q", }, }, - { - "model": "BAAI/bge-large-en-v1.5-quantized", - "dim": 1024, - "description": "Large English model, v1.5", - "size_in_GB": 1.34, - "sources": { - "hf": "qdrant/bge-large-en-v1.5-onnx-q", - }, - }, { "model": "BAAI/bge-large-en-v1.5", "dim": 1024, "description": "Large English model, v1.5", "size_in_GB": 1.34, "sources": { - "hf": "qdrant/bge-large-en-v1.5-onnx", - }, - }, - { - "model": "BAAI/bge-small-en", - "dim": 384, - "description": "Fast English model", - "size_in_GB": 0.2, - "sources": { - "url": "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz", + "hf": "qdrant/bge-large-en-v1.5-onnx-q", }, }, - # { - # "model": "BAAI/bge-small-en", - # "dim": 384, - # "description": "Fast English model", - # "size_in_GB": 0.2, - # "hf_sources": [], - # "compressed_url_sources": [ - # "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en.tar.gz", - # "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz" - # ] - # }, { "model": "BAAI/bge-small-en-v1.5", "dim": 384, @@ -98,19 +60,6 @@ "hf": "qdrant/all-MiniLM-L6-v2-onnx", }, }, - # { - # "model": "sentence-transformers/all-MiniLM-L6-v2", - # "dim": 384, - # "description": "Sentence Transformer model, MiniLM-L6-v2", - # "size_in_GB": 0.09, - # "hf_sources": [ - # "qdrant/all-MiniLM-L6-v2-onnx" - # ], - # "compressed_url_sources": [ - # "https://storage.googleapis.com/qdrant-fastembed/fast-all-MiniLM-L6-v2.tar.gz", - # "https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz" - # ] - # } ] diff --git a/tests/test_onnx_embeddings.py b/tests/test_onnx_embeddings.py index 6fcc64ae..4d7082fd 100644 --- a/tests/test_onnx_embeddings.py +++ b/tests/test_onnx_embeddings.py @@ -6,10 +6,8 @@ from fastembed.embedding import DefaultEmbedding, JinaEmbedding CANONICAL_VECTOR_VALUES = { - "BAAI/bge-small-en": np.array([-0.0232, -0.0255, 0.0174, -0.0639, -0.0006]), "BAAI/bge-small-en-v1.5": np.array([0.01522374, -0.02271799, 0.00860278, -0.07424029, 0.00386434]), "BAAI/bge-small-zh-v1.5": np.array([-0.01023294, 0.07634465, 0.0691722, -0.04458365, -0.03160762]), - "BAAI/bge-base-en": np.array([0.0115, 0.0372, 0.0295, 0.0121, 0.0346]), "BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]), "BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]), "sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]), diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py index f610fd86..4947f0bc 100644 --- a/tests/test_text_onnx_embeddings.py +++ b/tests/test_text_onnx_embeddings.py @@ -6,13 +6,10 @@ from fastembed.text.text_embedding import TextEmbedding CANONICAL_VECTOR_VALUES = { - "BAAI/bge-small-en": np.array([-0.0232, -0.0255, 0.0174, -0.0639, -0.0006]), "BAAI/bge-small-en-v1.5": np.array([0.01522374, -0.02271799, 0.00860278, -0.07424029, 0.00386434]), "BAAI/bge-small-zh-v1.5": np.array([-0.01023294, 0.07634465, 0.0691722, -0.04458365, -0.03160762]), - "BAAI/bge-base-en": np.array([0.0115, 0.0372, 0.0295, 0.0121, 0.0346]), "BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]), "BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]), - "BAAI/bge-large-en-v1.5-quantized": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]), "sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]), "intfloat/multilingual-e5-large": np.array([0.0098, 0.0045, 0.0066, -0.0354, 0.0070]), "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": np.array(