From 4200834a45779b8cb04db0a12269f22112270a2b Mon Sep 17 00:00:00 2001 From: Dmitrii Rudenko Date: Sat, 9 Nov 2024 00:18:30 +0100 Subject: [PATCH 1/3] Merge master --- fastembed/text/onnx_text_model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py index 95301985..d0127024 100644 --- a/fastembed/text/onnx_text_model.py +++ b/fastembed/text/onnx_text_model.py @@ -79,6 +79,10 @@ def onnx_embed( onnx_input["token_type_ids"] = np.array( [np.zeros(len(e), dtype=np.int64) for e in input_ids], dtype=np.int64 ) + if "pixel_values" in input_names: + onnx_input["pixel_values"] = np.zeros( + (np.array(input_ids, dtype=np.int64).shape[0], 3, 448, 448), dtype=np.float32 + ) onnx_input = self._preprocess_onnx_input(onnx_input, **kwargs) From d44ae27c64a0b74fe6f145e271f2b7c48c8c00f3 Mon Sep 17 00:00:00 2001 From: "d.rudenko" Date: Tue, 24 Dec 2024 14:15:35 +0100 Subject: [PATCH 2/3] HF sources for all models --- fastembed/text/onnx_embedding.py | 40 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py index e4d657c7..410c4a8d 100644 --- a/fastembed/text/onnx_embedding.py +++ b/fastembed/text/onnx_embedding.py @@ -1,4 +1,4 @@ -from typing import Any, Iterable, Optional, Sequence, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Sequence, Type, Union import numpy as np @@ -16,6 +16,7 @@ "license": "mit", "size_in_GB": 0.42, "sources": { + "hf": "Qdrant/fast-bge-base-en", "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz", }, "model_file": "model_optimized.onnx", @@ -50,6 +51,7 @@ "license": "mit", "size_in_GB": 0.13, "sources": { + "hf": "Qdrant/bge-small-en", "url": "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz", }, "model_file": "model_optimized.onnx", @@ -72,6 +74,7 @@ "license": "mit", "size_in_GB": 0.09, "sources": { + "hf": "Qdrant/bge-small-zh-v1.5", "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz", }, "model_file": "model_optimized.onnx", @@ -165,15 +168,16 @@ "model_file": "onnx/model.onnx", }, { - "model": "jinaai/jina-clip-v1", - "dim": 768, - "description": "Text embeddings, Multimodal (text&image), English, Prefixes for queries/documents: not necessary, 2024 year", - "license": "apache-2.0", - "size_in_GB": 0.55, + "model": "akshayballal/colpali-v1.2-merged", + "dim": 128, + "description": "", + "license": "mit", + "size_in_GB": 6.08, "sources": { - "hf": "jinaai/jina-clip-v1", + "hf": "akshayballal/colpali-v1.2-merged-onnx", }, - "model_file": "onnx/text_model.onnx", + "additional_files": ["model.onnx_data"], + "model_file": "model.onnx", }, ] @@ -182,12 +186,12 @@ class OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[np.ndarray]): """Implementation of the Flag Embedding model.""" @classmethod - def list_supported_models(cls) -> list[dict[str, Any]]: + def list_supported_models(cls) -> List[Dict[str, Any]]: """ Lists the supported models. Returns: - list[dict[str, Any]]: A list of dictionaries containing the model information. + List[Dict[str, Any]]: A list of dictionaries containing the model information. """ return supported_onnx_models @@ -198,7 +202,7 @@ def __init__( threads: Optional[int] = None, providers: Optional[Sequence[OnnxProvider]] = None, cuda: bool = False, - device_ids: Optional[list[int]] = None, + device_ids: Optional[List[int]] = None, lazy_load: bool = False, device_id: Optional[int] = None, **kwargs, @@ -214,7 +218,7 @@ def __init__( Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None. cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers` Defaults to False. - device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in + device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None. lazy_load (bool, optional): Whether to load the model during class initialization or on demand. Should be set to True when using multiple-gpu and parallel encoding. Defaults to False. @@ -287,8 +291,8 @@ def _get_worker_class(cls) -> Type["TextEmbeddingWorker"]: return OnnxTextEmbeddingWorker def _preprocess_onnx_input( - self, onnx_input: dict[str, np.ndarray], **kwargs - ) -> dict[str, np.ndarray]: + self, onnx_input: Dict[str, np.ndarray], **kwargs + ) -> Dict[str, np.ndarray]: """ Preprocess the onnx input. """ @@ -296,13 +300,7 @@ def _preprocess_onnx_input( def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.ndarray]: embeddings = output.model_output - if embeddings.ndim == 3: # (batch_size, seq_len, embedding_dim) - processed_embeddings = embeddings[:, 0] - elif embeddings.ndim == 2: # (batch_size, embedding_dim) - processed_embeddings = embeddings - else: - raise ValueError(f"Unsupported embedding shape: {embeddings.shape}") - return normalize(processed_embeddings).astype(np.float32) + return normalize(embeddings[:, 0]).astype(np.float32) def load_onnx_model(self) -> None: self._load_onnx_model( From b58e9399aee08b47d25dcc444245e4411fd154a5 Mon Sep 17 00:00:00 2001 From: "d.rudenko" Date: Tue, 24 Dec 2024 14:28:14 +0100 Subject: [PATCH 3/3] Merge conflict fix --- fastembed/text/onnx_embedding.py | 37 ++++++++++++++++++------------- fastembed/text/onnx_text_model.py | 5 ----- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py index 410c4a8d..48b93dae 100644 --- a/fastembed/text/onnx_embedding.py +++ b/fastembed/text/onnx_embedding.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Iterable, List, Optional, Sequence, Type, Union +from typing import Any, Iterable, Optional, Sequence, Type, Union import numpy as np @@ -168,16 +168,15 @@ "model_file": "onnx/model.onnx", }, { - "model": "akshayballal/colpali-v1.2-merged", - "dim": 128, - "description": "", - "license": "mit", - "size_in_GB": 6.08, + "model": "jinaai/jina-clip-v1", + "dim": 768, + "description": "Text embeddings, Multimodal (text&image), English, Prefixes for queries/documents: not necessary, 2024 year", + "license": "apache-2.0", + "size_in_GB": 0.55, "sources": { - "hf": "akshayballal/colpali-v1.2-merged-onnx", + "hf": "jinaai/jina-clip-v1", }, - "additional_files": ["model.onnx_data"], - "model_file": "model.onnx", + "model_file": "onnx/text_model.onnx", }, ] @@ -186,12 +185,12 @@ class OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[np.ndarray]): """Implementation of the Flag Embedding model.""" @classmethod - def list_supported_models(cls) -> List[Dict[str, Any]]: + def list_supported_models(cls) -> list[dict[str, Any]]: """ Lists the supported models. Returns: - List[Dict[str, Any]]: A list of dictionaries containing the model information. + list[dict[str, Any]]: A list of dictionaries containing the model information. """ return supported_onnx_models @@ -202,7 +201,7 @@ def __init__( threads: Optional[int] = None, providers: Optional[Sequence[OnnxProvider]] = None, cuda: bool = False, - device_ids: Optional[List[int]] = None, + device_ids: Optional[list[int]] = None, lazy_load: bool = False, device_id: Optional[int] = None, **kwargs, @@ -218,7 +217,7 @@ def __init__( Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None. cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers` Defaults to False. - device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in + device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None. lazy_load (bool, optional): Whether to load the model during class initialization or on demand. Should be set to True when using multiple-gpu and parallel encoding. Defaults to False. @@ -291,8 +290,8 @@ def _get_worker_class(cls) -> Type["TextEmbeddingWorker"]: return OnnxTextEmbeddingWorker def _preprocess_onnx_input( - self, onnx_input: Dict[str, np.ndarray], **kwargs - ) -> Dict[str, np.ndarray]: + self, onnx_input: dict[str, np.ndarray], **kwargs + ) -> dict[str, np.ndarray]: """ Preprocess the onnx input. """ @@ -300,7 +299,13 @@ def _preprocess_onnx_input( def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.ndarray]: embeddings = output.model_output - return normalize(embeddings[:, 0]).astype(np.float32) + if embeddings.ndim == 3: # (batch_size, seq_len, embedding_dim) + processed_embeddings = embeddings[:, 0] + elif embeddings.ndim == 2: # (batch_size, embedding_dim) + processed_embeddings = embeddings + else: + raise ValueError(f"Unsupported embedding shape: {embeddings.shape}") + return normalize(processed_embeddings).astype(np.float32) def load_onnx_model(self) -> None: self._load_onnx_model( diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py index d0127024..ba3e1516 100644 --- a/fastembed/text/onnx_text_model.py +++ b/fastembed/text/onnx_text_model.py @@ -79,11 +79,6 @@ def onnx_embed( onnx_input["token_type_ids"] = np.array( [np.zeros(len(e), dtype=np.int64) for e in input_ids], dtype=np.int64 ) - if "pixel_values" in input_names: - onnx_input["pixel_values"] = np.zeros( - (np.array(input_ids, dtype=np.int64).shape[0], 3, 448, 448), dtype=np.float32 - ) - onnx_input = self._preprocess_onnx_input(onnx_input, **kwargs) model_output = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)