From 4200834a45779b8cb04db0a12269f22112270a2b Mon Sep 17 00:00:00 2001
From: Dmitrii Rudenko <dmitrii.rudenko@qdrant.tech>
Date: Sat, 9 Nov 2024 00:18:30 +0100
Subject: [PATCH 1/3] Merge master: add placeholder pixel_values ONNX input

---
 fastembed/text/onnx_text_model.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py
index 95301985..d0127024 100644
--- a/fastembed/text/onnx_text_model.py
+++ b/fastembed/text/onnx_text_model.py
@@ -79,6 +79,10 @@ def onnx_embed(
             onnx_input["token_type_ids"] = np.array(
                 [np.zeros(len(e), dtype=np.int64) for e in input_ids], dtype=np.int64
             )
+        if "pixel_values" in input_names:
+            onnx_input["pixel_values"] = np.zeros(
+                (np.array(input_ids, dtype=np.int64).shape[0], 3, 448, 448), dtype=np.float32
+            )
 
         onnx_input = self._preprocess_onnx_input(onnx_input, **kwargs)
 

From d44ae27c64a0b74fe6f145e271f2b7c48c8c00f3 Mon Sep 17 00:00:00 2001
From: "d.rudenko" <dmitrii.rudenko@qdrant.com>
Date: Tue, 24 Dec 2024 14:15:35 +0100
Subject: [PATCH 2/3] HF sources for all models

---
 fastembed/text/onnx_embedding.py | 40 +++++++++++++++-----------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index e4d657c7..410c4a8d 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterable, Optional, Sequence, Type, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Type, Union
 
 import numpy as np
 
@@ -16,6 +16,7 @@
         "license": "mit",
         "size_in_GB": 0.42,
         "sources": {
+            "hf": "Qdrant/fast-bge-base-en",
             "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz",
         },
         "model_file": "model_optimized.onnx",
@@ -50,6 +51,7 @@
         "license": "mit",
         "size_in_GB": 0.13,
         "sources": {
+            "hf": "Qdrant/bge-small-en",
             "url": "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz",
         },
         "model_file": "model_optimized.onnx",
@@ -72,6 +74,7 @@
         "license": "mit",
         "size_in_GB": 0.09,
         "sources": {
+            "hf": "Qdrant/bge-small-zh-v1.5",
             "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz",
         },
         "model_file": "model_optimized.onnx",
@@ -165,15 +168,16 @@
         "model_file": "onnx/model.onnx",
     },
     {
-        "model": "jinaai/jina-clip-v1",
-        "dim": 768,
-        "description": "Text embeddings, Multimodal (text&image), English, Prefixes for queries/documents: not necessary, 2024 year",
-        "license": "apache-2.0",
-        "size_in_GB": 0.55,
+        "model": "akshayballal/colpali-v1.2-merged",
+        "dim": 128,
+        "description": "",
+        "license": "mit",
+        "size_in_GB": 6.08,
         "sources": {
-            "hf": "jinaai/jina-clip-v1",
+            "hf": "akshayballal/colpali-v1.2-merged-onnx",
         },
-        "model_file": "onnx/text_model.onnx",
+        "additional_files": ["model.onnx_data"],
+        "model_file": "model.onnx",
     },
 ]
 
@@ -182,12 +186,12 @@ class OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[np.ndarray]):
     """Implementation of the Flag Embedding model."""
 
     @classmethod
-    def list_supported_models(cls) -> list[dict[str, Any]]:
+    def list_supported_models(cls) -> List[Dict[str, Any]]:
         """
         Lists the supported models.
 
         Returns:
-            list[dict[str, Any]]: A list of dictionaries containing the model information.
+            List[Dict[str, Any]]: A list of dictionaries containing the model information.
         """
         return supported_onnx_models
 
@@ -198,7 +202,7 @@ def __init__(
         threads: Optional[int] = None,
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
-        device_ids: Optional[list[int]] = None,
+        device_ids: Optional[List[int]] = None,
         lazy_load: bool = False,
         device_id: Optional[int] = None,
         **kwargs,
@@ -214,7 +218,7 @@ def __init__(
                 Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
             cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
                 Defaults to False.
-            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
+            device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in
                 workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
             lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
                 Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
@@ -287,8 +291,8 @@ def _get_worker_class(cls) -> Type["TextEmbeddingWorker"]:
         return OnnxTextEmbeddingWorker
 
     def _preprocess_onnx_input(
-        self, onnx_input: dict[str, np.ndarray], **kwargs
-    ) -> dict[str, np.ndarray]:
+        self, onnx_input: Dict[str, np.ndarray], **kwargs
+    ) -> Dict[str, np.ndarray]:
         """
         Preprocess the onnx input.
         """
@@ -296,13 +300,7 @@ def _preprocess_onnx_input(
 
     def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.ndarray]:
         embeddings = output.model_output
-        if embeddings.ndim == 3:  # (batch_size, seq_len, embedding_dim)
-            processed_embeddings = embeddings[:, 0]
-        elif embeddings.ndim == 2:  # (batch_size, embedding_dim)
-            processed_embeddings = embeddings
-        else:
-            raise ValueError(f"Unsupported embedding shape: {embeddings.shape}")
-        return normalize(processed_embeddings).astype(np.float32)
+        return normalize(embeddings[:, 0]).astype(np.float32)
 
     def load_onnx_model(self) -> None:
         self._load_onnx_model(

From b58e9399aee08b47d25dcc444245e4411fd154a5 Mon Sep 17 00:00:00 2001
From: "d.rudenko" <dmitrii.rudenko@qdrant.com>
Date: Tue, 24 Dec 2024 14:28:14 +0100
Subject: [PATCH 3/3] Merge conflict fix: undo ColPali, typing, pooling and pixel_values changes

---
 fastembed/text/onnx_embedding.py  | 37 ++++++++++++++++++-------------
 fastembed/text/onnx_text_model.py |  5 -----
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index 410c4a8d..48b93dae 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Type, Union
+from typing import Any, Iterable, Optional, Sequence, Type, Union
 
 import numpy as np
 
@@ -168,16 +168,15 @@
         "model_file": "onnx/model.onnx",
     },
     {
-        "model": "akshayballal/colpali-v1.2-merged",
-        "dim": 128,
-        "description": "",
-        "license": "mit",
-        "size_in_GB": 6.08,
+        "model": "jinaai/jina-clip-v1",
+        "dim": 768,
+        "description": "Text embeddings, Multimodal (text&image), English, Prefixes for queries/documents: not necessary, 2024 year",
+        "license": "apache-2.0",
+        "size_in_GB": 0.55,
         "sources": {
-            "hf": "akshayballal/colpali-v1.2-merged-onnx",
+            "hf": "jinaai/jina-clip-v1",
         },
-        "additional_files": ["model.onnx_data"],
-        "model_file": "model.onnx",
+        "model_file": "onnx/text_model.onnx",
     },
 ]
 
@@ -186,12 +185,12 @@ class OnnxTextEmbedding(TextEmbeddingBase, OnnxTextModel[np.ndarray]):
     """Implementation of the Flag Embedding model."""
 
     @classmethod
-    def list_supported_models(cls) -> List[Dict[str, Any]]:
+    def list_supported_models(cls) -> list[dict[str, Any]]:
         """
         Lists the supported models.
 
         Returns:
-            List[Dict[str, Any]]: A list of dictionaries containing the model information.
+            list[dict[str, Any]]: A list of dictionaries containing the model information.
         """
         return supported_onnx_models
 
@@ -202,7 +201,7 @@ def __init__(
         threads: Optional[int] = None,
         providers: Optional[Sequence[OnnxProvider]] = None,
         cuda: bool = False,
-        device_ids: Optional[List[int]] = None,
+        device_ids: Optional[list[int]] = None,
         lazy_load: bool = False,
         device_id: Optional[int] = None,
         **kwargs,
@@ -218,7 +217,7 @@ def __init__(
                 Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
             cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
                 Defaults to False.
-            device_ids (Optional[List[int]], optional): The list of device ids to use for data parallel processing in
+            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
                 workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
             lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
                 Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
@@ -291,8 +290,8 @@ def _get_worker_class(cls) -> Type["TextEmbeddingWorker"]:
         return OnnxTextEmbeddingWorker
 
     def _preprocess_onnx_input(
-        self, onnx_input: Dict[str, np.ndarray], **kwargs
-    ) -> Dict[str, np.ndarray]:
+        self, onnx_input: dict[str, np.ndarray], **kwargs
+    ) -> dict[str, np.ndarray]:
         """
         Preprocess the onnx input.
         """
@@ -300,7 +299,13 @@ def _preprocess_onnx_input(
 
     def _post_process_onnx_output(self, output: OnnxOutputContext) -> Iterable[np.ndarray]:
         embeddings = output.model_output
-        return normalize(embeddings[:, 0]).astype(np.float32)
+        if embeddings.ndim == 3:  # (batch_size, seq_len, embedding_dim)
+            processed_embeddings = embeddings[:, 0]
+        elif embeddings.ndim == 2:  # (batch_size, embedding_dim)
+            processed_embeddings = embeddings
+        else:
+            raise ValueError(f"Unsupported embedding shape: {embeddings.shape}")
+        return normalize(processed_embeddings).astype(np.float32)
 
     def load_onnx_model(self) -> None:
         self._load_onnx_model(
diff --git a/fastembed/text/onnx_text_model.py b/fastembed/text/onnx_text_model.py
index d0127024..ba3e1516 100644
--- a/fastembed/text/onnx_text_model.py
+++ b/fastembed/text/onnx_text_model.py
@@ -79,11 +79,6 @@ def onnx_embed(
             onnx_input["token_type_ids"] = np.array(
                 [np.zeros(len(e), dtype=np.int64) for e in input_ids], dtype=np.int64
             )
-        if "pixel_values" in input_names:
-            onnx_input["pixel_values"] = np.zeros(
-                (np.array(input_ids, dtype=np.int64).shape[0], 3, 448, 448), dtype=np.float32
-            )
-
         onnx_input = self._preprocess_onnx_input(onnx_input, **kwargs)
 
         model_output = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)