From 4a1beb61b38360a44542f1bb0c08d8030053664b Mon Sep 17 00:00:00 2001
From: Anush008
Date: Tue, 30 Jan 2024 22:23:53 +0530
Subject: [PATCH] chore: updated embeddings value xenova/multilingual-e5-large

---
 fastembed/embedding.py        | 2 +-
 tests/test_onnx_embeddings.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastembed/embedding.py b/fastembed/embedding.py
index 38e5fa16..fcedac24 100644
--- a/fastembed/embedding.py
+++ b/fastembed/embedding.py
@@ -115,7 +115,7 @@ def __init__(
 
         # Hacky support for multilingual model
         self.exclude_token_type_ids = False
-        if model_name in ["intfloat/multilingual-e5-large", "xenova/multilingual-e5-large"]:
+        if model_name in ["intfloat/multilingual-e5-large", "xenova/multilingual-e5-large", "xenova/paraphrase-multilingual-mpnet-base-v2"]:
             self.exclude_token_type_ids = True
 
         so = ort.SessionOptions()
diff --git a/tests/test_onnx_embeddings.py b/tests/test_onnx_embeddings.py
index c41d733d..9bdc5008 100644
--- a/tests/test_onnx_embeddings.py
+++ b/tests/test_onnx_embeddings.py
@@ -14,7 +14,7 @@
     "BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
     "sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]),
     "intfloat/multilingual-e5-large": np.array([0.0098, 0.0045, 0.0066, -0.0354, 0.0070]),
-    "xenova/multilingual-e5-large": np.array([0.00, 0.00, 0.00, 0.00, 0.00]),
+    "xenova/multilingual-e5-large": np.array([0.00975464, 0.00446568, 0.00655449, -0.0354155, 0.00702112]),
     "xenova/paraphrase-multilingual-mpnet-base-v2": np.array([0.00, 0.00, 0.00, 0.00, 0.00]),
     "jinaai/jina-embeddings-v2-small-en": np.array([-0.0455, -0.0428, -0.0122, 0.0613, 0.0015]),
     "jinaai/jina-embeddings-v2-base-en": np.array([-0.0332, -0.0509, 0.0287, -0.0043, -0.0077]),