-
Notifications
You must be signed in to change notification settings - Fork 773
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Thanks to @huangrpablo and @juliuslipp we now have a mixedbread.ai embedder!
- Loading branch information
1 parent
affd997
commit ddba928
Showing
14 changed files
with
13,948 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
-c ../deps/constraints.txt | ||
-c ../base.txt | ||
mixedbread-ai |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# | ||
# This file is autogenerated by pip-compile with Python 3.9 | ||
# by the following command: | ||
# | ||
# pip-compile ./ingest/embed-mixedbreadai.in | ||
# | ||
annotated-types==0.7.0 | ||
# via pydantic | ||
anyio==4.4.0 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# httpx | ||
certifi==2024.7.4 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# -c ./ingest/../deps/constraints.txt | ||
# httpcore | ||
# httpx | ||
exceptiongroup==1.2.2 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# anyio | ||
h11==0.14.0 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# httpcore | ||
httpcore==1.0.5 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# httpx | ||
httpx==0.27.0 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# mixedbread-ai | ||
idna==3.8 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# anyio | ||
# httpx | ||
mixedbread-ai==2.2.6 | ||
# via -r ./ingest/embed-mixedbreadai.in | ||
pydantic==2.8.2 | ||
# via mixedbread-ai | ||
pydantic-core==2.20.1 | ||
# via pydantic | ||
sniffio==1.3.1 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# anyio | ||
# httpx | ||
typing-extensions==4.12.2 | ||
# via | ||
# -c ./ingest/../base.txt | ||
# anyio | ||
# mixedbread-ai | ||
# pydantic | ||
# pydantic-core |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from unstructured.documents.elements import Text | ||
from unstructured.embed.mixedbreadai import ( | ||
MixedbreadAIEmbeddingConfig, | ||
MixedbreadAIEmbeddingEncoder, | ||
) | ||
|
||
|
||
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that elements embedded through a stubbed client still serialize.

    ``create_client`` is patched so no real Mixedbread client is built; the
    stub's ``embeddings`` call returns one fake embedding per input entry.
    """
    sentences = ["This is sentence 1", "This is sentence 2"]

    def fake_embeddings(
        model,
        normalized,
        encoding_format,
        truncation_strategy,
        request_options,
        input,
    ):
        # Build a response whose ``data`` has one item per input entry, each
        # carrying a small two-number vector derived from its position.
        response = mocker.MagicMock()
        response.data = [
            mocker.MagicMock(embedding=[index, index + 1]) for index, _ in enumerate(input)
        ]
        return response

    stub_client = mocker.MagicMock()
    stub_client.embeddings.side_effect = fake_embeddings

    # Make the encoder hand back the stub instead of creating a real client.
    mocker.patch.object(MixedbreadAIEmbeddingEncoder, "create_client", return_value=stub_client)

    config = MixedbreadAIEmbeddingConfig(
        api_key="api_key", model_name="mixedbread-ai/mxbai-embed-large-v1"
    )
    encoder = MixedbreadAIEmbeddingEncoder(config=config)

    elements = encoder.embed_documents(
        elements=[Text(sentence) for sentence in sentences],
    )

    assert len(elements) == 2
    # Serialization must still expose the original text of every element.
    for element, sentence in zip(elements, sentences):
        assert element.to_dict()["text"] == sentence
    # Every element must have received an embedding.
    for element in elements:
        assert element.embeddings is not None
Oops, something went wrong.