From 26c5a24ab2200170917487357810722274647468 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 09:33:47 +0100 Subject: [PATCH 1/3] build(deps): bump pydantic from 2.10.2 to 2.10.3 (#1173) Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.10.2 to 2.10.3. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.10.2...v2.10.3) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Sebastian Niehus <165138846+SebastianNiehusAA@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index b38d99a2..e841859e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4160,13 +4160,13 @@ files = [ [[package]] name = "pydantic" -version = "2.10.2" +version = "2.10.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.2-py3-none-any.whl", hash = "sha256:cfb96e45951117c3024e6b67b25cdc33a3cb7b2fa62e239f7af1378358a1d99e"}, - {file = "pydantic-2.10.2.tar.gz", hash = "sha256:2bc2d7f17232e0841cbba4641e65ba1eb6fafb3a08de3a091ff3ce14a197c4fa"}, + {file = "pydantic-2.10.3-py3-none-any.whl", hash = "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d"}, + {file = "pydantic-2.10.3.tar.gz", hash = "sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9"}, ] [package.dependencies] From 44805cc2b04707609501bb2a8f365c8c471c1f61 Mon Sep 17 00:00:00 2001 From: Michael Barlow <25936840+Michael-JB@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:08:38 +0100 Subject: 
[PATCH 2/3] fix: allow negative min score in SearchQuery (#1184) * fix: allow negative min score in SearchQuery Update the `SearchQuery` model in `document_index.py` to allow a `min_score` between -1 and 1. * Fix: Change model version in elo_qa_eval.ipynb - Fix typo --------- Co-authored-by: Sebastian Niehus --- CHANGELOG.md | 2 +- src/documentation/elo_qa_eval.ipynb | 2 +- .../connectors/document_index/document_index.py | 9 ++++----- src/intelligence_layer/core/model.py | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e34ffeef..5d11a94d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ - Add method `DocumentIndexClient.chunks()` for retrieving all text chunks of a document. ### Fixes -... +- The Document Index `SearchQuery` now correctly allows searches with a negative `min_score`. ### Deprecations ... diff --git a/src/documentation/elo_qa_eval.ipynb b/src/documentation/elo_qa_eval.ipynb index ef2b2c55..4b71af6f 100644 --- a/src/documentation/elo_qa_eval.ipynb +++ b/src/documentation/elo_qa_eval.ipynb @@ -448,7 +448,7 @@ "outputs": [], "source": [ "newly_added_models = [\n", - " Llama3InstructModel(name=\"llama-3.1-70b-instruct\", client=aa_client),\n", + " Llama3InstructModel(name=\"llama-3.3-70b-instruct\", client=aa_client),\n", "]\n", "\n", "for model in newly_added_models:\n", diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py index 6c160d17..0afbd1c3 100644 --- a/src/intelligence_layer/connectors/document_index/document_index.py +++ b/src/intelligence_layer/connectors/document_index/document_index.py @@ -293,16 +293,15 @@ class SearchQuery(BaseModel): query: Actual text to be searched with. max_results: Max number of search results to be retrieved by the query. Must be larger than 0. - min_score: Filter out results with a similarity score below this value. - Must be between 0 and 1. 
- For searches on hybrid indexes, the Document Index applies the min_score - to the semantic results before fusion of result sets. As fusion re-scores results, + min_score: Filter out results with a similarity score below this value. Must be between + -1 and 1. For searches on hybrid indexes, the Document Index applies the min_score to + the semantic results before fusion of result sets. As fusion re-scores results, returned scores may exceed this value. """ query: str max_results: int = Field(ge=0, default=1) - min_score: float = Field(ge=0.0, le=1.0, default=0.0) + min_score: float = Field(ge=-1.0, le=1.0, default=0.0) filters: Optional[list[Filters]] = None diff --git a/src/intelligence_layer/core/model.py b/src/intelligence_layer/core/model.py index 3c691808..c1612174 100644 --- a/src/intelligence_layer/core/model.py +++ b/src/intelligence_layer/core/model.py @@ -261,7 +261,7 @@ def __init__( ) if name not in [model["name"] for model in self._client.models()]: warnings.warn( - "The provided model is not a recommended model for this model class." + "The provided model is not a recommended model for this model class. " "Make sure that the model you have selected is suited to be use for the prompt template used in this model class." ) self._complete: Task[CompleteInput, CompleteOutput] = _Complete( @@ -414,7 +414,7 @@ def __init__( ) -> None: if name not in self.RECOMMENDED_MODELS or name == "": warnings.warn( - "The provided model is not a recommended model for this model class." + "The provided model is not a recommended model for this model class. " "Make sure that the model you have selected is suited to be use for the prompt template used in this model class." 
) super().__init__(name, client) From 61d06608bd322a07e1af2ea78b630d26d3f2beeb Mon Sep 17 00:00:00 2001 From: Patrice Billaut <57354406+pbillaut@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:28:36 +0100 Subject: [PATCH 3/3] feat(document-index): introduce `is_null` filter (#1183) * feat(document-index): introduce filter `is_null` * style: fix formatting * chore: add release notes --------- Co-authored-by: Sebastian Niehus <165138846+SebastianNiehusAA@users.noreply.github.com> --- CHANGELOG.md | 3 + .../document_index/document_index.py | 1 + tests/conftest_document_index.py | 7 ++ .../document_index/test_document_index.py | 75 +++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d11a94d..329bdbe6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,10 @@ - Add `create_project` bool to `StudioClient.__init__()` to enable users to automatically create their Studio projects - Add progressbar to the `Runner` to be able to track the `Run` - Add `StudioClient.submit_benchmark_lineages` function and include it in `StudioClient.submit_benchmark_execution` + +#### DocumentIndexClient - Add method `DocumentIndexClient.chunks()` for retrieving all text chunks of a document. +- Add metadata filter `FilterOps.IS_NULL`, which allows filtering fields based on whether their value is null. ### Fixes - The Document Index `SearchQuery` now correctly allows searches with a negative `min_score`. 
diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py index 0afbd1c3..cbe27d5e 100644 --- a/src/intelligence_layer/connectors/document_index/document_index.py +++ b/src/intelligence_layer/connectors/document_index/document_index.py @@ -227,6 +227,7 @@ class FilterOps(Enum): BEFORE = "before" AT_OR_BEFORE = "at_or_before" EQUAL_TO = "equal_to" + IS_NULL = "is_null" class FilterField(BaseModel): diff --git a/tests/conftest_document_index.py b/tests/conftest_document_index.py index 9950fe7e..3bc18cbf 100644 --- a/tests/conftest_document_index.py +++ b/tests/conftest_document_index.py @@ -158,6 +158,7 @@ def document_contents_with_metadata() -> list[DocumentContents]: metadata_1: JsonSerializable = { "string-field": "example_string_1", + "option-field": None, "integer-field": 123, "float-field": 123.45, "boolean-field": True, @@ -168,6 +169,7 @@ def document_contents_with_metadata() -> list[DocumentContents]: metadata_2: JsonSerializable = { "string-field": "example_string_2", + "option-field": "example_string_2", "integer-field": 456, "float-field": 678.90, "boolean-field": False, @@ -178,6 +180,7 @@ def document_contents_with_metadata() -> list[DocumentContents]: metadata_3: JsonSerializable = { "string-field": "example_string_3", + "option-field": "example_string_3", "integer-field": 789, "float-field": 101112.13, "boolean-field": True, @@ -237,6 +240,10 @@ def filter_index_configs( "field-name": "string-field", "field-type": "string", }, + random_identifier(): { + "field-name": "option-field", + "field-type": "string", + }, random_identifier(): { "field-name": "integer-field", "field-type": "integer", diff --git a/tests/connectors/document_index/test_document_index.py b/tests/connectors/document_index/test_document_index.py index 4843f8d6..df63c0ea 100644 --- a/tests/connectors/document_index/test_document_index.py +++ 
b/tests/connectors/document_index/test_document_index.py @@ -408,6 +408,81 @@ def search() -> None: search() +def test_search_with_null_filter( + document_index: DocumentIndexClient, + read_only_populated_collection: tuple[CollectionPath, IndexPath], +) -> None: + search_query = SearchQuery( + query="Pemberton", + max_results=10, + min_score=0.5, + filters=[ + Filters( + filter_type="with", + fields=[ + FilterField( + field_name="option-field", + field_value=True, + criteria=FilterOps.IS_NULL, + ) + ], + ) + ], + ) + + @retry + def search() -> None: + collection_path, index_path = read_only_populated_collection + results = document_index.search( + collection_path, + index_path.index, + search_query, + ) + assert len(results) == 1 + assert results[0].document_path.document_name == "document-0" + + search() + + +def test_search_with_null_filter_without( + document_index: DocumentIndexClient, + read_only_populated_collection: tuple[CollectionPath, IndexPath], +) -> None: + search_query = SearchQuery( + query="Pemberton", + max_results=10, + min_score=0.5, + filters=[ + Filters( + filter_type="without", + fields=[ + FilterField( + field_name="option-field", + field_value=True, + criteria=FilterOps.IS_NULL, + ) + ], + ) + ], + ) + + @retry + def search() -> None: + collection_path, index_path = read_only_populated_collection + results = document_index.search( + collection_path, + index_path.index, + search_query, + ) + assert len(results) == 2 + assert {r.document_path.document_name for r in results} == { + "document-1", + "document-2", + } + + search() + + def test_search_with_integer_filter( document_index: DocumentIndexClient, read_only_populated_collection: tuple[CollectionPath, IndexPath],