From 26c5a24ab2200170917487357810722274647468 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 17 Dec 2024 09:33:47 +0100
Subject: [PATCH 1/3] build(deps): bump pydantic from 2.10.2 to 2.10.3 (#1173)

Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.10.2 to 2.10.3.
- [Release notes](https://github.com/pydantic/pydantic/releases)
- [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md)
- [Commits](https://github.com/pydantic/pydantic/compare/v2.10.2...v2.10.3)

---
updated-dependencies:
- dependency-name: pydantic
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sebastian Niehus <165138846+SebastianNiehusAA@users.noreply.github.com>
---
 poetry.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index b38d99a2..e841859e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4160,13 +4160,13 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "2.10.2"
+version = "2.10.3"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.10.2-py3-none-any.whl", hash = "sha256:cfb96e45951117c3024e6b67b25cdc33a3cb7b2fa62e239f7af1378358a1d99e"},
-    {file = "pydantic-2.10.2.tar.gz", hash = "sha256:2bc2d7f17232e0841cbba4641e65ba1eb6fafb3a08de3a091ff3ce14a197c4fa"},
+    {file = "pydantic-2.10.3-py3-none-any.whl", hash = "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d"},
+    {file = "pydantic-2.10.3.tar.gz", hash = "sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9"},
 ]
 
 [package.dependencies]

From 44805cc2b04707609501bb2a8f365c8c471c1f61 Mon Sep 17 00:00:00 2001
From: Michael Barlow <25936840+Michael-JB@users.noreply.github.com>
Date: Tue, 17 Dec 2024 16:08:38 +0100
Subject: [PATCH 2/3] fix: allow negative min score in SearchQuery (#1184)

* fix: allow negative min score in SearchQuery

Update the `SearchQuery` model in `document_index.py` to allow a
`min_score` between -1 and 1.

* Fix: Change model version in elo_qa_eval.ipynb
 - Fix typo: add the missing space between the two sentences of the model warning in `core/model.py`

---------

Co-authored-by: Sebastian Niehus <sebastian.niehus@ext.aleph-alpha.com>
---
 CHANGELOG.md                                             | 2 +-
 src/documentation/elo_qa_eval.ipynb                      | 2 +-
 .../connectors/document_index/document_index.py          | 9 ++++-----
 src/intelligence_layer/core/model.py                     | 4 ++--
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e34ffeef..5d11a94d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@
 - Add method `DocumentIndexClient.chunks()` for retrieving all text chunks of a document.
 
 ### Fixes
-...
+- The Document Index `SearchQuery` now correctly allows searches with a negative `min_score`.
 
 ### Deprecations
 ...
diff --git a/src/documentation/elo_qa_eval.ipynb b/src/documentation/elo_qa_eval.ipynb
index ef2b2c55..4b71af6f 100644
--- a/src/documentation/elo_qa_eval.ipynb
+++ b/src/documentation/elo_qa_eval.ipynb
@@ -448,7 +448,7 @@
    "outputs": [],
    "source": [
     "newly_added_models = [\n",
-    "    Llama3InstructModel(name=\"llama-3.1-70b-instruct\", client=aa_client),\n",
+    "    Llama3InstructModel(name=\"llama-3.3-70b-instruct\", client=aa_client),\n",
     "]\n",
     "\n",
     "for model in newly_added_models:\n",
diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py
index 6c160d17..0afbd1c3 100644
--- a/src/intelligence_layer/connectors/document_index/document_index.py
+++ b/src/intelligence_layer/connectors/document_index/document_index.py
@@ -293,16 +293,15 @@ class SearchQuery(BaseModel):
         query: Actual text to be searched with.
         max_results: Max number of search results to be retrieved by the query.
             Must be larger than 0.
-        min_score: Filter out results with a similarity score below this value.
-            Must be between 0 and 1.
-            For searches on hybrid indexes, the Document Index applies the min_score
-            to the semantic results before fusion of result sets. As fusion re-scores results,
+        min_score: Filter out results with a similarity score below this value. Must be between
+            -1 and 1. For searches on hybrid indexes, the Document Index applies the min_score to
+            the semantic results before fusion of result sets. As fusion re-scores results,
             returned scores may exceed this value.
     """
 
     query: str
     max_results: int = Field(ge=0, default=1)
-    min_score: float = Field(ge=0.0, le=1.0, default=0.0)
+    min_score: float = Field(ge=-1.0, le=1.0, default=0.0)
     filters: Optional[list[Filters]] = None
 
 
diff --git a/src/intelligence_layer/core/model.py b/src/intelligence_layer/core/model.py
index 3c691808..c1612174 100644
--- a/src/intelligence_layer/core/model.py
+++ b/src/intelligence_layer/core/model.py
@@ -261,7 +261,7 @@ def __init__(
         )
         if name not in [model["name"] for model in self._client.models()]:
             warnings.warn(
-                "The provided model is not a recommended model for this model class."
+                "The provided model is not a recommended model for this model class. "
                 "Make sure that the model you have selected is suited to be use for the prompt template used in this model class."
             )
         self._complete: Task[CompleteInput, CompleteOutput] = _Complete(
@@ -414,7 +414,7 @@ def __init__(
     ) -> None:
         if name not in self.RECOMMENDED_MODELS or name == "":
             warnings.warn(
-                "The provided model is not a recommended model for this model class."
+                "The provided model is not a recommended model for this model class. "
                 "Make sure that the model you have selected is suited to be use for the prompt template used in this model class."
             )
         super().__init__(name, client)

From 61d06608bd322a07e1af2ea78b630d26d3f2beeb Mon Sep 17 00:00:00 2001
From: Patrice Billaut <57354406+pbillaut@users.noreply.github.com>
Date: Tue, 17 Dec 2024 16:28:36 +0100
Subject: [PATCH 3/3] feat(document-index): introduce `is_null` filter (#1183)

* feat(document-index): introduce filter `is_null`

* style: fix formatting

* chore: add release notes

---------

Co-authored-by: Sebastian Niehus <165138846+SebastianNiehusAA@users.noreply.github.com>
---
 CHANGELOG.md                                  |  3 +
 .../document_index/document_index.py          |  1 +
 tests/conftest_document_index.py              |  7 ++
 .../document_index/test_document_index.py     | 75 +++++++++++++++++++
 4 files changed, 86 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d11a94d..329bdbe6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,10 @@
 - Add `create_project` bool to `StudioClient.__init__()` to enable users to automatically create their Studio projects
 - Add progressbar to the `Runner` to be able to track the `Run`
 - Add `StudioClient.submit_benchmark_lineages` function and include it in `StudioClient.submit_benchmark_execution`
+
+#### DocumentIndexClient
 - Add method `DocumentIndexClient.chunks()` for retrieving all text chunks of a document.
+- Add metadata filter `FilterOps.IS_NULL`, which allows filtering on whether a field's value is null.
 
 ### Fixes
 - The Document Index `SearchQuery` now correctly allows searches with a negative `min_score`.
diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py
index 0afbd1c3..cbe27d5e 100644
--- a/src/intelligence_layer/connectors/document_index/document_index.py
+++ b/src/intelligence_layer/connectors/document_index/document_index.py
@@ -227,6 +227,7 @@ class FilterOps(Enum):
     BEFORE = "before"
     AT_OR_BEFORE = "at_or_before"
     EQUAL_TO = "equal_to"
+    IS_NULL = "is_null"
 
 
 class FilterField(BaseModel):
diff --git a/tests/conftest_document_index.py b/tests/conftest_document_index.py
index 9950fe7e..3bc18cbf 100644
--- a/tests/conftest_document_index.py
+++ b/tests/conftest_document_index.py
@@ -158,6 +158,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:
 
     metadata_1: JsonSerializable = {
         "string-field": "example_string_1",
+        "option-field": None,
         "integer-field": 123,
         "float-field": 123.45,
         "boolean-field": True,
@@ -168,6 +169,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:
 
     metadata_2: JsonSerializable = {
         "string-field": "example_string_2",
+        "option-field": "example_string_2",
         "integer-field": 456,
         "float-field": 678.90,
         "boolean-field": False,
@@ -178,6 +180,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:
 
     metadata_3: JsonSerializable = {
         "string-field": "example_string_3",
+        "option-field": "example_string_3",
         "integer-field": 789,
         "float-field": 101112.13,
         "boolean-field": True,
@@ -237,6 +240,10 @@ def filter_index_configs(
             "field-name": "string-field",
             "field-type": "string",
         },
+        random_identifier(): {
+            "field-name": "option-field",
+            "field-type": "string",
+        },
         random_identifier(): {
             "field-name": "integer-field",
             "field-type": "integer",
diff --git a/tests/connectors/document_index/test_document_index.py b/tests/connectors/document_index/test_document_index.py
index 4843f8d6..df63c0ea 100644
--- a/tests/connectors/document_index/test_document_index.py
+++ b/tests/connectors/document_index/test_document_index.py
@@ -408,6 +408,81 @@ def search() -> None:
     search()
 
 
+def test_search_with_null_filter(
+    document_index: DocumentIndexClient,
+    read_only_populated_collection: tuple[CollectionPath, IndexPath],
+) -> None:
+    search_query = SearchQuery(
+        query="Pemberton",
+        max_results=10,
+        min_score=0.5,
+        filters=[
+            Filters(
+                filter_type="with",
+                fields=[
+                    FilterField(
+                        field_name="option-field",
+                        field_value=True,
+                        criteria=FilterOps.IS_NULL,
+                    )
+                ],
+            )
+        ],
+    )
+
+    @retry
+    def search() -> None:
+        collection_path, index_path = read_only_populated_collection
+        results = document_index.search(
+            collection_path,
+            index_path.index,
+            search_query,
+        )
+        assert len(results) == 1
+        assert results[0].document_path.document_name == "document-0"
+
+    search()
+
+
+def test_search_with_null_filter_without(
+    document_index: DocumentIndexClient,
+    read_only_populated_collection: tuple[CollectionPath, IndexPath],
+) -> None:
+    search_query = SearchQuery(
+        query="Pemberton",
+        max_results=10,
+        min_score=0.5,
+        filters=[
+            Filters(
+                filter_type="without",
+                fields=[
+                    FilterField(
+                        field_name="option-field",
+                        field_value=True,
+                        criteria=FilterOps.IS_NULL,
+                    )
+                ],
+            )
+        ],
+    )
+
+    @retry
+    def search() -> None:
+        collection_path, index_path = read_only_populated_collection
+        results = document_index.search(
+            collection_path,
+            index_path.index,
+            search_query,
+        )
+        assert len(results) == 2
+        assert {r.document_path.document_name for r in results} == {
+            "document-1",
+            "document-2",
+        }
+
+    search()
+
+
 def test_search_with_integer_filter(
     document_index: DocumentIndexClient,
     read_only_populated_collection: tuple[CollectionPath, IndexPath],