From b5fd74026193fde0edd713c296dc0a7bfec97a25 Mon Sep 17 00:00:00 2001 From: Alex Keeler Date: Wed, 28 Aug 2024 13:29:15 -0400 Subject: [PATCH 1/4] Update knn query spec Signed-off-by: Alex Keeler --- spec/namespaces/_core.yaml | 4 +- spec/schemas/_common.query_dsl.yaml | 8 ++ spec/schemas/_common.yaml | 41 ++------ spec/schemas/_core.msearch.yaml | 4 +- tests/default/_core/search/knn.yaml | 153 ++++++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 36 deletions(-) create mode 100644 tests/default/_core/search/knn.yaml diff --git a/spec/namespaces/_core.yaml b/spec/namespaces/_core.yaml index 9ba5edb16..f26fb4de6 100644 --- a/spec/namespaces/_core.yaml +++ b/spec/namespaces/_core.yaml @@ -2437,10 +2437,10 @@ components: knn: description: Defines the approximate kNN search to run. oneOf: - - $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery' + - $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery' - type: array items: - $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery' + $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery' rank: $ref: '../schemas/_common.yaml#/components/schemas/RankContainer' min_score: diff --git a/spec/schemas/_common.query_dsl.yaml b/spec/schemas/_common.query_dsl.yaml index ba5afcc3b..6633c043c 100644 --- a/spec/schemas/_common.query_dsl.yaml +++ b/spec/schemas/_common.query_dsl.yaml @@ -65,6 +65,8 @@ components: $ref: '#/components/schemas/IntervalsQuery' minProperties: 1 maxProperties: 1 + knn: + $ref: '#/components/schemas/KnnQuery' match: description: |- Returns documents that match a provided text, number, date or boolean value. 
@@ -896,6 +898,12 @@ components: $ref: '_common.yaml#/components/schemas/Field' required: - pattern + KnnQuery: + type: object + additionalProperties: + $ref: '_common.yaml#/components/schemas/KnnField' + minProperties: 1 + maxProperties: 1 MatchQuery: allOf: - $ref: '#/components/schemas/QueryBase' diff --git a/spec/schemas/_common.yaml b/spec/schemas/_common.yaml index 903c97c04..df0b585e6 100644 --- a/spec/schemas/_common.yaml +++ b/spec/schemas/_common.yaml @@ -591,23 +591,19 @@ components: type: number EmptyObject: type: object - KnnQuery: + KnnField: type: object properties: - field: - $ref: '#/components/schemas/Field' - query_vector: + vector: $ref: '#/components/schemas/QueryVector' - query_vector_builder: - $ref: '#/components/schemas/QueryVectorBuilder' k: description: The final number of nearest neighbors to return as top hits type: number - num_candidates: - description: The number of nearest neighbor candidates to consider per shard + min_score: + description: The minimum similarity score for a neighbor to be considered a hit type: number - boost: - description: Boost value to apply to kNN scores + max_distance: + description: The maximum physical distance in vector space for a neighbor to be considered a hit type: number filter: description: Filters for the kNN search query @@ -616,34 +612,15 @@ components: - type: array items: $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer' - similarity: - description: The minimum similarity for a vector to be considered a match + boost: + description: Boost value to apply to kNN scores type: number required: - - field - - k - - num_candidates + - vector QueryVector: type: array items: type: number - QueryVectorBuilder: - type: object - properties: - text_embedding: - $ref: '#/components/schemas/TextEmbedding' - minProperties: 1 - maxProperties: 1 - TextEmbedding: - type: object - properties: - model_id: - type: string - model_text: - type: string - required: - - model_id - - model_text 
SlicedScroll: type: object properties: diff --git a/spec/schemas/_core.msearch.yaml b/spec/schemas/_core.msearch.yaml index 23bca730b..06c256943 100644 --- a/spec/schemas/_core.msearch.yaml +++ b/spec/schemas/_core.msearch.yaml @@ -66,10 +66,10 @@ components: knn: description: Defines the approximate kNN search to run. oneOf: - - $ref: '_common.yaml#/components/schemas/KnnQuery' + - $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery' - type: array items: - $ref: '_common.yaml#/components/schemas/KnnQuery' + $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery' from: description: |- Starting document offset. By default, you cannot page through more than 10,000 diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn.yaml new file mode 100644 index 000000000..b515e822c --- /dev/null +++ b/tests/default/_core/search/knn.yaml @@ -0,0 +1,153 @@ +$schema: ../../../../json_schemas/test_story.schema.yaml + +description: Test search endpoint with knn query. +prologues: + - path: /movies + method: PUT + request: + payload: + settings: + index: + knn: true + mappings: + properties: + director: + type: text + title: + type: text + year: + type: integer + embedding: + type: knn_vector + dimension: 5 + method: + name: hnsw + space_type: l2 + engine: faiss + - path: /movies/_doc + method: POST + parameters: + refresh: true + request: + payload: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + status: [201] +epilogues: + - path: /movies + method: DELETE + status: [200, 404] +chapters: + - synopsis: Search using the k parameter. 
+ path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 3.5, 2.3, 4.1, 9.2] + k: 1 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + + - synopsis: Search using the min_score parameter. + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 3.5, 2.3, 4.1, 9.2] + min_score: 0.9 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + + - synopsis: Search using the max_distance parameter. + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 3.5, 2.3, 4.1, 9.2] + max_distance: 0.1 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + + - synopsis: Search using a filter. 
+ path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 3.5, 2.3, 4.1, 9.2] + k: 1 + filter: + term: + year: 2012 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 0 + relation: eq + hits: [] \ No newline at end of file From 9b115b584b0319cee9a06d409a115c26bf7ba676 Mon Sep 17 00:00:00 2001 From: Alex Keeler Date: Wed, 28 Aug 2024 15:01:51 -0400 Subject: [PATCH 2/4] Update changelog, add periods to descriptions Signed-off-by: Alex Keeler --- CHANGELOG.md | 1 + spec/schemas/_common.yaml | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8fb01b7e..33921bf47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fixed security spec to add support for 400 and 403s ([#439](https://github.com/opensearch-project/opensearch-api-specification/pull/439)) - Fixed required parameters in `NodeInfo` and `NodeOperatingSystemInfo` ([#483](https://github.com/opensearch-project/opensearch-api-specification/pull/483)) - Fixed query DSL `neural` field `query_image` set `contentEncoding` and `model_id` as optional ([#512](https://github.com/opensearch-project/opensearch-api-specification/pull/512)) +- Fixed `knn` query specification ([#538](https://github.com/opensearch-project/opensearch-api-specification/pull/538)) ### Security diff --git a/spec/schemas/_common.yaml b/spec/schemas/_common.yaml index df0b585e6..e97ac24df 100644 --- a/spec/schemas/_common.yaml +++ b/spec/schemas/_common.yaml @@ -597,16 +597,18 @@ components: vector: $ref: '#/components/schemas/QueryVector' k: - description: The final number of nearest neighbors to return as top hits + description: The final number of nearest neighbors to return as top hits. 
type: number min_score: - description: The minimum similarity score for a neighbor to be considered a hit + description: The minimum similarity score for a neighbor to be considered a hit. type: number + x-version-added: '2.14' max_distance: - description: The maximum physical distance in vector space for a neighbor to be considered a hit + description: The maximum physical distance in vector space for a neighbor to be considered a hit. type: number + x-version-added: '2.14' filter: - description: Filters for the kNN search query + description: Filters for the kNN search query. oneOf: - $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer' - type: array From 1779bb6c85b2a4ef85f3d4408e3094237215e027 Mon Sep 17 00:00:00 2001 From: Alex Keeler Date: Wed, 28 Aug 2024 15:44:58 -0400 Subject: [PATCH 3/4] Add version restriction, lint fix to knn test Signed-off-by: Alex Keeler --- tests/default/_core/search/knn.yaml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn.yaml index b515e822c..31f918bcd 100644 --- a/tests/default/_core/search/knn.yaml +++ b/tests/default/_core/search/knn.yaml @@ -33,7 +33,7 @@ prologues: director: Bennett Miller title: Moneyball year: 2011 - embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] status: [201] epilogues: - path: /movies @@ -50,7 +50,7 @@ chapters: query: knn: embedding: - vector: [1.4, 3.5, 2.3, 4.1, 9.2] + vector: [1.4, 2.3, 3.5, 4.1, 9.2] k: 1 response: status: 200 @@ -67,9 +67,10 @@ chapters: director: Bennett Miller title: Moneyball year: 2011 - embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] - synopsis: Search using the min_score parameter. 
+ version: '>= 2.14' path: /{index}/_search parameters: index: movies @@ -79,7 +80,7 @@ chapters: query: knn: embedding: - vector: [1.4, 3.5, 2.3, 4.1, 9.2] + vector: [1.4, 2.3, 3.5, 4.1, 9.2] min_score: 0.9 response: status: 200 @@ -96,9 +97,10 @@ chapters: director: Bennett Miller title: Moneyball year: 2011 - embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] - synopsis: Search using the max_distance parameter. + version: '>= 2.14' path: /{index}/_search parameters: index: movies @@ -108,7 +110,7 @@ chapters: query: knn: embedding: - vector: [1.4, 3.5, 2.3, 4.1, 9.2] + vector: [1.4, 2.3, 3.5, 4.1, 9.2] max_distance: 0.1 response: status: 200 @@ -125,7 +127,7 @@ chapters: director: Bennett Miller title: Moneyball year: 2011 - embedding: [1.4, 3.5, 2.3, 4.1, 9.2] + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] - synopsis: Search using a filter. path: /{index}/_search @@ -137,7 +139,7 @@ chapters: query: knn: embedding: - vector: [1.4, 3.5, 2.3, 4.1, 9.2] + vector: [1.4, 2.3, 3.5, 4.1, 9.2] k: 1 filter: term: From 7f5b7b8b5a1de95ec7c18a04557959f74ad4421d Mon Sep 17 00:00:00 2001 From: Alex Keeler Date: Thu, 29 Aug 2024 08:45:27 -0400 Subject: [PATCH 4/4] Add version constraint for knn filter Signed-off-by: Alex Keeler --- tests/default/_core/search/knn.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn.yaml index 31f918bcd..6fcfb12e6 100644 --- a/tests/default/_core/search/knn.yaml +++ b/tests/default/_core/search/knn.yaml @@ -1,6 +1,7 @@ $schema: ../../../../json_schemas/test_story.schema.yaml description: Test search endpoint with knn query. +version: '>= 1.2' prologues: - path: /movies method: PUT @@ -130,6 +131,7 @@ chapters: embedding: [1.4, 2.3, 3.5, 4.1, 9.2] - synopsis: Search using a filter. + version: '>= 2.9' path: /{index}/_search parameters: index: movies