From 14d819bdec6a45fe3372464ad87b49ec20c5bf83 Mon Sep 17 00:00:00 2001 From: alex-keeler <59743435+alex-keeler@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:34:26 -0400 Subject: [PATCH] Update knn query spec (#538) * Update knn query spec Signed-off-by: Alex Keeler * Update changelog, add periods to descriptions Signed-off-by: Alex Keeler * Add version restriction, lint fix to knn test Signed-off-by: Alex Keeler * Add version constraint for knn filter Signed-off-by: Alex Keeler --------- Signed-off-by: Alex Keeler --- CHANGELOG.md | 1 + spec/namespaces/_core.yaml | 4 +- spec/schemas/_common.query_dsl.yaml | 8 ++ spec/schemas/_common.yaml | 47 +++------ spec/schemas/_core.msearch.yaml | 4 +- tests/default/_core/search/knn.yaml | 157 ++++++++++++++++++++++++++++ 6 files changed, 183 insertions(+), 38 deletions(-) create mode 100644 tests/default/_core/search/knn.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index b8fb01b7e..33921bf47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fixed security spec to add support for 400 and 403s ([#439](https://github.com/opensearch-project/opensearch-api-specification/pull/439)) - Fixed required parameters in `NodeInfo` and `NodeOperatingSystemInfo` ([#483](https://github.com/opensearch-project/opensearch-api-specification/pull/483)) - Fixed query DSL `neural` field `query_image` set `contentEncoding` and `model_id` as optional ([#512](https://github.com/opensearch-project/opensearch-api-specification/pull/512)) +- Fixed `knn` query specification ([#538](https://github.com/opensearch-project/opensearch-api-specification/pull/538)) ### Security diff --git a/spec/namespaces/_core.yaml b/spec/namespaces/_core.yaml index 9ba5edb16..f26fb4de6 100644 --- a/spec/namespaces/_core.yaml +++ b/spec/namespaces/_core.yaml @@ -2437,10 +2437,10 @@ components: knn: description: Defines the approximate kNN search to run. 
oneOf: - - $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery' + - $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery' - type: array items: - $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery' + $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery' rank: $ref: '../schemas/_common.yaml#/components/schemas/RankContainer' min_score: diff --git a/spec/schemas/_common.query_dsl.yaml b/spec/schemas/_common.query_dsl.yaml index ba5afcc3b..6633c043c 100644 --- a/spec/schemas/_common.query_dsl.yaml +++ b/spec/schemas/_common.query_dsl.yaml @@ -65,6 +65,8 @@ components: $ref: '#/components/schemas/IntervalsQuery' minProperties: 1 maxProperties: 1 + knn: + $ref: '#/components/schemas/KnnQuery' match: description: |- Returns documents that match a provided text, number, date or boolean value. @@ -896,6 +898,12 @@ components: $ref: '_common.yaml#/components/schemas/Field' required: - pattern + KnnQuery: + type: object + additionalProperties: + $ref: '_common.yaml#/components/schemas/KnnField' + minProperties: 1 + maxProperties: 1 MatchQuery: allOf: - $ref: '#/components/schemas/QueryBase' diff --git a/spec/schemas/_common.yaml b/spec/schemas/_common.yaml index 903c97c04..e97ac24df 100644 --- a/spec/schemas/_common.yaml +++ b/spec/schemas/_common.yaml @@ -591,59 +591,38 @@ components: type: number EmptyObject: type: object - KnnQuery: + KnnField: type: object properties: - field: - $ref: '#/components/schemas/Field' - query_vector: + vector: $ref: '#/components/schemas/QueryVector' - query_vector_builder: - $ref: '#/components/schemas/QueryVectorBuilder' k: - description: The final number of nearest neighbors to return as top hits + description: The final number of nearest neighbors to return as top hits. type: number - num_candidates: - description: The number of nearest neighbor candidates to consider per shard + min_score: + description: The minimum similarity score for a neighbor to be considered a hit. 
type: number - boost: - description: Boost value to apply to kNN scores + x-version-added: '2.14' + max_distance: + description: The maximum physical distance in vector space for a neighbor to be considered a hit. type: number + x-version-added: '2.14' filter: - description: Filters for the kNN search query + description: Filters for the kNN search query. oneOf: - $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer' - type: array items: $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer' - similarity: - description: The minimum similarity for a vector to be considered a match + boost: + description: Boost value to apply to kNN scores. type: number required: - - field - - k - - num_candidates + - vector QueryVector: type: array items: type: number - QueryVectorBuilder: - type: object - properties: - text_embedding: - $ref: '#/components/schemas/TextEmbedding' - minProperties: 1 - maxProperties: 1 - TextEmbedding: - type: object - properties: - model_id: - type: string - model_text: - type: string - required: - - model_id - - model_text SlicedScroll: type: object properties: diff --git a/spec/schemas/_core.msearch.yaml b/spec/schemas/_core.msearch.yaml index 23bca730b..06c256943 100644 --- a/spec/schemas/_core.msearch.yaml +++ b/spec/schemas/_core.msearch.yaml @@ -66,10 +66,10 @@ components: knn: description: Defines the approximate kNN search to run. oneOf: - - $ref: '_common.yaml#/components/schemas/KnnQuery' + - $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery' - type: array items: - $ref: '_common.yaml#/components/schemas/KnnQuery' + $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery' from: description: |- Starting document offset. 
By default, you cannot page through more than 10,000 diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn.yaml new file mode 100644 index 000000000..6fcfb12e6 --- /dev/null +++ b/tests/default/_core/search/knn.yaml @@ -0,0 +1,157 @@ +$schema: ../../../../json_schemas/test_story.schema.yaml + +description: Test search endpoint with knn query. +version: '>= 1.2' +prologues: + - path: /movies + method: PUT + request: + payload: + settings: + index: + knn: true + mappings: + properties: + director: + type: text + title: + type: text + year: + type: integer + embedding: + type: knn_vector + dimension: 5 + method: + name: hnsw + space_type: l2 + engine: faiss + - path: /movies/_doc + method: POST + parameters: + refresh: true + request: + payload: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] + status: [201] +epilogues: + - path: /movies + method: DELETE + status: [200, 404] +chapters: + - synopsis: Search using the k parameter. + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 2.3, 3.5, 4.1, 9.2] + k: 1 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] + + - synopsis: Search using the min_score parameter. + version: '>= 2.14' + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 2.3, 3.5, 4.1, 9.2] + min_score: 0.9 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] + + - synopsis: Search using the max_distance parameter. 
+ version: '>= 2.14' + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 2.3, 3.5, 4.1, 9.2] + max_distance: 0.1 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 1 + relation: eq + hits: + - _index: movies + _score: 1 + _source: + director: Bennett Miller + title: Moneyball + year: 2011 + embedding: [1.4, 2.3, 3.5, 4.1, 9.2] + + - synopsis: Search using a filter. + version: '>= 2.9' + path: /{index}/_search + parameters: + index: movies + method: POST + request: + payload: + query: + knn: + embedding: + vector: [1.4, 2.3, 3.5, 4.1, 9.2] + k: 1 + filter: + term: + year: 2012 + response: + status: 200 + payload: + timed_out: false + hits: + total: + value: 0 + relation: eq + hits: [] \ No newline at end of file