Skip to content

Commit

Permalink
Update knn query spec (#538)
Browse files Browse the repository at this point in the history
* Update knn query spec

Signed-off-by: Alex Keeler <[email protected]>

* Update changelog, add periods to descriptions

Signed-off-by: Alex Keeler <[email protected]>

* Add version restriction, lint fix to knn test

Signed-off-by: Alex Keeler <[email protected]>

* Add version constraint for knn filter

Signed-off-by: Alex Keeler <[email protected]>

---------

Signed-off-by: Alex Keeler <[email protected]>
  • Loading branch information
alex-keeler authored Aug 29, 2024
1 parent e553ab1 commit 14d819b
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Fixed security spec to add support for 400 and 403s ([#439](https://github.com/opensearch-project/opensearch-api-specification/pull/439))
- Fixed required parameters in `NodeInfo` and `NodeOperatingSystemInfo` ([#483](https://github.com/opensearch-project/opensearch-api-specification/pull/483))
- Fixed query DSL `neural` field `query_image` set `contentEncoding` and `model_id` as optional ([#512](https://github.com/opensearch-project/opensearch-api-specification/pull/512))
- Fixed `knn` query specification ([#538](https://github.com/opensearch-project/opensearch-api-specification/pull/538))

### Security

Expand Down
4 changes: 2 additions & 2 deletions spec/namespaces/_core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2437,10 +2437,10 @@ components:
knn:
description: Defines the approximate kNN search to run.
oneOf:
- $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery'
- $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery'
- type: array
items:
$ref: '../schemas/_common.yaml#/components/schemas/KnnQuery'
$ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery'
rank:
$ref: '../schemas/_common.yaml#/components/schemas/RankContainer'
min_score:
Expand Down
8 changes: 8 additions & 0 deletions spec/schemas/_common.query_dsl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ components:
$ref: '#/components/schemas/IntervalsQuery'
minProperties: 1
maxProperties: 1
knn:
$ref: '#/components/schemas/KnnQuery'
match:
description: |-
Returns documents that match a provided text, number, date or boolean value.
Expand Down Expand Up @@ -896,6 +898,12 @@ components:
$ref: '_common.yaml#/components/schemas/Field'
required:
- pattern
KnnQuery:
type: object
additionalProperties:
$ref: '_common.yaml#/components/schemas/KnnField'
minProperties: 1
maxProperties: 1
MatchQuery:
allOf:
- $ref: '#/components/schemas/QueryBase'
Expand Down
47 changes: 13 additions & 34 deletions spec/schemas/_common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -591,59 +591,38 @@ components:
type: number
EmptyObject:
type: object
KnnQuery:
KnnField:
type: object
properties:
field:
$ref: '#/components/schemas/Field'
query_vector:
vector:
$ref: '#/components/schemas/QueryVector'
query_vector_builder:
$ref: '#/components/schemas/QueryVectorBuilder'
k:
description: The final number of nearest neighbors to return as top hits
description: The final number of nearest neighbors to return as top hits.
type: number
num_candidates:
description: The number of nearest neighbor candidates to consider per shard
min_score:
description: The minimum similarity score for a neighbor to be considered a hit.
type: number
boost:
description: Boost value to apply to kNN scores
x-version-added: '2.14'
max_distance:
description: The maximum physical distance in vector space for a neighbor to be considered a hit.
type: number
x-version-added: '2.14'
filter:
description: Filters for the kNN search query
description: Filters for the kNN search query.
oneOf:
- $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer'
- type: array
items:
$ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer'
similarity:
description: The minimum similarity for a vector to be considered a match
boost:
description: Boost value to apply to kNN scores.
type: number
required:
- field
- k
- num_candidates
- vector
QueryVector:
type: array
items:
type: number
QueryVectorBuilder:
type: object
properties:
text_embedding:
$ref: '#/components/schemas/TextEmbedding'
minProperties: 1
maxProperties: 1
TextEmbedding:
type: object
properties:
model_id:
type: string
model_text:
type: string
required:
- model_id
- model_text
SlicedScroll:
type: object
properties:
Expand Down
4 changes: 2 additions & 2 deletions spec/schemas/_core.msearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ components:
knn:
description: Defines the approximate kNN search to run.
oneOf:
- $ref: '_common.yaml#/components/schemas/KnnQuery'
- $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery'
- type: array
items:
$ref: '_common.yaml#/components/schemas/KnnQuery'
$ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery'
from:
description: |-
Starting document offset. By default, you cannot page through more than 10,000
Expand Down
157 changes: 157 additions & 0 deletions tests/default/_core/search/knn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
$schema: ../../../../json_schemas/test_story.schema.yaml

# Test story for the search endpoint using the `knn` query clause.
# Each chapter queries the same single-document index with a different
# knn option (k, min_score, max_distance, filter).
description: Test search endpoint with knn query.
version: '>= 1.2'
prologues:
  # Create the index with knn enabled and a 5-dimensional knn_vector field.
  - path: /movies
    method: PUT
    request:
      payload:
        settings:
          index:
            knn: true
        mappings:
          properties:
            director:
              type: text
            title:
              type: text
            year:
              type: integer
            embedding:
              type: knn_vector
              dimension: 5
              method:
                name: hnsw
                space_type: l2
                engine: faiss
  # Index the only document; `refresh: true` is passed so the chapters
  # below can find it.
  - path: /movies/_doc
    method: POST
    parameters:
      refresh: true
    request:
      payload:
        director: Bennett Miller
        title: Moneyball
        year: 2011
        embedding: [1.4, 2.3, 3.5, 4.1, 9.2]
    status: [201]
epilogues:
  # Clean up the index; 404 is accepted as well as 200.
  - path: /movies
    method: DELETE
    status: [200, 404]
chapters:
  # The query vector is identical to the indexed embedding.
  - synopsis: Search using the k parameter.
    path: /{index}/_search
    parameters:
      index: movies
    method: POST
    request:
      payload:
        query:
          knn:
            embedding:
              vector: [1.4, 2.3, 3.5, 4.1, 9.2]
              k: 1
    response:
      status: 200
      payload:
        timed_out: false
        hits:
          total:
            value: 1
            relation: eq
          hits:
            - _index: movies
              _score: 1
              _source:
                director: Bennett Miller
                title: Moneyball
                year: 2011
                embedding: [1.4, 2.3, 3.5, 4.1, 9.2]

  # Restricted to 2.14 and later, matching `x-version-added` on min_score.
  - synopsis: Search using the min_score parameter.
    version: '>= 2.14'
    path: /{index}/_search
    parameters:
      index: movies
    method: POST
    request:
      payload:
        query:
          knn:
            embedding:
              vector: [1.4, 2.3, 3.5, 4.1, 9.2]
              min_score: 0.9
    response:
      status: 200
      payload:
        timed_out: false
        hits:
          total:
            value: 1
            relation: eq
          hits:
            - _index: movies
              _score: 1
              _source:
                director: Bennett Miller
                title: Moneyball
                year: 2011
                embedding: [1.4, 2.3, 3.5, 4.1, 9.2]

  # Restricted to 2.14 and later, matching `x-version-added` on max_distance.
  - synopsis: Search using the max_distance parameter.
    version: '>= 2.14'
    path: /{index}/_search
    parameters:
      index: movies
    method: POST
    request:
      payload:
        query:
          knn:
            embedding:
              vector: [1.4, 2.3, 3.5, 4.1, 9.2]
              max_distance: 0.1
    response:
      status: 200
      payload:
        timed_out: false
        hits:
          total:
            value: 1
            relation: eq
          hits:
            - _index: movies
              _score: 1
              _source:
                director: Bennett Miller
                title: Moneyball
                year: 2011
                embedding: [1.4, 2.3, 3.5, 4.1, 9.2]

  # The filter asks for year 2012 while the only indexed document has
  # year 2011, so the expected result set is empty.
  - synopsis: Search using a filter.
    version: '>= 2.9'
    path: /{index}/_search
    parameters:
      index: movies
    method: POST
    request:
      payload:
        query:
          knn:
            embedding:
              vector: [1.4, 2.3, 3.5, 4.1, 9.2]
              k: 1
              filter:
                term:
                  year: 2012
    response:
      status: 200
      payload:
        timed_out: false
        hits:
          total:
            value: 0
            relation: eq
          hits: []

0 comments on commit 14d819b

Please sign in to comment.