From 7a668f4f7dd172a59a57abd487039dfd106a0d34 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Mon, 8 Apr 2024 17:44:38 +0400 Subject: [PATCH] Make k and num_candidates optional in kNN queries --- .../elasticsearch-serverless-openapi.json | 62 ++++++- output/schema/schema.json | 153 ++++++++++++++++-- output/typescript/types.ts | 18 ++- specification/_global/msearch/types.ts | 4 +- specification/_global/search/SearchRequest.ts | 4 +- specification/_types/Knn.ts | 27 ++++ .../submit/AsyncSearchSubmitRequest.ts | 4 +- 7 files changed, 248 insertions(+), 24 deletions(-) diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 049e2830cf..30f1e1610d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -25673,12 +25673,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] @@ -26804,12 +26804,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] @@ -39613,6 +39613,56 @@ } } }, + "_types:KnnSearch": { + "type": "object", + "properties": { + "field": { + "$ref": "#/components/schemas/_types:Field" + }, + "query_vector": { + "$ref": "#/components/schemas/_types:QueryVector" + }, + "query_vector_builder": { + "$ref": "#/components/schemas/_types:QueryVectorBuilder" + }, + "k": { + "description": "The final number of nearest neighbors to return as top hits", + "type": "number" + }, + "num_candidates": { + 
"description": "The number of nearest neighbor candidates to consider per shard", + "type": "number" + }, + "boost": { + "description": "Boost value to apply to kNN scores", + "type": "number" + }, + "filter": { + "description": "Filters for the kNN search query", + "oneOf": [ + { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + } + } + ] + }, + "similarity": { + "description": "The minimum similarity for a vector to be considered a match", + "type": "number" + }, + "inner_hits": { + "$ref": "#/components/schemas/_global.search._types:InnerHits" + } + }, + "required": [ + "field" + ] + }, "_global.search._types:Rescore": { "allOf": [ { @@ -55878,12 +55928,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] diff --git a/output/schema/schema.json b/output/schema/schema.json index 5ad5b346dd..667402aa5c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -26894,7 +26894,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } }, @@ -26903,7 +26903,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } @@ -31199,7 +31199,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } }, @@ -31208,7 +31208,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } @@ -43868,7 +43868,142 @@ } } ], - "specLocation": "_types/Knn.ts#L27-L49" + "specLocation": "_types/Knn.ts#L54-L76" + }, + { + "kind": "interface", + "name": { + "name": "KnnSearch", + 
"namespace": "_types" + }, + "properties": [ + { + "description": "The name of the vector field to search against", + "name": "field", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Field", + "namespace": "_types" + } + } + }, + { + "description": "The query vector", + "name": "query_vector", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "QueryVector", + "namespace": "_types" + } + } + }, + { + "description": "The query vector builder. You must provide a query_vector_builder or query_vector, but not both.", + "name": "query_vector_builder", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "QueryVectorBuilder", + "namespace": "_types" + } + } + }, + { + "description": "The final number of nearest neighbors to return as top hits", + "name": "k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "The number of nearest neighbor candidates to consider per shard", + "name": "num_candidates", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "Boost value to apply to kNN scores", + "name": "boost", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Filters for the kNN search query", + "name": "filter", + "required": false, + "type": { + "items": [ + { + "kind": "instance_of", + "type": { + "name": "QueryContainer", + "namespace": "_types.query_dsl" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "QueryContainer", + "namespace": "_types.query_dsl" + } + } + } + ], + "kind": "union_of" + } + }, + { + "description": "The minimum similarity for a vector to be considered a match", + "name": "similarity", + "required": false, + "type": { + "kind": 
"instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "If defined, each search hit will contain inner hits.", + "docId": "knn-inner-hits", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/knn-search.html#nested-knn-search-inner-hits", + "name": "inner_hits", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InnerHits", + "namespace": "_global.search._types" + } + } + } + ], + "specLocation": "_types/Knn.ts#L30-L52" }, { "kind": "interface", @@ -45211,7 +45346,7 @@ } } ], - "specLocation": "_types/Knn.ts#L51-L54", + "specLocation": "_types/Knn.ts#L78-L81", "variants": { "kind": "container" } @@ -47458,7 +47593,7 @@ } } ], - "specLocation": "_types/Knn.ts#L56-L59" + "specLocation": "_types/Knn.ts#L83-L86" }, { "kind": "enum", @@ -81266,7 +81401,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } }, @@ -81275,7 +81410,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } diff --git a/output/typescript/types.ts b/output/typescript/types.ts index ec81d6868f..bdae491870 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -732,7 +732,7 @@ export interface MsearchMultisearchBody { ext?: Record stored_fields?: Fields docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] from?: integer highlight?: SearchHighlight indices_boost?: Record[] @@ -1199,7 +1199,7 @@ export interface SearchRequest extends RequestBase { track_total_hits?: SearchTrackHits indices_boost?: Record[] docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] rank?: RankContainer min_score?: double post_filter?: QueryDslQueryContainer @@ -2365,6 +2365,18 @@ export interface KnnQuery { inner_hits?: SearchInnerHits } +export interface 
KnnSearch { + field: Field + query_vector?: QueryVector + query_vector_builder?: QueryVectorBuilder + k?: long + num_candidates?: long + boost?: float + filter?: QueryDslQueryContainer | QueryDslQueryContainer[] + similarity?: float + inner_hits?: SearchInnerHits +} + export interface LatLonGeoLocation { lat: double lon: double @@ -6261,7 +6273,7 @@ export interface AsyncSearchSubmitRequest extends RequestBase { track_total_hits?: SearchTrackHits indices_boost?: Record[] docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] min_score?: double post_filter?: QueryDslQueryContainer profile?: boolean diff --git a/specification/_global/msearch/types.ts b/specification/_global/msearch/types.ts index 57495ade82..7988457a11 100644 --- a/specification/_global/msearch/types.ts +++ b/specification/_global/msearch/types.ts @@ -41,7 +41,7 @@ import { Rescore } from '@global/search/_types/rescoring' import { SourceConfig } from '@global/search/_types/SourceFilter' import { RuntimeFields } from '@_types/mapping/RuntimeFields' import { ScriptField } from '@_types/Scripting' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' /** @@ -101,7 +101,7 @@ export class MultisearchBody { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Starting document offset. By default, you cannot page through more than 10,000 * hits using the from and size parameters. 
To page through more hits, use the diff --git a/specification/_global/search/SearchRequest.ts b/specification/_global/search/SearchRequest.ts index 9fec1ed183..a82b77bb3b 100644 --- a/specification/_global/search/SearchRequest.ts +++ b/specification/_global/search/SearchRequest.ts @@ -46,7 +46,7 @@ import { Suggester } from './_types/suggester' import { TrackHits } from '@global/search/_types/hits' import { Operator } from '@_types/query_dsl/Operator' import { Sort, SortResults } from '@_types/sort' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { RankContainer } from '@_types/Rank' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' @@ -384,7 +384,7 @@ export interface Request extends RequestBase { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Defines the Reciprocal Rank Fusion (RRF) to use. * @availability stack since=8.8.0 diff --git a/specification/_types/Knn.ts b/specification/_types/Knn.ts index 7702eece37..8227d25869 100644 --- a/specification/_types/Knn.ts +++ b/specification/_types/Knn.ts @@ -24,6 +24,33 @@ import { InnerHits } from '@global/search/_types/hits' export type QueryVector = float[] +/* KnnSearch (used in kNN search) and KnnQuery (used in kNN queries) are close + * but different enough to require different classes */ + +export interface KnnSearch { + /** The name of the vector field to search against */ + field: Field + /** The query vector */ + query_vector?: QueryVector + /** The query vector builder. You must provide a query_vector_builder or query_vector, but not both. 
*/ + query_vector_builder?: QueryVectorBuilder + /** The final number of nearest neighbors to return as top hits */ + k?: long + /** The number of nearest neighbor candidates to consider per shard */ + num_candidates?: long + /** Boost value to apply to kNN scores */ + boost?: float + /** Filters for the kNN search query */ + filter?: QueryContainer | QueryContainer[] + /** The minimum similarity for a vector to be considered a match */ + similarity?: float + /** + * If defined, each search hit will contain inner hits. + * @doc_id knn-inner-hits + */ + inner_hits?: InnerHits +} + export interface KnnQuery { /** The name of the vector field to search against */ field: Field diff --git a/specification/async_search/submit/AsyncSearchSubmitRequest.ts b/specification/async_search/submit/AsyncSearchSubmitRequest.ts index 989f57eaf9..c8b6051fe0 100644 --- a/specification/async_search/submit/AsyncSearchSubmitRequest.ts +++ b/specification/async_search/submit/AsyncSearchSubmitRequest.ts @@ -49,7 +49,7 @@ import { import { Suggester } from '@global/search/_types/suggester' import { TrackHits } from '@global/search/_types/hits' import { Operator } from '@_types/query_dsl/Operator' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' /** @@ -192,7 +192,7 @@ export interface Request extends RequestBase { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Minimum _score for matching documents. Documents with a lower _score are * not included in the search results.