From 4718254e0ea2bab696610ef8e141c140d116fa6e Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Fri, 12 Apr 2024 11:42:59 +0400 Subject: [PATCH] Make k and num_candidates optional in kNN queries (#2490) * Make k and num_candidates optional in kNN queries * Address review comments --- .../elasticsearch-serverless-openapi.json | 148 ++++++++++++------ output/schema/schema.json | 137 ++++++++++++++-- output/schema/validation-errors.json | 1 + output/typescript/types.ts | 22 ++- specification/_global/msearch/types.ts | 4 +- specification/_global/search/SearchRequest.ts | 4 +- specification/_types/Knn.ts | 28 +++- .../submit/AsyncSearchSubmitRequest.ts | 4 +- 8 files changed, 271 insertions(+), 77 deletions(-) diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 2e6fcffdf3..c0dee4fb7e 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -26856,12 +26856,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] @@ -27987,12 +27987,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] @@ -36097,55 +36097,53 @@ ] }, "_types:KnnQuery": { - "type": "object", - "properties": { - "field": { - "$ref": "#/components/schemas/_types:Field" - }, - "query_vector": { - "$ref": "#/components/schemas/_types:QueryVector" - }, - "query_vector_builder": { - "$ref": "#/components/schemas/_types:QueryVectorBuilder" - }, - "k": { - 
"description": "The final number of nearest neighbors to return as top hits", - "type": "number" - }, - "num_candidates": { - "description": "The number of nearest neighbor candidates to consider per shard", - "type": "number" - }, - "boost": { - "description": "Boost value to apply to kNN scores", - "type": "number" + "allOf": [ + { + "$ref": "#/components/schemas/_types.query_dsl:QueryBase" }, - "filter": { - "description": "Filters for the kNN search query", - "oneOf": [ - { - "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + { + "type": "object", + "properties": { + "field": { + "$ref": "#/components/schemas/_types:Field" }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" - } + "query_vector": { + "$ref": "#/components/schemas/_types:QueryVector" + }, + "query_vector_builder": { + "$ref": "#/components/schemas/_types:QueryVectorBuilder" + }, + "num_candidates": { + "description": "The number of nearest neighbor candidates to consider per shard", + "type": "number" + }, + "boost": { + "description": "Boost value to apply to kNN scores", + "type": "number" + }, + "filter": { + "description": "Filters for the kNN search query", + "oneOf": [ + { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + } + } + ] + }, + "similarity": { + "description": "The minimum similarity for a vector to be considered a match", + "type": "number" } + }, + "required": [ + "field" ] - }, - "similarity": { - "description": "The minimum similarity for a vector to be considered a match", - "type": "number" - }, - "inner_hits": { - "$ref": "#/components/schemas/_global.search._types:InnerHits" } - }, - "required": [ - "field", - "k", - "num_candidates" ] }, "_types:QueryVector": { @@ -40796,6 +40794,56 @@ } } }, + "_types:KnnSearch": { + "type": "object", + "properties": { + "field": { + "$ref": 
"#/components/schemas/_types:Field" + }, + "query_vector": { + "$ref": "#/components/schemas/_types:QueryVector" + }, + "query_vector_builder": { + "$ref": "#/components/schemas/_types:QueryVectorBuilder" + }, + "k": { + "description": "The final number of nearest neighbors to return as top hits", + "type": "number" + }, + "num_candidates": { + "description": "The number of nearest neighbor candidates to consider per shard", + "type": "number" + }, + "boost": { + "description": "Boost value to apply to kNN scores", + "type": "number" + }, + "filter": { + "description": "Filters for the kNN search query", + "oneOf": [ + { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/_types.query_dsl:QueryContainer" + } + } + ] + }, + "similarity": { + "description": "The minimum similarity for a vector to be considered a match", + "type": "number" + }, + "inner_hits": { + "$ref": "#/components/schemas/_global.search._types:InnerHits" + } + }, + "required": [ + "field" + ] + }, "_global.search._types:Rescore": { "allOf": [ { @@ -57789,12 +57837,12 @@ "description": "Defines the approximate kNN search to run.", "oneOf": [ { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" }, { "type": "array", "items": { - "$ref": "#/components/schemas/_types:KnnQuery" + "$ref": "#/components/schemas/_types:KnnSearch" } } ] diff --git a/output/schema/schema.json b/output/schema/schema.json index 912a770972..78b6629c38 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -27638,7 +27638,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } }, @@ -27647,7 +27647,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } @@ -31943,7 +31943,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", 
"namespace": "_types" } }, @@ -31952,7 +31952,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } @@ -44530,11 +44530,126 @@ } }, { + "inherits": { + "type": { + "name": "QueryBase", + "namespace": "_types.query_dsl" + } + }, "kind": "interface", "name": { "name": "KnnQuery", "namespace": "_types" }, + "properties": [ + { + "description": "The name of the vector field to search against", + "name": "field", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Field", + "namespace": "_types" + } + } + }, + { + "description": "The query vector", + "name": "query_vector", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "QueryVector", + "namespace": "_types" + } + } + }, + { + "description": "The query vector builder. You must provide a query_vector_builder or query_vector, but not both.", + "name": "query_vector_builder", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "QueryVectorBuilder", + "namespace": "_types" + } + } + }, + { + "description": "The number of nearest neighbor candidates to consider per shard", + "name": "num_candidates", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "Boost value to apply to kNN scores", + "name": "boost", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Filters for the kNN search query", + "name": "filter", + "required": false, + "type": { + "items": [ + { + "kind": "instance_of", + "type": { + "name": "QueryContainer", + "namespace": "_types.query_dsl" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "QueryContainer", + "namespace": "_types.query_dsl" + } + } + } + ], + "kind": "union_of" + } + }, + { + "description": "The minimum 
similarity for a vector to be considered a match", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "_types/Knn.ts#L54-L69" + }, + { + "kind": "interface", + "name": { + "name": "KnnSearch", + "namespace": "_types" + }, "properties": [ { "description": "The name of the vector field to search against", @@ -44575,7 +44690,7 @@ { "description": "The final number of nearest neighbors to return as top hits", "name": "k", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -44587,7 +44702,7 @@ { "description": "The number of nearest neighbor candidates to consider per shard", "name": "num_candidates", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -44662,7 +44777,7 @@ } } ], - "specLocation": "_types/Knn.ts#L27-L49" + "specLocation": "_types/Knn.ts#L30-L52" }, { "kind": "interface", @@ -46005,7 +46120,7 @@ } } ], - "specLocation": "_types/Knn.ts#L51-L54", + "specLocation": "_types/Knn.ts#L71-L74", "variants": { "kind": "container" } @@ -48252,7 +48367,7 @@ } } ], - "specLocation": "_types/Knn.ts#L56-L59" + "specLocation": "_types/Knn.ts#L76-L79" }, { "kind": "enum", @@ -82060,7 +82175,7 @@ { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } }, @@ -82069,7 +82184,7 @@ "value": { "kind": "instance_of", "type": { - "name": "KnnQuery", + "name": "KnnSearch", "namespace": "_types" } } diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index 720eea3d87..435bb52758 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -66,6 +66,7 @@ "Request: query parameter 'scroll' does not exist in the json spec", "Request: query parameter 'rest_total_hits_as_int' does not exist in the json spec", "interface definition _types:QueryVectorBuilder - Property 
text_embedding is a single-variant and must be required", + "interface definition _types:KnnQuery - Property 'boost' is already defined in an ancestor class", "type_alias definition _spec_utils:PipeSeparatedFlags / union_of / instance_of - No type definition for '_spec_utils:T'" ], "response": [] diff --git a/output/typescript/types.ts b/output/typescript/types.ts index b5cb30c7cf..93ec439e16 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -732,7 +732,7 @@ export interface MsearchMultisearchBody { ext?: Record stored_fields?: Fields docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] from?: integer highlight?: SearchHighlight indices_boost?: Record[] @@ -1199,7 +1199,7 @@ export interface SearchRequest extends RequestBase { track_total_hits?: SearchTrackHits indices_boost?: Record[] docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] rank?: RankContainer min_score?: double post_filter?: QueryDslQueryContainer @@ -2357,12 +2357,22 @@ export interface InlineScript extends ScriptBase { export type Ip = string -export interface KnnQuery { +export interface KnnQuery extends QueryDslQueryBase { field: Field query_vector?: QueryVector query_vector_builder?: QueryVectorBuilder - k: long - num_candidates: long + num_candidates?: long + boost?: float + filter?: QueryDslQueryContainer | QueryDslQueryContainer[] + similarity?: float +} + +export interface KnnSearch { + field: Field + query_vector?: QueryVector + query_vector_builder?: QueryVectorBuilder + k?: long + num_candidates?: long boost?: float filter?: QueryDslQueryContainer | QueryDslQueryContainer[] similarity?: float @@ -6265,7 +6275,7 @@ export interface AsyncSearchSubmitRequest extends RequestBase { track_total_hits?: SearchTrackHits indices_boost?: Record[] docvalue_fields?: (QueryDslFieldAndFormat | Field)[] - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | 
KnnSearch[] min_score?: double post_filter?: QueryDslQueryContainer profile?: boolean diff --git a/specification/_global/msearch/types.ts b/specification/_global/msearch/types.ts index 57495ade82..7988457a11 100644 --- a/specification/_global/msearch/types.ts +++ b/specification/_global/msearch/types.ts @@ -41,7 +41,7 @@ import { Rescore } from '@global/search/_types/rescoring' import { SourceConfig } from '@global/search/_types/SourceFilter' import { RuntimeFields } from '@_types/mapping/RuntimeFields' import { ScriptField } from '@_types/Scripting' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' /** @@ -101,7 +101,7 @@ export class MultisearchBody { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Starting document offset. By default, you cannot page through more than 10,000 * hits using the from and size parameters. To page through more hits, use the diff --git a/specification/_global/search/SearchRequest.ts b/specification/_global/search/SearchRequest.ts index 9fec1ed183..a82b77bb3b 100644 --- a/specification/_global/search/SearchRequest.ts +++ b/specification/_global/search/SearchRequest.ts @@ -46,7 +46,7 @@ import { Suggester } from './_types/suggester' import { TrackHits } from '@global/search/_types/hits' import { Operator } from '@_types/query_dsl/Operator' import { Sort, SortResults } from '@_types/sort' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { RankContainer } from '@_types/Rank' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' @@ -384,7 +384,7 @@ export interface Request extends RequestBase { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Defines the Reciprocal Rank Fusion (RRF) to use. 
* @availability stack since=8.8.0 diff --git a/specification/_types/Knn.ts b/specification/_types/Knn.ts index 7702eece37..60562df555 100644 --- a/specification/_types/Knn.ts +++ b/specification/_types/Knn.ts @@ -19,12 +19,15 @@ import { Field } from '@_types/common' import { long, float } from '@_types/Numeric' -import { QueryContainer } from './query_dsl/abstractions' +import { QueryBase, QueryContainer } from './query_dsl/abstractions' import { InnerHits } from '@global/search/_types/hits' export type QueryVector = float[] -export interface KnnQuery { +/* KnnSearch (used in kNN search) and KnnQuery (used in kNN queries) are close + * but different enough to require different classes */ + +export interface KnnSearch { /** The name of the vector field to search against */ field: Field /** The query vector */ @@ -32,9 +35,9 @@ export interface KnnQuery { /** The query vector builder. You must provide a query_vector_builder or query_vector, but not both. */ query_vector_builder?: QueryVectorBuilder /** The final number of nearest neighbors to return as top hits */ - k: long + k?: long /** The number of nearest neighbor candidates to consider per shard */ - num_candidates: long + num_candidates?: long /** Boost value to apply to kNN scores */ boost?: float /** Filters for the kNN search query */ @@ -48,6 +51,23 @@ export interface KnnQuery { inner_hits?: InnerHits } +export interface KnnQuery extends QueryBase { + /** The name of the vector field to search against */ + field: Field + /** The query vector */ + query_vector?: QueryVector + /** The query vector builder. You must provide a query_vector_builder or query_vector, but not both.
*/ + query_vector_builder?: QueryVectorBuilder + /** The number of nearest neighbor candidates to consider per shard */ + num_candidates?: long + /** Boost value to apply to kNN scores */ + boost?: float + /** Filters for the kNN search query */ + filter?: QueryContainer | QueryContainer[] + /** The minimum similarity for a vector to be considered a match */ + similarity?: float +} + /** @variants container */ export interface QueryVectorBuilder { text_embedding?: TextEmbedding diff --git a/specification/async_search/submit/AsyncSearchSubmitRequest.ts b/specification/async_search/submit/AsyncSearchSubmitRequest.ts index 989f57eaf9..c8b6051fe0 100644 --- a/specification/async_search/submit/AsyncSearchSubmitRequest.ts +++ b/specification/async_search/submit/AsyncSearchSubmitRequest.ts @@ -49,7 +49,7 @@ import { import { Suggester } from '@global/search/_types/suggester' import { TrackHits } from '@global/search/_types/hits' import { Operator } from '@_types/query_dsl/Operator' -import { KnnQuery } from '@_types/Knn' +import { KnnSearch } from '@_types/Knn' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' /** @@ -192,7 +192,7 @@ export interface Request extends RequestBase { * @availability stack since=8.4.0 * @availability serverless */ - knn?: KnnQuery | KnnQuery[] + knn?: KnnSearch | KnnSearch[] /** * Minimum _score for matching documents. Documents with a lower _score are * not included in the search results.