diff --git a/output/schema/schema.json b/output/schema/schema.json index 3af0917211..ebf51cc25c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -72926,7 +72926,7 @@ "name": "DenseVectorElementType", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/DenseVectorProperty.ts#L32-L48" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L64-L80" }, { "kind": "interface", @@ -72936,19 +72936,22 @@ }, "properties": [ { - "name": "type", - "required": true, + "description": "The confidence interval to use when quantizing the vectors. Can be any value between and including `0.90` and\n`1.0` or exactly `0`. When the value is `0`, this indicates that dynamic quantiles should be calculated for\noptimized quantization. When between `0.90` and `1.0`, this value restricts the values used when calculating\nthe quantization thresholds.\n\nFor example, a value of `0.95` will only use the middle `95%` of the values when calculating the quantization\nthresholds (e.g. the highest and lowest `2.5%` of values will be ignored).\n\nDefaults to `1/(dims + 1)` for `int8` quantized vectors and `0` for `int4` for dynamic quantile calculation.\n\nOnly applicable to `int8_hnsw`, `int4_hnsw`, `int8_flat`, and `int4_flat` index types.", + "name": "confidence_interval", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "DenseVectorIndexOptionsType", - "namespace": "_types.mapping" + "name": "float", + "namespace": "_types" } } }, { - "name": "m", + "description": "The number of candidates to track while assembling the list of nearest neighbors for each new node.\n\nOnly applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types.", + "name": "ef_construction", "required": false, + "serverDefault": 100, "type": { "kind": "instance_of", "type": { @@ -72958,8 +72961,10 @@ } }, { - "name": "ef_construction", + "description": "The number of neighbors each node will be connected to in the HNSW graph.\n\nOnly applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types.", + "name": "m", "required": false, + "serverDefault": 16, "type": { "kind": "instance_of", "type": { @@ -72969,18 +72974,19 @@ } }, { - "name": "confidence_interval", - "required": false, + "description": "The type of kNN algorithm to use.", + "name": "type", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "DenseVectorIndexOptionsType", + "namespace": "_types.mapping" } } } ], - "specLocation": "_types/mapping/DenseVectorProperty.ts#L97-L102" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L129-L162" }, { "kind": "enum", @@ -73002,11 +73008,11 @@ "name": "int4_hnsw" }, { - "description": "This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports \n`element_type` of `float`.", + "description": "This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports\n`element_type` of `float`.", "name": "int8_flat" }, { - "description": "The default index type for `float` vectors. This utilizes the HNSW algorithm in addition to automatically scalar \nquantization for scalable approximate kNN search with `element_type` of `float`.\n\nThis can reduce the memory footprint by 4x at the cost of some accuracy.", + "description": "The default index type for `float` vectors. This utilizes the HNSW algorithm in addition to automatically scalar\nquantization for scalable approximate kNN search with `element_type` of `float`.\n\nThis can reduce the memory footprint by 4x at the cost of some accuracy.", "name": "int8_hnsw" } ], @@ -73014,7 +73020,7 @@ "name": "DenseVectorIndexOptionsType", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/DenseVectorProperty.ts#L104-L137" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L164-L197" }, { "kind": "interface", @@ -73038,17 +73044,7 @@ } }, { - "name": "element_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "DenseVectorElementType", - "namespace": "_types.mapping" - } - } - }, - { + "description": "Number of vector dimensions. Can't exceed `4096`. If `dims` is not specified, it will be set to the length of\nthe first vector added to the field.", "name": "dims", "required": false, "type": { @@ -73060,19 +73056,23 @@ } }, { - "name": "similarity", + "description": "The data type used to encode vectors. The supported data types are `float` (default), `byte`, and `bit`.", + "name": "element_type", "required": false, + "serverDefault": "float", "type": { "kind": "instance_of", "type": { - "name": "DenseVectorSimilarity", + "name": "DenseVectorElementType", "namespace": "_types.mapping" } } }, { + "description": "If `true`, you can search this field using the kNN search API.", "name": "index", "required": false, + "serverDefault": true, "type": { "kind": "instance_of", "type": { @@ -73082,6 +73082,7 @@ } }, { + "description": "An optional section that configures the kNN indexing algorithm. The HNSW algorithm has two internal parameters\nthat influence how the data structure is built. These can be adjusted to improve the accuracy of results, at the\nexpense of slower indexing speed.\n\nThis parameter can only be specified when `index` is `true`.", "name": "index_options", "required": false, "type": { @@ -73091,15 +73092,27 @@ "namespace": "_types.mapping" } } + }, + { + "description": "The vector similarity metric to use in kNN search.\n\nDocuments are ranked by their vector field's similarity to the query vector. The `_score` of each document will\nbe derived from the similarity, in a way that ensures scores are positive and that a larger score corresponds\nto a higher ranking.\n\nDefaults to `l2_norm` when `element_type` is `bit` otherwise defaults to `cosine`.\n\n`bit` vectors only support `l2_norm` as their similarity metric.\n\nThis parameter can only be specified when `index` is `true`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "DenseVectorSimilarity", + "namespace": "_types.mapping" + } + } } ], - "specLocation": "_types/mapping/DenseVectorProperty.ts#L23-L30" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L23-L62" }, { "kind": "enum", "members": [ { - "description": "Computes the cosine similarity. During indexing Elasticsearch automatically normalizes vectors with `cosine`\nsimilarity to unit length. This allows to internally use `dot_product` for computing similarity, which is more \nefficient. Original un-normalized vectors can be still accessed through scripts.\n\nThe document `_score` is computed as `(1 + cosine(query, vector)) / 2`.\n\nThe `cosine` similarity does not allow vectors with zero magnitude, since cosine is not defined in this case.", + "description": "Computes the cosine similarity. During indexing Elasticsearch automatically normalizes vectors with `cosine`\nsimilarity to unit length. This allows to internally use `dot_product` for computing similarity, which is more\nefficient. Original un-normalized vectors can be still accessed through scripts.\n\nThe document `_score` is computed as `(1 + cosine(query, vector)) / 2`.\n\nThe `cosine` similarity does not allow vectors with zero magnitude, since cosine is not defined in this case.", "name": "cosine" }, { @@ -73119,7 +73132,7 @@ "name": "DenseVectorSimilarity", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/DenseVectorProperty.ts#L50-L95" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L82-L127" }, { "kind": "interface", @@ -73962,7 +73975,7 @@ "name": "FieldType", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/Property.ts#L168-L215" + "specLocation": "_types/mapping/Property.ts#L166-L213" }, { "kind": "interface", @@ -75626,7 +75639,7 @@ "name": "Property", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/Property.ts#L98-L166", + "specLocation": "_types/mapping/Property.ts#L96-L164", "type": { "kind": "union_of", "items": [ @@ -76081,7 +76094,7 @@ } } ], - "specLocation": "_types/mapping/Property.ts#L86-L96" + "specLocation": "_types/mapping/Property.ts#L84-L94" }, { "kind": "interface", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e990b081da..da6d40518d 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -5407,22 +5407,28 @@ export interface MappingDateRangeProperty extends MappingRangePropertyBase { type: 'date_range' } +export type MappingDenseVectorElementType = 'bit' | 'byte' | 'float' + export interface MappingDenseVectorIndexOptions { - type: string - m?: integer - ef_construction?: integer confidence_interval?: float + ef_construction?: integer + m?: integer + type: MappingDenseVectorIndexOptionsType } +export type MappingDenseVectorIndexOptionsType = 'flat' | 'hnsw' | 'int4_flat' | 'int4_hnsw' | 'int8_flat' | 'int8_hnsw' + export interface MappingDenseVectorProperty extends MappingPropertyBase { type: 'dense_vector' - element_type?: string dims?: integer - similarity?: string + element_type?: MappingDenseVectorElementType index?: boolean index_options?: MappingDenseVectorIndexOptions + similarity?: MappingDenseVectorSimilarity } +export type MappingDenseVectorSimilarity = 'cosine' | 'dot_product' | 'l2_norm' | 'max_inner_product' + export interface MappingDocValuesPropertyBase extends MappingCorePropertyBase { doc_values?: boolean } diff --git a/specification/_types/mapping/DenseVectorProperty.ts b/specification/_types/mapping/DenseVectorProperty.ts index cb2df5a583..b861a908d7 100644 --- a/specification/_types/mapping/DenseVectorProperty.ts +++ b/specification/_types/mapping/DenseVectorProperty.ts @@ -22,11 +22,43 @@ import { PropertyBase } from './Property' export class DenseVectorProperty extends PropertyBase { type: 'dense_vector' - element_type?: DenseVectorElementType + /** + * Number of vector dimensions. Can't exceed `4096`. If `dims` is not specified, it will be set to the length of + * the first vector added to the field. + */ dims?: integer - similarity?: DenseVectorSimilarity + /** + * The data type used to encode vectors. The supported data types are `float` (default), `byte`, and `bit`. + * @server_default float + */ + element_type?: DenseVectorElementType + /** + * If `true`, you can search this field using the kNN search API. + * @server_default true + */ index?: boolean + /** + * An optional section that configures the kNN indexing algorithm. The HNSW algorithm has two internal parameters + * that influence how the data structure is built. These can be adjusted to improve the accuracy of results, at the + * expense of slower indexing speed. + * + * This parameter can only be specified when `index` is `true`. + */ index_options?: DenseVectorIndexOptions + /** + * The vector similarity metric to use in kNN search. + * + * Documents are ranked by their vector field's similarity to the query vector. The `_score` of each document will + * be derived from the similarity, in a way that ensures scores are positive and that a larger score corresponds + * to a higher ranking. + * + * Defaults to `l2_norm` when `element_type` is `bit` otherwise defaults to `cosine`. + * + * `bit` vectors only support `l2_norm` as their similarity metric. + * + * This parameter can only be specified when `index` is `true`. + */ + similarity?: DenseVectorSimilarity } export enum DenseVectorElementType { @@ -95,10 +127,38 @@ export enum DenseVectorSimilarity { } export class DenseVectorIndexOptions { - type: DenseVectorIndexOptionsType - m?: integer - ef_construction?: integer + /** + * The confidence interval to use when quantizing the vectors. Can be any value between and including `0.90` and + * `1.0` or exactly `0`. When the value is `0`, this indicates that dynamic quantiles should be calculated for + * optimized quantization. When between `0.90` and `1.0`, this value restricts the values used when calculating + * the quantization thresholds. + * + * For example, a value of `0.95` will only use the middle `95%` of the values when calculating the quantization + * thresholds (e.g. the highest and lowest `2.5%` of values will be ignored). + * + * Defaults to `1/(dims + 1)` for `int8` quantized vectors and `0` for `int4` for dynamic quantile calculation. + * + * Only applicable to `int8_hnsw`, `int4_hnsw`, `int8_flat`, and `int4_flat` index types. + */ confidence_interval?: float + /** + * The number of candidates to track while assembling the list of nearest neighbors for each new node. + * + * Only applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. + * @server_default 100 + */ + ef_construction?: integer + /** + * The number of neighbors each node will be connected to in the HNSW graph. + * + * Only applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. + * @server_default 16 + */ + m?: integer + /** + * The type of kNN algorithm to use. + */ + type: DenseVectorIndexOptionsType } export enum DenseVectorIndexOptionsType {