diff --git a/compiler/package-lock.json b/compiler/package-lock.json index 56c687fd0d..ed183152ca 100644 --- a/compiler/package-lock.json +++ b/compiler/package-lock.json @@ -33,10 +33,6 @@ "node": ">=14" } }, - "../compiler-rs/compiler-wasm-lib/pkg": { - "name": "compiler-wasm-lib", - "version": "0.1.0" - }, "node_modules/@babel/code-frame": { "version": "7.12.11", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", @@ -1544,8 +1540,8 @@ "dev": true }, "node_modules/compiler-wasm-lib": { - "resolved": "../compiler-rs/compiler-wasm-lib/pkg", - "link": true + "version": "0.1.0", + "resolved": "file:../compiler-rs/compiler-wasm-lib/pkg" }, "node_modules/concat-map": { "version": "0.0.1", @@ -6488,7 +6484,7 @@ "dev": true }, "compiler-wasm-lib": { - "version": "file:../compiler-rs/compiler-wasm-lib/pkg" + "version": "0.1.0" }, "concat-map": { "version": "0.0.1", diff --git a/output/schema/schema.json b/output/schema/schema.json index 91a9903ee0..ebf51cc25c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -72216,7 +72216,7 @@ } } ], - "specLocation": "_types/mapping/complex.ts#L61-L66" + "specLocation": "_types/mapping/complex.ts#L51-L56" }, { "kind": "interface", @@ -72906,6 +72906,28 @@ ], "specLocation": "_types/mapping/range.ts#L29-L32" }, + { + "kind": "enum", + "members": [ + { + "description": "Indexes a single bit per dimension. 
Useful for very high-dimensional vectors or models that specifically support\nbit vectors.\n\nNOTE: when using `bit`, the number of dimensions must be a multiple of `8` and must represent the number of bits.", + "name": "bit" + }, + { + "description": "Indexes a 1-byte integer value per dimension.", + "name": "byte" + }, + { + "description": "Indexes a 4-byte floating-point value per dimension.", + "name": "float" + } + ], + "name": { + "name": "DenseVectorElementType", + "namespace": "_types.mapping" + }, + "specLocation": "_types/mapping/DenseVectorProperty.ts#L64-L80" + }, { "kind": "interface", "name": { @@ -72914,19 +72936,22 @@ }, "properties": [ { - "name": "type", - "required": true, + "description": "The confidence interval to use when quantizing the vectors. Can be any value between and including `0.90` and\n`1.0` or exactly `0`. When the value is `0`, this indicates that dynamic quantiles should be calculated for\noptimized quantization. When between `0.90` and `1.0`, this value restricts the values used when calculating\nthe quantization thresholds.\n\nFor example, a value of `0.95` will only use the middle `95%` of the values when calculating the quantization\nthresholds (e.g. 
the highest and lowest `2.5%` of values will be ignored).\n\nDefaults to `1/(dims + 1)` for `int8` quantized vectors and `0` for `int4` for dynamic quantile calculation.\n\nOnly applicable to `int8_hnsw`, `int4_hnsw`, `int8_flat`, and `int4_flat` index types.", + "name": "confidence_interval", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "float", + "namespace": "_types" } } }, { - "name": "m", + "description": "The number of candidates to track while assembling the list of nearest neighbors for each new node.\n\nOnly applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types.", + "name": "ef_construction", "required": false, + "serverDefault": 100, "type": { "kind": "instance_of", "type": { @@ -72936,8 +72961,10 @@ } }, { - "name": "ef_construction", + "description": "The number of neighbors each node will be connected to in the HNSW graph.\n\nOnly applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types.", + "name": "m", "required": false, + "serverDefault": 16, "type": { "kind": "instance_of", "type": { @@ -72947,18 +72974,53 @@ } }, { - "name": "confidence_interval", - "required": false, + "description": "The type of kNN algorithm to use.", + "name": "type", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "DenseVectorIndexOptionsType", + "namespace": "_types.mapping" } } } ], - "specLocation": "_types/mapping/DenseVectorIndexOptions.ts#L22-L27" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L129-L162" + }, + { + "kind": "enum", + "members": [ + { + "description": "This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.", + "name": "flat" + }, + { + "description": "This utilizes the HNSW algorithm for scalable approximate kNN search. 
This supports all `element_type` values.", + "name": "hnsw" + }, + { + "description": "This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization.\nOnly supports `element_type` of `float`.", + "name": "int4_flat" + }, + { + "description": "This utilizes the HNSW algorithm in addition to automatic scalar quantization for scalable approximate kNN\nsearch with `element_type` of `float`.\n\nThis can reduce the memory footprint by 8x at the cost of some accuracy.", + "name": "int4_hnsw" + }, + { + "description": "This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports\n`element_type` of `float`.", + "name": "int8_flat" + }, + { + "description": "The default index type for `float` vectors. This utilizes the HNSW algorithm in addition to automatic scalar\nquantization for scalable approximate kNN search with `element_type` of `float`.\n\nThis can reduce the memory footprint by 4x at the cost of some accuracy.", + "name": "int8_hnsw" + } + ], + "name": { + "name": "DenseVectorIndexOptionsType", + "namespace": "_types.mapping" + }, + "specLocation": "_types/mapping/DenseVectorProperty.ts#L164-L197" }, { "kind": "interface", @@ -72982,62 +73044,95 @@ } }, { - "name": "element_type", + "description": "Number of vector dimensions. Can't exceed `4096`. If `dims` is not specified, it will be set to the length of\nthe first vector added to the field.", + "name": "dims", "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "integer", + "namespace": "_types" } } }, { - "name": "dims", + "description": "The data type used to encode vectors. 
The supported data types are `float` (default), `byte`, and `bit`.", + "name": "element_type", "required": false, + "serverDefault": "float", "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "DenseVectorElementType", + "namespace": "_types.mapping" } } }, { - "name": "similarity", + "description": "If `true`, you can search this field using the kNN search API.", + "name": "index", "required": false, + "serverDefault": true, "type": { "kind": "instance_of", "type": { - "name": "string", + "name": "boolean", "namespace": "_builtins" } } }, { - "name": "index", + "description": "An optional section that configures the kNN indexing algorithm. The HNSW algorithm has two internal parameters\nthat influence how the data structure is built. These can be adjusted to improve the accuracy of results, at the\nexpense of slower indexing speed.\n\nThis parameter can only be specified when `index` is `true`.", + "name": "index_options", "required": false, "type": { "kind": "instance_of", "type": { - "name": "boolean", - "namespace": "_builtins" + "name": "DenseVectorIndexOptions", + "namespace": "_types.mapping" } } }, { - "name": "index_options", + "description": "The vector similarity metric to use in kNN search.\n\nDocuments are ranked by their vector field's similarity to the query vector. 
The `_score` of each document will\nbe derived from the similarity, in a way that ensures scores are positive and that a larger score corresponds\nto a higher ranking.\n\nDefaults to `l2_norm` when `element_type` is `bit` otherwise defaults to `cosine`.\n\n`bit` vectors only support `l2_norm` as their similarity metric.\n\nThis parameter can only be specified when `index` is `true`.", + "name": "similarity", "required": false, "type": { "kind": "instance_of", "type": { - "name": "DenseVectorIndexOptions", + "name": "DenseVectorSimilarity", "namespace": "_types.mapping" } } } ], - "specLocation": "_types/mapping/complex.ts#L52-L59" + "specLocation": "_types/mapping/DenseVectorProperty.ts#L23-L62" + }, + { + "kind": "enum", + "members": [ + { + "description": "Computes the cosine similarity. During indexing Elasticsearch automatically normalizes vectors with `cosine`\nsimilarity to unit length. This allows `dot_product` to be used internally for computing similarity, which is more\nefficient. Original un-normalized vectors can still be accessed through scripts.\n\nThe document `_score` is computed as `(1 + cosine(query, vector)) / 2`.\n\nThe `cosine` similarity does not allow vectors with zero magnitude, since cosine is not defined in this case.", + "name": "cosine" + }, + { + "description": "Computes the dot product of two unit vectors. 
This option provides an optimized way to perform cosine similarity.\nThe constraints and computed score are defined by `element_type`.\n\nWhen `element_type` is `float`, all vectors must be unit length, including both document and query vectors.\n\nThe document `_score` is computed as `(1 + dot_product(query, vector)) / 2`.\n\nWhen `element_type` is `byte`, all vectors must have the same length including both document and query vectors or\nresults will be inaccurate.\n\nThe document `_score` is computed as `0.5 + (dot_product(query, vector) / (32768 * dims))` where `dims` is the\nnumber of dimensions per vector.", + "name": "dot_product" + }, + { + "description": "Computes similarity based on the `L2` distance (also known as Euclidean distance) between the vectors.\n\nThe document `_score` is computed as `1 / (1 + l2_norm(query, vector)^2)`.\n\nFor `bit` vectors, instead of using `l2_norm`, the `hamming` distance between the vectors is used.\n\nThe `_score` transformation is `(numBits - hamming(a, b)) / numBits`.", + "name": "l2_norm" + }, + { + "description": "Computes the maximum inner product of two vectors. This is similar to `dot_product`, but doesn't require vectors\nto be normalized. This means that each vector’s magnitude can significantly affect the score.\n\nThe document `_score` is adjusted to prevent negative values. For `max_inner_product` values `< 0`, the `_score`\nis `1 / (1 + -1 * max_inner_product(query, vector))`. 
For non-negative `max_inner_product` results the `_score`\nis calculated `max_inner_product(query, vector) + 1`.", + "name": "max_inner_product" + } + ], + "name": { + "name": "DenseVectorSimilarity", + "namespace": "_types.mapping" + }, + "specLocation": "_types/mapping/DenseVectorProperty.ts#L82-L127" }, { "kind": "interface", @@ -74003,7 +74098,7 @@ } } ], - "specLocation": "_types/mapping/complex.ts#L26-L37" + "specLocation": "_types/mapping/complex.ts#L25-L36" }, { "kind": "interface", @@ -75270,7 +75365,7 @@ } } ], - "specLocation": "_types/mapping/complex.ts#L39-L44" + "specLocation": "_types/mapping/complex.ts#L38-L43" }, { "kind": "interface", @@ -75439,7 +75534,7 @@ } } ], - "specLocation": "_types/mapping/complex.ts#L46-L50" + "specLocation": "_types/mapping/complex.ts#L45-L49" }, { "kind": "enum", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e990b081da..da6d40518d 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -5407,22 +5407,28 @@ export interface MappingDateRangeProperty extends MappingRangePropertyBase { type: 'date_range' } +export type MappingDenseVectorElementType = 'bit' | 'byte' | 'float' + export interface MappingDenseVectorIndexOptions { - type: string - m?: integer - ef_construction?: integer confidence_interval?: float + ef_construction?: integer + m?: integer + type: MappingDenseVectorIndexOptionsType } +export type MappingDenseVectorIndexOptionsType = 'flat' | 'hnsw' | 'int4_flat' | 'int4_hnsw' | 'int8_flat' | 'int8_hnsw' + export interface MappingDenseVectorProperty extends MappingPropertyBase { type: 'dense_vector' - element_type?: string dims?: integer - similarity?: string + element_type?: MappingDenseVectorElementType index?: boolean index_options?: MappingDenseVectorIndexOptions + similarity?: MappingDenseVectorSimilarity } +export type MappingDenseVectorSimilarity = 'cosine' | 'dot_product' | 'l2_norm' | 'max_inner_product' + export interface MappingDocValuesPropertyBase 
extends MappingCorePropertyBase { doc_values?: boolean } diff --git a/specification/_types/mapping/DenseVectorIndexOptions.ts b/specification/_types/mapping/DenseVectorIndexOptions.ts deleted file mode 100644 index dd05a7ccd1..0000000000 --- a/specification/_types/mapping/DenseVectorIndexOptions.ts +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to Elasticsearch B.V. under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch B.V. licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import { float, integer } from '@_types/Numeric' - -export class DenseVectorIndexOptions { - type: string - m?: integer - ef_construction?: integer - confidence_interval?: float -} diff --git a/specification/_types/mapping/DenseVectorProperty.ts b/specification/_types/mapping/DenseVectorProperty.ts new file mode 100644 index 0000000000..b861a908d7 --- /dev/null +++ b/specification/_types/mapping/DenseVectorProperty.ts @@ -0,0 +1,197 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { float, integer } from '@_types/Numeric' +import { PropertyBase } from './Property' + +export class DenseVectorProperty extends PropertyBase { + type: 'dense_vector' + /** + * Number of vector dimensions. Can't exceed `4096`. If `dims` is not specified, it will be set to the length of + * the first vector added to the field. + */ + dims?: integer + /** + * The data type used to encode vectors. The supported data types are `float` (default), `byte`, and `bit`. + * @server_default float + */ + element_type?: DenseVectorElementType + /** + * If `true`, you can search this field using the kNN search API. + * @server_default true + */ + index?: boolean + /** + * An optional section that configures the kNN indexing algorithm. The HNSW algorithm has two internal parameters + * that influence how the data structure is built. These can be adjusted to improve the accuracy of results, at the + * expense of slower indexing speed. + * + * This parameter can only be specified when `index` is `true`. + */ + index_options?: DenseVectorIndexOptions + /** + * The vector similarity metric to use in kNN search. + * + * Documents are ranked by their vector field's similarity to the query vector. The `_score` of each document will + * be derived from the similarity, in a way that ensures scores are positive and that a larger score corresponds + * to a higher ranking. + * + * Defaults to `l2_norm` when `element_type` is `bit` otherwise defaults to `cosine`. + * + * `bit` vectors only support `l2_norm` as their similarity metric. 
+ * + * This parameter can only be specified when `index` is `true`. + */ + similarity?: DenseVectorSimilarity +} + +export enum DenseVectorElementType { + /** + * Indexes a single bit per dimension. Useful for very high-dimensional vectors or models that specifically support + * bit vectors. + * + * NOTE: when using `bit`, the number of dimensions must be a multiple of `8` and must represent the number of bits. + */ + bit, + /** + * Indexes a 1-byte integer value per dimension. + */ + byte, + /** + * Indexes a 4-byte floating-point value per dimension. + */ + float +} + +export enum DenseVectorSimilarity { + /** + * Computes the cosine similarity. During indexing Elasticsearch automatically normalizes vectors with `cosine` + * similarity to unit length. This allows `dot_product` to be used internally for computing similarity, which is more + * efficient. Original un-normalized vectors can still be accessed through scripts. + * + * The document `_score` is computed as `(1 + cosine(query, vector)) / 2`. + * + * The `cosine` similarity does not allow vectors with zero magnitude, since cosine is not defined in this case. + */ + cosine, + /** + * Computes the dot product of two unit vectors. This option provides an optimized way to perform cosine similarity. + * The constraints and computed score are defined by `element_type`. + * + * When `element_type` is `float`, all vectors must be unit length, including both document and query vectors. + * + * The document `_score` is computed as `(1 + dot_product(query, vector)) / 2`. + * + * When `element_type` is `byte`, all vectors must have the same length including both document and query vectors or + * results will be inaccurate. + * + * The document `_score` is computed as `0.5 + (dot_product(query, vector) / (32768 * dims))` where `dims` is the + * number of dimensions per vector. + */ + dot_product, + /** + * Computes similarity based on the `L2` distance (also known as Euclidean distance) between the vectors. 
+ * + * The document `_score` is computed as `1 / (1 + l2_norm(query, vector)^2)`. + * + * For `bit` vectors, instead of using `l2_norm`, the `hamming` distance between the vectors is used. + * + * The `_score` transformation is `(numBits - hamming(a, b)) / numBits`. + */ + l2_norm, + /** + * Computes the maximum inner product of two vectors. This is similar to `dot_product`, but doesn't require vectors + * to be normalized. This means that each vector’s magnitude can significantly affect the score. + * + * The document `_score` is adjusted to prevent negative values. For `max_inner_product` values `< 0`, the `_score` + * is `1 / (1 + -1 * max_inner_product(query, vector))`. For non-negative `max_inner_product` results the `_score` + * is calculated `max_inner_product(query, vector) + 1`. + */ + max_inner_product +} + +export class DenseVectorIndexOptions { + /** + * The confidence interval to use when quantizing the vectors. Can be any value between and including `0.90` and + * `1.0` or exactly `0`. When the value is `0`, this indicates that dynamic quantiles should be calculated for + * optimized quantization. When between `0.90` and `1.0`, this value restricts the values used when calculating + * the quantization thresholds. + * + * For example, a value of `0.95` will only use the middle `95%` of the values when calculating the quantization + * thresholds (e.g. the highest and lowest `2.5%` of values will be ignored). + * + * Defaults to `1/(dims + 1)` for `int8` quantized vectors and `0` for `int4` for dynamic quantile calculation. + * + * Only applicable to `int8_hnsw`, `int4_hnsw`, `int8_flat`, and `int4_flat` index types. + */ + confidence_interval?: float + /** + * The number of candidates to track while assembling the list of nearest neighbors for each new node. + * + * Only applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. 
+ * @server_default 100 + */ + ef_construction?: integer + /** + * The number of neighbors each node will be connected to in the HNSW graph. + * + * Only applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. + * @server_default 16 + */ + m?: integer + /** + * The type of kNN algorithm to use. + */ + type: DenseVectorIndexOptionsType +} + +export enum DenseVectorIndexOptionsType { + /** + * This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values. + */ + flat, + /** + * This utilizes the HNSW algorithm for scalable approximate kNN search. This supports all `element_type` values. + */ + hnsw, + /** + * This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization. + * Only supports `element_type` of `float`. + */ + int4_flat, + /** + * This utilizes the HNSW algorithm in addition to automatic scalar quantization for scalable approximate kNN + * search with `element_type` of `float`. + * + * This can reduce the memory footprint by 8x at the cost of some accuracy. + */ + int4_hnsw, + /** + * This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports + * `element_type` of `float`. + */ + int8_flat, + /** + * The default index type for `float` vectors. This utilizes the HNSW algorithm in addition to automatic scalar + * quantization for scalable approximate kNN search with `element_type` of `float`. + * + * This can reduce the memory footprint by 4x at the cost of some accuracy. 
+ */ + int8_hnsw +} diff --git a/specification/_types/mapping/Property.ts b/specification/_types/mapping/Property.ts index 8cce8819a2..456d835ea4 100644 --- a/specification/_types/mapping/Property.ts +++ b/specification/_types/mapping/Property.ts @@ -36,7 +36,6 @@ import { import { integer } from '@_types/Numeric' import { AggregateMetricDoubleProperty, - DenseVectorProperty, FlattenedProperty, NestedProperty, ObjectProperty @@ -69,6 +68,7 @@ import { VersionProperty, WildcardProperty } from './core' +import { DenseVectorProperty } from './DenseVectorProperty' import { DynamicMapping } from './dynamic-template' import { CompletionProperty, diff --git a/specification/_types/mapping/complex.ts b/specification/_types/mapping/complex.ts index d9b0c70bb3..5fa37f12ee 100644 --- a/specification/_types/mapping/complex.ts +++ b/specification/_types/mapping/complex.ts @@ -20,7 +20,6 @@ import { TimeSeriesMetricType } from '@_types/mapping/TimeSeriesMetricType' import { double, integer } from '@_types/Numeric' import { CorePropertyBase, IndexOptions } from './core' -import { DenseVectorIndexOptions } from './DenseVectorIndexOptions' import { PropertyBase } from './Property' export class FlattenedProperty extends PropertyBase { @@ -49,15 +48,6 @@ export class ObjectProperty extends CorePropertyBase { type?: 'object' } -export class DenseVectorProperty extends PropertyBase { - type: 'dense_vector' - element_type?: string - dims?: integer - similarity?: string - index?: boolean - index_options?: DenseVectorIndexOptions -} - export class AggregateMetricDoubleProperty extends PropertyBase { type: 'aggregate_metric_double' default_metric: string