From bec78a05e0dfa0dc9ae407b7e35bfaf97e19dc0f Mon Sep 17 00:00:00 2001 From: Sylvain Wallez Date: Thu, 14 Mar 2024 14:06:33 +0100 Subject: [PATCH] Rewrite similarity definition (#2454) (#2458) --- output/schema/schema.json | 568 +++++++++++++++++- output/typescript/types.ts | 61 ++ specification/indices/_types/IndexSettings.ts | 65 +- 3 files changed, 669 insertions(+), 25 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index 41facc795d..83ede4f5b5 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -32166,6 +32166,75 @@ ], "specLocation": "_types/Geo.ts#L135-L140" }, + { + "kind": "enum", + "members": [ + { + "name": "standardized" + }, + { + "name": "saturated" + }, + { + "name": "chisquared" + } + ], + "name": { + "name": "DFIIndependenceMeasure", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L20-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "no" + }, + { + "name": "b" + }, + { + "name": "l" + } + ], + "name": { + "name": "DFRAfterEffect", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L26-L30" + }, + { + "kind": "enum", + "members": [ + { + "name": "be" + }, + { + "name": "d" + }, + { + "name": "g" + }, + { + "name": "if" + }, + { + "name": "in" + }, + { + "name": "ine" + }, + { + "name": "p" + } + ], + "name": { + "name": "DFRBasicModel", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L32-L40" + }, { "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-data-stream.html#indices-create-data-stream-api-path-params", "kind": "type_alias", @@ -33762,6 +33831,38 @@ } } }, + { + "kind": "enum", + "members": [ + { + "name": "ll" + }, + { + "name": "spl" + } + ], + "name": { + "name": "IBDistribution", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L42-L45" + }, + { + "kind": "enum", + "members": [ + { + "name": "df" + }, + { + "name": "ttf" + } + ], + "name": { + "name": "IBLambda", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L47-L50" + }, { "kind": "type_alias", "name": { @@ -35272,6 +35373,31 @@ ], "specLocation": "_types/Node.ts#L28-L36" }, + { + "kind": "enum", + "members": [ + { + "name": "no" + }, + { + "name": "h1" + }, + { + "name": "h2" + }, + { + "name": "h3" + }, + { + "name": "z" + } + ], + "name": { + "name": "Normalization", + "namespace": "_types" + }, + "specLocation": "_types/Similarity.ts#L52-L58" + }, { "kind": "enum", "members": [ @@ -95107,7 +95233,7 @@ "name": "IndexCheckOnStartup", "namespace": "indices._types" }, - "specLocation": "indices/_types/IndexSettings.ts#L324-L331" + "specLocation": "indices/_types/IndexSettings.ts#L387-L394" }, { "kind": "interface", @@ -95520,7 +95646,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L316-L322" + "specLocation": "indices/_types/IndexSettings.ts#L379-L385" }, { "attachedBehaviors": [ @@ -96411,6 +96537,29 @@ } } }, + { + "description": "Configure custom similarity settings to customize how search results are scored.", + "name": "similarity", + "required": false, + "type": { + "key": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + "kind": "dictionary_of", + "singleKey": false, + "value": { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarity", + "namespace": "indices._types" + } + } + } + }, { "description": "Enable or disable dynamic mapping for an index.", "name": "mapping", @@ -96459,7 +96608,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L69-L314" + "specLocation": "indices/_types/IndexSettings.ts#L69-L318" }, { "kind": "interface", @@ -96579,7 +96728,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L380-L386" + "specLocation": "indices/_types/IndexSettings.ts#L443-L449" }, { "kind": "interface", @@ -96663,7 +96812,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L337-L370" + "specLocation": "indices/_types/IndexSettings.ts#L400-L433" }, { "kind": "interface", @@ -96685,7 +96834,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L372-L378" + "specLocation": "indices/_types/IndexSettings.ts#L435-L441" }, { "kind": "interface", @@ -96783,7 +96932,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L333-L335" + "specLocation": "indices/_types/IndexSettings.ts#L396-L398" }, { "kind": "interface", @@ -96804,7 +96953,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L585-L587" + "specLocation": "indices/_types/IndexSettings.ts#L648-L650" }, { "kind": "interface", @@ -96826,7 +96975,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L589-L596" + "specLocation": "indices/_types/IndexSettings.ts#L652-L659" }, { "description": "Mapping Limit Settings", @@ -96926,7 +97075,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L450-L463" + "specLocation": "indices/_types/IndexSettings.ts#L513-L526" }, { "kind": "interface", @@ -96949,7 +97098,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L475-L482" + "specLocation": "indices/_types/IndexSettings.ts#L538-L545" }, { "kind": "interface", @@ -96971,7 +97120,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L512-L518" + "specLocation": "indices/_types/IndexSettings.ts#L575-L581" }, { "kind": "interface", @@ -96993,7 +97142,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L503-L510" + "specLocation": "indices/_types/IndexSettings.ts#L566-L573" }, { "kind": "interface", @@ -97016,7 +97165,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L484-L492" + "specLocation": "indices/_types/IndexSettings.ts#L547-L555" }, { "kind": "interface", @@ -97039,7 +97188,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L494-L501" + "specLocation": "indices/_types/IndexSettings.ts#L557-L564" }, { "kind": "interface", @@ -97062,7 +97211,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L465-L473" + "specLocation": "indices/_types/IndexSettings.ts#L528-L536" }, { "kind": "interface", @@ -97207,6 +97356,377 @@ }, "specLocation": "indices/_types/IndexSegmentSort.ts#L29-L32" }, + { + "kind": "type_alias", + "name": { + "name": "SettingsSimilarity", + "namespace": "indices._types" + }, + "specLocation": "indices/_types/IndexSettings.ts#L320-L331", + "type": { + "items": [ + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityBm25", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityBoolean", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityDfi", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityDfr", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityIb", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityLmd", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityLmj", + "namespace": "indices._types" + } + }, + { + "kind": "instance_of", + "type": { + "name": "SettingsSimilarityScripted", + "namespace": "indices._types" + } + } + ], + "kind": "union_of" + }, + "variants": { + "kind": "internal_tag", + "tag": "type" + } + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityBm25", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "BM25" + } + }, + { + "name": "b", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "double", + "namespace": "_types" + } + } + }, + { + "name": "discount_overlaps", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "name": "k1", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "double", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L337-L342" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityBoolean", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "boolean" + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L333-L335" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityDfi", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "DFI" + } + }, + { + "name": "independence_measure", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "DFIIndependenceMeasure", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L344-L347" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityDfr", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "DFR" + } + }, + { + "name": "after_effect", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "DFRAfterEffect", + "namespace": "_types" + } + } + }, + { + "name": "basic_model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "DFRBasicModel", + "namespace": "_types" + } + } + }, + { + "name": "normalization", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Normalization", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L349-L354" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityIb", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "IB" + } + }, + { + "name": "distribution", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "IBDistribution", + "namespace": "_types" + } + } + }, + { + "name": "lambda", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "IBLambda", + "namespace": "_types" + } + } + }, + { + "name": "normalization", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Normalization", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L356-L361" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityLmd", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "LMDirichlet" + } + }, + { + "name": "mu", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "double", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L363-L366" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityLmj", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "LMJelinekMercer" + } + }, + { + "name": "lambda", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "double", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L368-L371" + }, + { + "kind": "interface", + "name": { + "name": "SettingsSimilarityScripted", + "namespace": "indices._types" + }, + "properties": [ + { + "name": "type", + "required": true, + "type": { + "kind": "literal_value", + "value": "scripted" + } + }, + { + "name": "script", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Script", + "namespace": "_types" + } + } + }, + { + "name": "weight_script", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Script", + "namespace": "_types" + } + } + } + ], + "specLocation": "indices/_types/IndexSettings.ts#L373-L377" + }, { "kind": "interface", "name": { @@ -97259,7 +97779,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L520-L525" + "specLocation": "indices/_types/IndexSettings.ts#L583-L588" }, { "kind": "interface", @@ -97313,7 +97833,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L538-L543" + "specLocation": "indices/_types/IndexSettings.ts#L601-L606" }, { "kind": "interface", @@ -97358,7 +97878,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L527-L536" + "specLocation": "indices/_types/IndexSettings.ts#L590-L599" }, { "kind": "interface", @@ -97426,7 +97946,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L545-L554" + "specLocation": "indices/_types/IndexSettings.ts#L608-L617" }, { "kind": "enum", @@ -97455,7 +97975,7 @@ "name": "StorageType", "namespace": "indices._types" }, - "specLocation": "indices/_types/IndexSettings.ts#L556-L583" + "specLocation": "indices/_types/IndexSettings.ts#L619-L646" }, { "kind": "interface", @@ -97612,7 +98132,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L388-L410" + "specLocation": "indices/_types/IndexSettings.ts#L451-L473" }, { "kind": "enum", @@ -97636,7 +98156,7 @@ "name": "TranslogDurability", "namespace": "indices._types" }, - "specLocation": "indices/_types/IndexSettings.ts#L412-L427" + "specLocation": "indices/_types/IndexSettings.ts#L475-L490" }, { "kind": "interface", @@ -97672,7 +98192,7 @@ } } ], - "specLocation": "indices/_types/IndexSettings.ts#L429-L448" + "specLocation": "indices/_types/IndexSettings.ts#L492-L511" }, { "kind": "enum", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f9f321443d..5e82232c8a 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -1892,6 +1892,12 @@ export interface CoordsGeoBounds { right: double } +export type DFIIndependenceMeasure = 'standardized' | 'saturated' | 'chisquared' + +export type DFRAfterEffect = 'no' | 'b' | 'l' + +export type DFRBasicModel = 'be' | 'd' | 'g' | 'if' | 'in' | 'ine' | 'p' + export type DataStreamName = string export type DataStreamNames = DataStreamName | DataStreamName[] @@ -2055,6 +2061,10 @@ export type Host = string export type HttpHeaders = Record +export type IBDistribution = 'll' | 'spl' + +export type IBLambda = 'df' | 'ttf' + export type Id = string | number export type Ids = Id | Id[] @@ -2206,6 +2216,8 @@ export interface NodeStatistics { failed: integer } +export type Normalization = 'no' | 'h1' | 'h2' | 'h3' | 'z' + export type OpType = 'index' | 'create' export type Password = string @@ -9171,6 +9183,7 @@ export interface IndicesIndexSettingsKeys { top_metrics_max_size?: integer analysis?: IndicesIndexSettingsAnalysis settings?: IndicesIndexSettings + similarity?: Record mapping?: IndicesMappingLimitSettings 'indexing.slowlog'?: IndicesSlowlogSettings indexing_pressure?: IndicesIndexingPressure @@ -9276,6 +9289,54 @@ export type IndicesSegmentSortMode = 'min' | 'max' export type IndicesSegmentSortOrder = 'asc' | 'desc' +export type IndicesSettingsSimilarity = IndicesSettingsSimilarityBm25 | IndicesSettingsSimilarityBoolean | IndicesSettingsSimilarityDfi | IndicesSettingsSimilarityDfr | IndicesSettingsSimilarityIb | IndicesSettingsSimilarityLmd | IndicesSettingsSimilarityLmj | IndicesSettingsSimilarityScripted + +export interface IndicesSettingsSimilarityBm25 { + type: 'BM25' + b?: double + discount_overlaps?: boolean + k1?: double +} + +export interface IndicesSettingsSimilarityBoolean { + type: 'boolean' +} + +export interface IndicesSettingsSimilarityDfi { + type: 'DFI' + independence_measure: DFIIndependenceMeasure +} + +export interface IndicesSettingsSimilarityDfr { + type: 'DFR' + after_effect: DFRAfterEffect + basic_model: DFRBasicModel + normalization: Normalization +} + +export interface IndicesSettingsSimilarityIb { + type: 'IB' + distribution: IBDistribution + lambda: IBLambda + normalization: Normalization +} + +export interface IndicesSettingsSimilarityLmd { + type: 'LMDirichlet' + mu?: double +} + +export interface IndicesSettingsSimilarityLmj { + type: 'LMJelinekMercer' + lambda?: double +} + +export interface IndicesSettingsSimilarityScripted { + type: 'scripted' + script: Script + weight_script?: Script +} + export interface IndicesSlowlogSettings { level?: string source?: integer diff --git a/specification/indices/_types/IndexSettings.ts b/specification/indices/_types/IndexSettings.ts index 0a7459e42d..ec10e55eb7 100644 --- a/specification/indices/_types/IndexSettings.ts +++ b/specification/indices/_types/IndexSettings.ts @@ -30,7 +30,7 @@ import { Uuid, VersionString } from '@_types/common' -import { integer, long } from '@_types/Numeric' +import { double, integer, long } from '@_types/Numeric' import { DateString, Time } from '@_types/Time' import { Tokenizer } from '@_types/analysis/tokenizers' import { IndexSegmentSort } from './IndexSegmentSort' @@ -298,6 +298,10 @@ export class IndexSettings */ analysis?: IndexSettingsAnalysis settings?: IndexSettings + /** + * Configure custom similarity settings to customize how search results are scored. + */ + similarity?: Dictionary /** * Enable or disable dynamic mapping for an index. */ @@ -313,6 +317,65 @@ export class IndexSettings store?: Storage } +/** + * @variants internal tag='type' + */ +export type SettingsSimilarity = + | SettingsSimilarityBm25 + | SettingsSimilarityBoolean + | SettingsSimilarityDfi + | SettingsSimilarityDfr + | SettingsSimilarityIb + | SettingsSimilarityLmd + | SettingsSimilarityLmj + | SettingsSimilarityScripted + +export class SettingsSimilarityBoolean { + type: 'boolean' +} + +export class SettingsSimilarityBm25 { + type: 'BM25' + b?: double + discount_overlaps?: boolean + k1?: double +} + +export class SettingsSimilarityDfi { + type: 'DFI' + independence_measure: DFIIndependenceMeasure +} + +export class SettingsSimilarityDfr { + type: 'DFR' + after_effect: DFRAfterEffect + basic_model: DFRBasicModel + normalization: Normalization +} + +export class SettingsSimilarityIb { + type: 'IB' + distribution: IBDistribution + lambda: IBLambda + normalization: Normalization +} + +export class SettingsSimilarityLmd { + type: 'LMDirichlet' + mu?: double +} + +export class SettingsSimilarityLmj { + type: 'LMJelinekMercer' + lambda?: double +} + +export class SettingsSimilarityScripted { + type: 'scripted' + script: Script + weight_script?: Script +} + export class IndexSettingBlocks { read_only?: boolean read_only_allow_delete?: boolean