From 976506044580039481945d3b6f964fee1e2e794b Mon Sep 17 00:00:00 2001 From: Florian Bernd Date: Tue, 3 Dec 2024 14:43:48 +0100 Subject: [PATCH] Improve `Analyzer` definitions --- output/schema/schema.json | 146 ++++++++++++++------- specification/_types/analysis/analyzers.ts | 85 ++++++++++-- 2 files changed, 172 insertions(+), 59 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index 0639333fe0..a58a9dc355 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -64389,7 +64389,7 @@ "name": "Analyzer", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/analyzers.ts#L359-L413", + "specLocation": "_types/analysis/analyzers.ts#L429-L483", "type": { "kind": "union_of", "items": [ @@ -64804,7 +64804,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L61-L66" + "specLocation": "_types/analysis/analyzers.ts#L80-L85" }, { "kind": "interface", @@ -64858,7 +64858,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L68-L73" + "specLocation": "_types/analysis/analyzers.ts#L87-L92" }, { "kind": "interface", @@ -64956,7 +64956,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L75-L80" + "specLocation": "_types/analysis/analyzers.ts#L94-L99" }, { "kind": "interface", @@ -65010,7 +65010,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L82-L87" + "specLocation": "_types/analysis/analyzers.ts#L101-L106" }, { "kind": "interface", @@ -65050,7 +65050,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L89-L93" + "specLocation": "_types/analysis/analyzers.ts#L108-L112" }, { "kind": "interface", @@ -65104,7 +65104,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L95-L100" + "specLocation": "_types/analysis/analyzers.ts#L114-L119" }, { "kind": "interface", @@ -65158,7 +65158,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L102-L107" + "specLocation": "_types/analysis/analyzers.ts#L121-L126" }, { "kind": "type_alias", @@ -65354,7 +65354,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L109-L113" + "specLocation": "_types/analysis/analyzers.ts#L128-L132" }, { "kind": "interface", @@ -65394,7 +65394,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L115-L119" + "specLocation": "_types/analysis/analyzers.ts#L134-L138" }, { "kind": "interface", @@ -65848,7 +65848,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L121-L126" + "specLocation": "_types/analysis/analyzers.ts#L140-L145" }, { "kind": "interface", @@ -65888,7 +65888,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L128-L132" + "specLocation": "_types/analysis/analyzers.ts#L147-L151" }, { "kind": "enum", @@ -66031,7 +66031,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L134-L139" + "specLocation": "_types/analysis/analyzers.ts#L153-L158" }, { "kind": "enum", @@ -66319,7 +66319,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L141-L146" + "specLocation": "_types/analysis/analyzers.ts#L160-L165" }, { "kind": "interface", @@ -66359,7 +66359,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L148-L152" + "specLocation": "_types/analysis/analyzers.ts#L167-L171" }, { "kind": "interface", @@ -66388,8 +66388,10 @@ } }, { + "description": "The maximum token size to emit. Tokens larger than this size will be discarded.\nDefaults to `255`", "name": "max_output_size", - "required": true, + "required": false, + "serverDefault": 255, "type": { "kind": "instance_of", "type": { @@ -66410,8 +66412,9 @@ } }, { + "description": "The character to use to concatenate the terms.\nDefaults to a space.", "name": "separator", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -66421,8 +66424,10 @@ } }, { + "description": "A pre-defined stop words list like `_english_` or an array containing a list of stop words.\nDefaults to `_none_`.", "name": "stopwords", "required": false, + "serverDefault": "_none_", "type": { "kind": "instance_of", "type": { @@ -66432,6 +66437,7 @@ } }, { + "description": "The path to a file containing stop words.", "name": "stopwords_path", "required": false, "type": { @@ -66443,7 +66449,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L37-L45" + "specLocation": "_types/analysis/analyzers.ts#L37-L64" }, { "kind": "interface", @@ -66543,7 +66549,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L154-L159" + "specLocation": "_types/analysis/analyzers.ts#L173-L178" }, { "kind": "interface", @@ -66597,7 +66603,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L161-L166" + "specLocation": "_types/analysis/analyzers.ts#L180-L185" }, { "kind": "interface", @@ -66651,7 +66657,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L168-L173" + "specLocation": "_types/analysis/analyzers.ts#L187-L192" }, { "kind": "interface", @@ -66705,7 +66711,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L175-L180" + "specLocation": "_types/analysis/analyzers.ts#L194-L199" }, { "kind": "interface", @@ -66745,7 +66751,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L182-L186" + "specLocation": "_types/analysis/analyzers.ts#L201-L205" }, { "kind": "interface", @@ -66799,7 +66805,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L188-L193" + "specLocation": "_types/analysis/analyzers.ts#L207-L212" }, { "kind": "interface", @@ -66891,7 +66897,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L195-L200" + "specLocation": "_types/analysis/analyzers.ts#L214-L219" }, { "kind": "interface", @@ -67554,7 +67560,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L202-L207" + "specLocation": "_types/analysis/analyzers.ts#L221-L226" }, { "kind": "interface", @@ -67608,7 +67614,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L209-L214" + "specLocation": "_types/analysis/analyzers.ts#L228-L233" }, { "kind": "interface", @@ -67662,7 +67668,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L216-L221" + "specLocation": "_types/analysis/analyzers.ts#L235-L240" }, { "kind": "interface", @@ -67840,7 +67846,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L47-L50" + "specLocation": "_types/analysis/analyzers.ts#L66-L69" }, { "kind": "interface", @@ -68464,7 +68470,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L52-L59" + "specLocation": "_types/analysis/analyzers.ts#L71-L78" }, { "kind": "interface", @@ -68518,7 +68524,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L223-L228" + "specLocation": "_types/analysis/analyzers.ts#L242-L247" }, { "kind": "interface", @@ -68697,7 +68703,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L230-L235" + "specLocation": "_types/analysis/analyzers.ts#L249-L254" }, { "kind": "interface", @@ -69084,7 +69090,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L312-L318" + "specLocation": "_types/analysis/analyzers.ts#L331-L337" }, { "kind": "enum", @@ -69300,7 +69306,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L237-L242" + "specLocation": "_types/analysis/analyzers.ts#L256-L261" }, { "kind": "interface", @@ -69435,6 +69441,7 @@ } }, { + "description": "Java regular expression flags. Flags should be pipe-separated, eg \"CASE_INSENSITIVE|COMMENTS\".", "name": "flags", "required": false, "type": { @@ -69446,8 +69453,10 @@ } }, { + "description": "Should terms be lowercased or not.\nDefaults to `true`.", "name": "lowercase", "required": false, + "serverDefault": true, "type": { "kind": "instance_of", "type": { @@ -69457,8 +69466,10 @@ } }, { + "description": "A Java regular expression.\nDefaults to `\\W+`.", "name": "pattern", - "required": true, + "required": false, + "serverDefault": "\\W+", "type": { "kind": "instance_of", "type": { @@ -69468,8 +69479,10 @@ } }, { + "description": "A pre-defined stop words list like `_english_` or an array containing a list of stop words.\nDefaults to `_none_`.", "name": "stopwords", "required": false, + "serverDefault": "_none_", "type": { "kind": "instance_of", "type": { @@ -69477,9 +69490,21 @@ "namespace": "_types.analysis" } } + }, + { + "description": "The path to a file containing stop words.", + "name": "stopwords_path", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "_types/analysis/analyzers.ts#L320-L327" + "specLocation": "_types/analysis/analyzers.ts#L339-L371" }, { "kind": "interface", @@ -69759,7 +69784,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L244-L248" + "specLocation": "_types/analysis/analyzers.ts#L263-L267" }, { "kind": "enum", @@ -70069,7 +70094,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L250-L255" + "specLocation": "_types/analysis/analyzers.ts#L269-L274" }, { "kind": "interface", @@ -70206,7 +70231,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L257-L262" + "specLocation": "_types/analysis/analyzers.ts#L276-L281" }, { "kind": "interface", @@ -70260,7 +70285,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L264-L269" + "specLocation": "_types/analysis/analyzers.ts#L283-L288" }, { "kind": "interface", @@ -70314,7 +70339,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L271-L276" + "specLocation": "_types/analysis/analyzers.ts#L290-L295" }, { "kind": "interface", @@ -70457,7 +70482,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L329-L332" + "specLocation": "_types/analysis/analyzers.ts#L373-L376" }, { "kind": "interface", @@ -70578,7 +70603,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L334-L339" + "specLocation": "_types/analysis/analyzers.ts#L379-L384" }, { "kind": "enum", @@ -70743,7 +70768,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L278-L283" + "specLocation": "_types/analysis/analyzers.ts#L297-L302" }, { "kind": "interface", @@ -70797,7 +70822,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L285-L290" + "specLocation": "_types/analysis/analyzers.ts#L304-L309" }, { "kind": "interface", @@ -70815,8 +70840,10 @@ } }, { + "description": "The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals.\nDefaults to `255`.", "name": "max_token_length", "required": false, + "serverDefault": 255, "type": { "kind": "instance_of", "type": { @@ -70826,8 +70853,10 @@ } }, { + "description": "A pre-defined stop words list like `_english_` or an array containing a list of stop words.\nDefaults to `_none_`.", "name": "stopwords", "required": false, + "serverDefault": "_none_", "type": { "kind": "instance_of", "type": { @@ -70835,9 +70864,21 @@ "namespace": "_types.analysis" } } + }, + { + "description": "The path to a file containing stop words.", + "name": "stopwords_path", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "_types/analysis/analyzers.ts#L341-L345" + "specLocation": "_types/analysis/analyzers.ts#L386-L406" }, { "kind": "interface", @@ -70988,8 +71029,10 @@ } }, { + "description": "A pre-defined stop words list like `_english_` or an array containing a list of stop words.\nDefaults to `_none_`.", "name": "stopwords", "required": false, + "serverDefault": "_none_", "type": { "kind": "instance_of", "type": { @@ -70999,6 +71042,7 @@ } }, { + "description": "The path to a file containing stop words.", "name": "stopwords_path", "required": false, "type": { @@ -71010,7 +71054,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L347-L352" + "specLocation": "_types/analysis/analyzers.ts#L408-L422" }, { "kind": "interface", @@ -71163,7 +71207,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L292-L297" + "specLocation": "_types/analysis/analyzers.ts#L311-L316" }, { "kind": "enum", @@ -71449,7 +71493,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L306-L310" + "specLocation": "_types/analysis/analyzers.ts#L325-L329" }, { "kind": "interface", @@ -72213,7 +72257,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L299-L304" + "specLocation": "_types/analysis/analyzers.ts#L318-L323" }, { "kind": "interface", @@ -72336,7 +72380,7 @@ } } ], - "specLocation": "_types/analysis/analyzers.ts#L354-L357" + "specLocation": "_types/analysis/analyzers.ts#L424-L427" }, { "kind": "interface", diff --git a/specification/_types/analysis/analyzers.ts b/specification/_types/analysis/analyzers.ts index 47da0e68fd..1748c0f245 100644 --- a/specification/_types/analysis/analyzers.ts +++ b/specification/_types/analysis/analyzers.ts @@ -37,10 +37,28 @@ export class CustomAnalyzer { export class FingerprintAnalyzer { type: 'fingerprint' version?: VersionString - max_output_size: integer - preserve_original: boolean - separator: string - stopwords?: StopWords + /** + * The maximum token size to emit. Tokens larger than this size will be discarded. + * Defaults to `255` + * + * @server_default 255 + */ + max_output_size?: integer + /** + * The character to use to concatenate the terms. + * Defaults to a space. + */ + separator?: string + /** + * A pre-defined stop words list like `_english_` or an array containing a list of stop words. + * Defaults to `_none_`. + * + * @server_default _none_ + */ + stopwords?: StopWords + /** + * The path to a file containing stop words. + */ stopwords_path?: string } @@ -320,10 +338,35 @@ export class NoriAnalyzer { export class PatternAnalyzer { type: 'pattern' version?: VersionString - flags?: string + /** + * Java regular expression flags. Flags should be pipe-separated, eg "CASE_INSENSITIVE|COMMENTS". + */ + flags?: string // TODO: Use PipeSeparatedFlags and proper enum + /** + * Should terms be lowercased or not. + * Defaults to `true`. + * + * @server_default true + */ lowercase?: boolean - pattern: string - stopwords?: StopWords + /** + * A Java regular expression. + * Defaults to `\W+`. + * + * @server_default \W+ + */ + pattern?: string + /** + * A pre-defined stop words list like `_english_` or an array containing a list of stop words. + * Defaults to `_none_`. + * + * @server_default _none_ + */ + stopwords?: StopWords + /** + * The path to a file containing stop words. + */ + stopwords_path?: string } export class SimpleAnalyzer { @@ -331,6 +374,7 @@ export class SimpleAnalyzer { version?: VersionString } +// TODO: This one seems undocumented!? export class SnowballAnalyzer { type: 'snowball' version?: VersionString @@ -340,14 +384,39 @@ export class SnowballAnalyzer { export class StandardAnalyzer { type: 'standard' + /** + * The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. + * Defaults to `255`. + * + * @server_default 255 + */ max_token_length?: integer + /** + * A pre-defined stop words list like `_english_` or an array containing a list of stop words. + * Defaults to `_none_`. + * + * @server_default _none_ + */ stopwords?: StopWords + /** + * The path to a file containing stop words. + */ + stopwords_path?: string } export class StopAnalyzer { type: 'stop' version?: VersionString - stopwords?: StopWords + /** + * A pre-defined stop words list like `_english_` or an array containing a list of stop words. + * Defaults to `_none_`. + * + * @server_default _none_ + */ + stopwords?: StopWords + /** + * The path to a file containing stop words. + */ stopwords_path?: string }