From f75a24a775aab7e0b66428915e775d7137a38aa8 Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 17 Dec 2024 16:15:19 -0800 Subject: [PATCH] Add new requests and responses --- .../FindFieldStructureRequest.ts | 171 +++++++++++++++++ .../FindFieldStructureResponse.ts | 21 +++ .../FindMessageStructureRequest.ts | 172 ++++++++++++++++++ .../FindMessageStructureResponse.ts | 21 +++ 4 files changed, 385 insertions(+) create mode 100644 specification/text_structure/find_field_structure/FindFieldStructureRequest.ts create mode 100644 specification/text_structure/find_field_structure/FindFieldStructureResponse.ts create mode 100644 specification/text_structure/find_message_structure/FindMessageStructureRequest.ts create mode 100644 specification/text_structure/find_message_structure/FindMessageStructureResponse.ts diff --git a/specification/text_structure/find_field_structure/FindFieldStructureRequest.ts b/specification/text_structure/find_field_structure/FindFieldStructureRequest.ts new file mode 100644 index 0000000000..7d9fc75cfc --- /dev/null +++ b/specification/text_structure/find_field_structure/FindFieldStructureRequest.ts @@ -0,0 +1,171 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestBase } from '@_types/Base' +import { GrokPattern } from '@_types/common' +import { uint } from '@_types/Numeric' +import { Duration } from '@_types/Time' +/** + * Find the structure of a text field. + * Find the structure of a text field in an Elasticsearch index. + * @rest_spec_name text_structure.find_field_structure + * @availability stack stability=stable visibility=public + * @cluster_privileges monitor_text_structure + */ +interface Request extends RequestBase { + query_parameters: { + /** + * If `format` is set to `delimited`, you can specify the column names in a comma-separated list. + * If this parameter is not specified, the structure finder uses the column names from the header row of the text. + * If the text does not have a header row, columns are named "column1", "column2", "column3", for example. + */ + column_names?: string + /** + * If you have set `format` to `delimited`, you can specify the character used to delimit the values in each row. + * Only a single character is supported; the delimiter cannot have multiple characters. + * By default, the API considers the following possibilities: comma, tab, semi-colon, and pipe (`|`). + * In this default scenario, all rows must have the same number of fields for the delimited format to be detected. + * If you specify a delimiter, up to 10% of the rows can have a different number of columns than the first row. + */ + delimiter?: string + /** + * The number of documents to include in the structural analysis. + * The minimum value is 2. + * @server_default 1000 + */ + documents_to_sample?: uint + /** + * The mode of compatibility with ECS compliant Grok patterns. + * Use this parameter to specify whether to use ECS Grok patterns instead of legacy ones when the structure finder creates a Grok pattern. + * This setting primarily has an impact when a whole message Grok pattern such as `%{CATALINALOG}` matches the input. + * If the structure finder identifies a common structure but has no idea of the meaning then generic field names such as `path`, `ipaddress`, `field1`, and `field2` are used in the `grok_pattern` output. + * The intention in that situation is that a user who knows the meanings will rename the fields before using them. + * @server_default disabled + */ + ecs_compatibility?: EcsCompatibilityType + /** + * If true, the response includes a field named `explanation`, which is an array of strings that indicate how the structure finder produced its result. + * @server_default false + */ + explain?: boolean + /** + * The field that should be analyzed. + */ + field: string + /** + * The high level structure of the text. + * By default, the API chooses the format. + * In this default scenario, all rows must have the same number of fields for a delimited format to be detected. + * If the format is set to delimited and the delimiter is not set, however, the API tolerates up to 5% of rows that have a different number of columns than the first row. + */ + format?: FormatType + /** + * If the format is `semi_structured_text`, you can specify a Grok pattern that is used to extract fields from every message in the text. + * The name of the timestamp field in the Grok pattern must match what is specified in the `timestamp_field` parameter. + * If that parameter is not specified, the name of the timestamp field in the Grok pattern must match "timestamp". + * If `grok_pattern` is not specified, the structure finder creates a Grok pattern. + */ + grok_pattern?: GrokPattern + /** + * The name of the index that contains the analyzed field. + */ + index: string + /** + * If the format is `delimited`, you can specify the character used to quote the values in each row if they contain newlines or the delimiter character. + * Only a single character is supported. + * If this parameter is not specified, the default value is a double quote (`"`). + * If your delimited text format does not use quoting, a workaround is to set this argument to a character that does not appear anywhere in the sample. + */ + quote?: string + /** + * If the format is `delimited`, you can specify whether values between delimiters should have whitespace trimmed from them. + * If this parameter is not specified and the delimiter is pipe (`|`), the default value is true. + * Otherwise, the default value is false. + */ + should_trim_fields?: boolean + /** + * The maximum amount of time that the structure analysis can take. + * If the analysis is still running when the timeout expires, it will be stopped. + * @server_default 25s + */ + timeout?: Duration + /** + * The name of the field that contains the primary timestamp of each record in the text. + * In particular, if the text was ingested into an index, this is the field that would be used to populate the `@timestamp` field. + * + * If the format is `semi_structured_text`, this field must match the name of the appropriate extraction in the `grok_pattern`. + * Therefore, for semi-structured text, it is best not to specify this parameter unless `grok_pattern` is also specified. + * + * For structured text, if you specify this parameter, the field must exist within the text. + * + * If this parameter is not specified, the structure finder makes a decision about which field (if any) is the primary timestamp field. + * For structured text, it is not compulsory to have a timestamp in the text. + */ + timestamp_field?: string + /** + * The Java time format of the timestamp field in the text. + * Only a subset of Java time format letter groups are supported: + * + * * `a` + * * `d` + * * `dd` + * * `EEE` + * * `EEEE` + * * `H` + * * `HH` + * * `h` + * * `M` + * * `MM` + * * `MMM` + * * `MMMM` + * * `mm` + * * `ss` + * * `XX` + * * `XXX` + * * `yy` + * * `yyyy` + * * `zzz` + * + * Additionally `S` letter groups (fractional seconds) of length one to nine are supported providing they occur after `ss` and are separated from the `ss` by a period (`.`), comma (`,`), or colon (`:`). + * Spacing and punctuation is also permitted with the exception a question mark (`?`), newline, and carriage return, together with literal text enclosed in single quotes. + * For example, `MM/dd HH.mm.ss,SSSSSS 'in' yyyy` is a valid override format. + * + * One valuable use case for this parameter is when the format is semi-structured text, there are multiple timestamp formats in the text, and you know which format corresponds to the primary timestamp, but you do not want to specify the full `grok_pattern`. + * Another is when the timestamp format is one that the structure finder does not consider by default. + * + * If this parameter is not specified, the structure finder chooses the best format from a built-in set. + * + * If the special value `null` is specified, the structure finder will not look for a primary timestamp in the text. + * When the format is semi-structured text, this will result in the structure finder treating the text as single-line messages. + */ + timestamp_format?: string + } +} + +export enum EcsCompatibilityType { + disabled, + v1 +} + +export enum FormatType { + delimited, + ndjson, + semi_structured_text, + xml +} diff --git a/specification/text_structure/find_field_structure/FindFieldStructureResponse.ts b/specification/text_structure/find_field_structure/FindFieldStructureResponse.ts new file mode 100644 index 0000000000..598b913c3e --- /dev/null +++ b/specification/text_structure/find_field_structure/FindFieldStructureResponse.ts @@ -0,0 +1,21 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +export class Response { + body: {} +} diff --git a/specification/text_structure/find_message_structure/FindMessageStructureRequest.ts b/specification/text_structure/find_message_structure/FindMessageStructureRequest.ts new file mode 100644 index 0000000000..420377b3fc --- /dev/null +++ b/specification/text_structure/find_message_structure/FindMessageStructureRequest.ts @@ -0,0 +1,172 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestBase } from '@_types/Base' +import { Field, GrokPattern } from '@_types/common' +import { Duration } from '@_types/Time' +/** + * Find the structure of text messages. + * Find the structure of a list of text messages. + * The messages must contain data that is suitable to be ingested into Elasticsearch. + * + * This API provides a starting point for ingesting data into Elasticsearch in a format that is suitable for subsequent use with other Elastic Stack functionality. + * Use this API rather than the find text structure API if your input text has already been split up into separate messages by some other process. + * The response from the API contains: + * + * * Sample messages. + * * Statistics that reveal the most common values for all fields detected within the text and basic numeric statistics for numeric fields. + * * Information about the structure of the text, which is useful when you write ingest configurations to index it or similarly formatted text. + * Appropriate mappings for an Elasticsearch index, which you could use to ingest the text. + * + * All this information can be calculated by the structure finder with no guidance. + * However, you can optionally override some of the decisions about the text structure by specifying one or more query parameters. + * @rest_spec_name text_structure.find_message_structure + * @availability stack stability=stable visibility=public + * @cluster_privileges monitor_text_structure + */ +interface Request extends RequestBase { + query_parameters: { + /** If the format is `delimited`, you can specify the column names in a comma-separated list. + * If this parameter is not specified, the structure finder uses the column names from the header row of the text. + * If the text does not have a header role, columns are named "column1", "column2", "column3", for example. + */ + column_names?: string + /** + * If you the format is `delimited`, you can specify the character used to delimit the values in each row. + * Only a single character is supported; the delimiter cannot have multiple characters. + * By default, the API considers the following possibilities: comma, tab, semi-colon, and pipe (`|`). + * In this default scenario, all rows must have the same number of fields for the delimited format to be detected. + * If you specify a delimiter, up to 10% of the rows can have a different number of columns than the first row. + */ + delimiter?: string + /** + * The mode of compatibility with ECS compliant Grok patterns. + * Use this parameter to specify whether to use ECS Grok patterns instead of legacy ones when the structure finder creates a Grok pattern. + * This setting primarily has an impact when a whole message Grok pattern such as `%{CATALINALOG}` matches the input. + * If the structure finder identifies a common structure but has no idea of meaning then generic field names such as `path`, `ipaddress`, `field1`, and `field2` are used in the `grok_pattern` output, with the intention that a user who knows the meanings rename these fields before using it. + * @server_default disabled + */ + ecs_compatibility?: EcsCompatibilityType + /** + * If this parameter is set to true, the response includes a field named `explanation`, which is an array of strings that indicate how the structure finder produced its result. + * @server_default false + */ + explain?: boolean + /** The high level structure of the text. + * By default, the API chooses the format. + * In this default scenario, all rows must have the same number of fields for a delimited format to be detected. + * If the format is `delimited` and the delimiter is not set, however, the API tolerates up to 5% of rows that have a different number of columns than the first row. + */ + format?: FormatType + /** + * If the format is `semi_structured_text`, you can specify a Grok pattern that is used to extract fields from every message in the text. + * The name of the timestamp field in the Grok pattern must match what is specified in the `timestamp_field` parameter. + * If that parameter is not specified, the name of the timestamp field in the Grok pattern must match "timestamp". + * If `grok_pattern` is not specified, the structure finder creates a Grok pattern. + */ + grok_pattern?: GrokPattern + /** + * If the format is `delimited`, you can specify the character used to quote the values in each row if they contain newlines or the delimiter character. + * Only a single character is supported. + * If this parameter is not specified, the default value is a double quote (`"`). + * If your delimited text format does not use quoting, a workaround is to set this argument to a character that does not appear anywhere in the sample. + */ + quote?: string + /** + * If the format is `delimited`, you can specify whether values between delimiters should have whitespace trimmed from them. + * If this parameter is not specified and the delimiter is pipe (`|`), the default value is true. + * Otherwise, the default value is false. + */ + should_trim_fields?: boolean + /** + * The maximum amount of time that the structure analysis can take. + * If the analysis is still running when the timeout expires, it will be stopped. + * @server_default 25s + */ + timeout?: Duration + /** + * The name of the field that contains the primary timestamp of each record in the text. + * In particular, if the text was ingested into an index, this is the field that would be used to populate the `@timestamp` field. + * + * If the format is `semi_structured_text`, this field must match the name of the appropriate extraction in the `grok_pattern`. + * Therefore, for semi-structured text, it is best not to specify this parameter unless `grok_pattern` is also specified. + * + * For structured text, if you specify this parameter, the field must exist within the text. + * + * If this parameter is not specified, the structure finder makes a decision about which field (if any) is the primary timestamp field. + * For structured text, it is not compulsory to have a timestamp in the text. + */ + timestamp_field?: Field + /** + * The Java time format of the timestamp field in the text. + * Only a subset of Java time format letter groups are supported: + * + * * `a` + * * `d` + * * `dd` + * * `EEE` + * * `EEEE` + * * `H` + * * `HH` + * * `h` + * * `M` + * * `MM` + * * `MMM` + * * `MMMM` + * * `mm` + * * `ss` + * * `XX` + * * `XXX` + * * `yy` + * * `yyyy` + * * `zzz` + * + * Additionally `S` letter groups (fractional seconds) of length one to nine are supported providing they occur after `ss` and are separated from the `ss` by a period (`.`), comma (`,`), or colon (`:`). + * Spacing and punctuation is also permitted with the exception a question mark (`?`), newline, and carriage return, together with literal text enclosed in single quotes. + * For example, `MM/dd HH.mm.ss,SSSSSS 'in' yyyy` is a valid override format. + * + * One valuable use case for this parameter is when the format is semi-structured text, there are multiple timestamp formats in the text, and you know which format corresponds to the primary timestamp, but you do not want to specify the full `grok_pattern`. + * Another is when the timestamp format is one that the structure finder does not consider by default. + * + * If this parameter is not specified, the structure finder chooses the best format from a built-in set. + * + * If the special value `null` is specified, the structure finder will not look for a primary timestamp in the text. + * When the format is semi-structured text, this will result in the structure finder treating the text as single-line messages. + */ + timestamp_format?: string + } + body: { + /** + * The list of messages you want to analyze. + */ + messages: Array + } +} + +export enum EcsCompatibilityType { + disabled, + v1 +} + +export enum FormatType { + delimited, + ndjson, + semi_structured_text, + xml +} diff --git a/specification/text_structure/find_message_structure/FindMessageStructureResponse.ts b/specification/text_structure/find_message_structure/FindMessageStructureResponse.ts new file mode 100644 index 0000000000..598b913c3e --- /dev/null +++ b/specification/text_structure/find_message_structure/FindMessageStructureResponse.ts @@ -0,0 +1,21 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +export class Response { + body: {} +}