From 9bb4c08acdc5e0412bc5d35c1dc6ee6d219d7245 Mon Sep 17 00:00:00 2001 From: normanj-bitquill <78755797+normanj-bitquill@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:28:36 -0700 Subject: [PATCH] Added documentation for the plugins.query.field_type_tolerance setting (#1300) (#3118) Signed-off-by: Norman Jordan <norman.jordan@improving.com> --- docs/user/admin/settings.rst | 58 +++++++++++++++++++++++++ docs/user/limitations/limitations.rst | 29 +++++++++++++ doctest/test_data/multi_value_long.json | 5 +++ 3 files changed, 92 insertions(+) create mode 100644 doctest/test_data/multi_value_long.json diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index 71718d1726..cbcb4f329d 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -824,3 +824,61 @@ To Re-enable Data Sources::: } } +plugins.query.field_type_tolerance +================================== + +Description +----------- + +This setting controls whether to preserve arrays. If this setting is set to false, then an array is reduced +to the first non-array value at any level of nesting. + +1. The default value is true (preserve arrays) +2. This setting is node scope +3. 
This setting can be updated dynamically + +Querying a field containing array values will return the full array values:: + + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------------------+ + | accounts | + +-----------------------+ + | [{'id': 1},{'id': 2}] | + +-----------------------+ + +Disable field type tolerance:: + + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : false + } + }' + +When field type tolerance is disabled, arrays are collapsed to the first non-array value:: + + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------+ + | accounts | + +-----------+ + | {'id': 1} | + +-----------+ + +Re-enable field type tolerance:: + + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : true + } + }' + +Limitations: +------------ +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled, only the +first element of an array is returned; if it is absent, the default value (enabled) applies. 
\ No newline at end of file diff --git a/docs/user/limitations/limitations.rst b/docs/user/limitations/limitations.rst index 8ce75a0e25..22ad3c2a17 100644 --- a/docs/user/limitations/limitations.rst +++ b/docs/user/limitations/limitations.rst @@ -101,3 +101,32 @@ The response in JDBC format with cursor id:: } The query with `aggregation` and `join` does not support pagination for now. + +Limitations on Using Multi-valued Fields +======================================== + +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled, only the +first element of an array is returned; if it is absent, the default value (enabled) applies. 
+ +For example, the following query tries to calculate the absolute value of a field that contains arrays of +longs:: + + POST _plugins/_sql/ + { + "query": "SELECT id, ABS(long_array) FROM multi_value_long" + } +The response in JSON format is:: + + { + "error": { + "reason": "Invalid SQL query", + "details": "invalid to get longValue from value of type ARRAY", + "type": "ExpressionEvaluationException" + }, + "status": 400 + } diff --git a/doctest/test_data/multi_value_long.json b/doctest/test_data/multi_value_long.json new file mode 100644 index 0000000000..3c139630f6 --- /dev/null +++ b/doctest/test_data/multi_value_long.json @@ -0,0 +1,5 @@ +{"id": 1, "long_array": [1, 2]} +{"id": 2, "long_array": [3, 4]} +{"id": 3, "long_array": [1, 5]} +{"id": 4, "long_array": [1, 2]} +{"id": 5, "long_array": [2, 3]} \ No newline at end of file