Skip to content

Commit

Permalink
Added strict range type checks for ENRICH
Browse files Browse the repository at this point in the history
  • Loading branch information
craigtaverner committed Oct 18, 2024
1 parent 3bb20e3 commit e8a0716
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ public enum Cap {
*/
RANGEQUERY_FOR_DATETIME,

/**
* Enforce strict type checking on ENRICH range types.
*/
ENRICH_STRICT_RANGE_TYPES,

/**
* Fix for non-unique attribute names in ROW and logical plans.
* https://github.com/elastic/elasticsearch/issues/110541
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeType;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchService;
Expand Down Expand Up @@ -294,6 +296,7 @@ private void doLookup(
releasables.add(mergePositionsOperator);
SearchExecutionContext searchExecutionContext = searchContext.getSearchExecutionContext();
MappedFieldType fieldType = searchExecutionContext.getFieldType(matchField);
validateTypes(inputDataType, fieldType);
var queryList = switch (matchType) {
case "match", "range" -> QueryList.termQueryList(fieldType, searchExecutionContext, inputBlock, inputDataType);
case "geo_match" -> QueryList.geoShapeQuery(fieldType, searchExecutionContext, inputBlock, inputDataType);
Expand Down Expand Up @@ -354,6 +357,30 @@ private void doLookup(
}
}

/**
 * Verifies that the ENRICH input data type can be matched against the given field type.
 * <p>
 * Three checks are performed, in this order:
 * <ol>
 *   <li>the input type must not be {@link DataType#UNSUPPORTED};</li>
 *   <li>the field type must not be {@code null};</li>
 *   <li>if the field is a range field, its range type and the input type must be accepted
 *       by {@code rangeTypesCompatible}.</li>
 * </ol>
 * Field types that are not range fields pass without any further checks.
 *
 * @param inputDataType the data type of the input values used for matching
 * @param fieldType     the mapped type of the field being matched on, possibly {@code null}
 * @throws EsqlIllegalArgumentException if any of the checks above fails
 */
private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
    if (inputDataType == DataType.UNSUPPORTED) {
        throw new EsqlIllegalArgumentException("ENRICH cannot match on unsupported input data type");
    }
    if (fieldType == null) {
        throw new EsqlIllegalArgumentException("ENRICH cannot match on non-existent field");
    }
    if (fieldType instanceof RangeFieldMapper.RangeFieldType rangeFieldType) {
        // Read the range type once; it is used both for the check and for the error message.
        RangeType fieldRangeType = rangeFieldType.rangeType();
        boolean compatible = rangeTypesCompatible(fieldRangeType, inputDataType);
        if (compatible == false) {
            String message = "ENRICH range and input types are incompatible: range[" + fieldRangeType + "], input[" + inputDataType + "]";
            throw new EsqlIllegalArgumentException(message);
        }
    }
}

/**
 * Decides whether an input data type may be matched against a range field of the given range type.
 * <ul>
 *   <li>{@code INTEGER} and {@code LONG} ranges accept only input types for which
 *       {@code isWholeNumber()} is true;</li>
 *   <li>{@code IP} ranges accept only {@link DataType#IP};</li>
 *   <li>every other range type accepts an input type whose {@code isNumeric()} result equals
 *       the range type's own {@code isNumeric()} result.</li>
 * </ul>
 *
 * @param rangeType     the range type of the field being matched on
 * @param inputDataType the data type of the input values
 * @return {@code true} if the two types are considered compatible, {@code false} otherwise
 */
private static boolean rangeTypesCompatible(RangeType rangeType, DataType inputDataType) {
    if (rangeType == RangeType.INTEGER || rangeType == RangeType.LONG) {
        return inputDataType.isWholeNumber();
    }
    if (rangeType == RangeType.IP) {
        return inputDataType == DataType.IP;
    }
    // Remaining range types: numeric ranges need numeric input, non-numeric ranges need non-numeric input.
    return rangeType.isNumeric() == inputDataType.isNumeric();
}

private static Operator extractFieldsOperator(
SearchContext searchContext,
DriverContext driverContext,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
---
setup:
- requires:
capabilities:
- method: POST
path: /_query
parameters: [method, path, parameters, capabilities]
capabilities: [enrich_strict_range_types]
reason: "Strict range type checking was added"
test_runner_features: [capabilities, allowed_warnings_regex, warnings_regex]

- do:
indices.create:
index: ages
body:
settings:
index.number_of_shards: 1
index.routing.rebalance.enable: "none"
mappings:
properties:
age_range:
type: "integer_range"
description:
type: "keyword"

- do:
bulk:
index: ages
refresh: true
body:
- { "index": { } }
- { "age_range": { "gte": 0, "lt": 2 }, "description": "Baby" }
- { "index": { } }
- { "age_range": { "gte": 2, "lt": 4 }, "description": "Toddler" }
- { "index": { } }
- { "age_range": { "gte": 3, "lt": 5 }, "description": "Preschooler" }
- { "index": { } }
- { "age_range": { "gte": 5, "lt": 12 }, "description": "Child" }
- { "index": { } }
- { "age_range": { "gte": 13, "lt": 20 }, "description": "Adolescent" }
- { "index": { } }
- { "age_range": { "gte": 20, "lt": 40 }, "description": "Young Adult" }
- { "index": { } }
- { "age_range": { "gte": 40, "lt": 60 }, "description": "Middle-aged" }
- { "index": { } }
- { "age_range": { "gte": 60, "lt": 80 }, "description": "Senior" }
- { "index": { } }
- { "age_range": { "gte": 80, "lt": 100 }, "description": "Elderly" }
- { "index": { } }
- { "age_range": { "gte": 100, "lt": 200 }, "description": "Incredible" }
- do:
cluster.health:
wait_for_no_initializing_shards: true
wait_for_events: languid

- do:
enrich.put_policy:
name: ages-policy
body:
range:
indices: [ "ages" ]
match_field: "age_range"
enrich_fields: [ "description" ]

- do:
enrich.execute_policy:
name: ages-policy

- do:
indices.create:
index: employees
body:
mappings:
properties:
name:
type: keyword
age:
type: integer
ak:
type: keyword
salary:
type: double

- do:
bulk:
index: employees
refresh: true
body:
- { "index": { } }
- { "name": "Joe Soap", "age": 36, "ak": "36", "salary": 55.55 }
- { "index": { } }
- { "name": "Jane Doe", "age": 31, "ak": "31", "salary": 55.55 }
- { "index": { } }
- { "name": "Magic Mike", "age": 44, "ak": "44", "salary": 55.55 }
- { "index": { } }
- { "name": "Anon Ymous", "age": 61, "ak": "61", "salary": 55.55 }

---
teardown:
- do:
enrich.delete_policy:
name: ages-policy

---
"ages":
- do:
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM employees | ENRICH ages-policy ON age | STATS count=COUNT(*) BY description | SORT count DESC, description ASC'

- match: { columns.0.name: "count" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "description" }
- match: { columns.1.type: "keyword" }

- length: { values: 3 }
- match: { values.0: [ 2, "Young Adult" ] }
- match: { values.1: [ 1, "Middle-aged" ] }
- match: { values.2: [ 1, "Senior" ] }

---
"ages as typecast keywords":
- do:
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM employees | EVAL aki = ak::integer | ENRICH ages-policy ON aki | STATS count=COUNT(*) BY description | SORT count DESC, description ASC'

- match: { columns.0.name: "count" }
- match: { columns.0.type: "long" }
- match: { columns.1.name: "description" }
- match: { columns.1.type: "keyword" }

- length: { values: 3 }
- match: { values.0: [ 2, "Young Adult" ] }
- match: { values.1: [ 1, "Middle-aged" ] }
- match: { values.2: [ 1, "Senior" ] }

---
"ages as keywords":
- do:
catch: /ENRICH range and input types are incompatible. range\[INTEGER\], input\[KEYWORD\]/
esql.query:
body:
query: 'FROM employees | ENRICH ages-policy ON ak | STATS count=COUNT(*) BY description | SORT count DESC, description ASC'

---
"Invalid age as keyword":
- requires:
cluster_features: [ "gte_v8.14.0" ]
reason: "IP range ENRICH support was added in 8.14.0"

- do:
catch: /ENRICH range and input types are incompatible. range\[INTEGER\], input\[KEYWORD\]/
esql.query:
body:
query: 'FROM employees | ENRICH ages-policy ON name | STATS count=COUNT(*) BY description | SORT count DESC, description ASC'

---
"Invalid age as double":
- requires:
cluster_features: [ "gte_v8.14.0" ]
reason: "IP range ENRICH support was added in 8.14.0"

- do:
catch: /ENRICH range and input types are incompatible. range\[INTEGER\], input\[DOUBLE\]/
esql.query:
body:
query: 'FROM employees | ENRICH ages-policy ON salary | STATS count=COUNT(*) BY description | SORT count DESC, description ASC'

0 comments on commit e8a0716

Please sign in to comment.