From c1744c97bed8e80371ff4a56ce7cdd7fa1f79a7c Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 3 Jan 2025 17:26:57 +0100 Subject: [PATCH] Better approach, using KEYWORD subfield --- .../compute/operator/lookup/QueryList.java | 4 +-- .../src/main/resources/lookup-join.csv-spec | 14 ++++++++ .../esql/planner/LocalExecutionPlanner.java | 36 +++++++++++++------ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java index bfdf6b3bca9c..0884cd6d6177 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java @@ -32,7 +32,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Locale; import java.util.function.IntFunction; /** @@ -73,8 +72,7 @@ public static QueryList rawTermQueryList(MappedFieldType field, SearchExecutionC BytesRefBlock bytesRefBlock = (BytesRefBlock) block; BytesRef value = bytesRefBlock.getBytesRef(offset, new BytesRef()); if (field.typeName().equals("text")) { - // Text fields involve case-insensitive contains queries, we need to use lowercase on the term query - return new BytesRef(value.utf8ToString().toLowerCase(Locale.ROOT)); + throw new IllegalArgumentException("Cannot perform LOOKUP JOIN on TEXT fields"); } return value; }; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 384576c068d1..148faa4b80ab 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -521,6 +521,20 @@ language.id:integer | language.name:keyword | 
language.name.keyword:keyword 1 | English | English ; +joinOnNestedNestedFieldRowImplicitKeywords +required_capability: join_lookup_v10 + +ROW language.name = ["English", "French"] +| MV_EXPAND language.name +| LOOKUP JOIN languages_nested_fields ON language.name +| KEEP language.id, language.name, language.name.keyword, language.code +; + +language.id:integer | language.name:keyword | language.name.keyword:keyword | language.code:keyword +1 | English | English | EN +2 | French | French | FR +; + ############################################### # Tests with clientips_lookup index ############################################### diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index c40263baa656..b3ecdb95419c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -55,9 +55,11 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.TypedAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -584,28 +586,35 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan throw new IllegalArgumentException("can't plan [" + join + "], found index with mode [" + 
entry.getValue() + "]"); } String indexName = entry.getKey(); - List matchFields = new ArrayList<>(join.leftFields().size()); - for (Attribute m : join.leftFields()) { - Layout.ChannelAndType t = source.layout.get(m.id()); - if (t == null) { - throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]"); + if (join.leftFields().size() != join.rightFields().size()) { + throw new IllegalArgumentException("can't plan [" + join + "]: mismatching left and right field count"); + } + List matchFields = new ArrayList<>(join.leftFields().size()); + for (int i = 0; i < join.leftFields().size(); i++) { + TypedAttribute left = (TypedAttribute) join.leftFields().get(i); + FieldAttribute right = (FieldAttribute) join.rightFields().get(i); + Layout.ChannelAndType input = source.layout.get(left.id()); + if (input == null) { + throw new IllegalArgumentException("can't plan [" + join + "][" + left + "]"); } - matchFields.add(t); + matchFields.add(new MatchConfig(right, input)); } if (matchFields.size() != 1) { - throw new IllegalArgumentException("can't plan [" + join + "]"); + throw new IllegalArgumentException("can't plan [" + join + "]: multiple join predicates are not supported"); } + // TODO support multiple match fields, and support more than equality predicates + MatchConfig matchConfig = matchFields.getFirst(); return source.with( new LookupFromIndexOperator.Factory( sessionId, parentTask, context.queryPragmas().enrichMaxWorkers(), - matchFields.getFirst().channel(), + matchConfig.channel(), lookupFromIndexService, - matchFields.getFirst().type(), + matchConfig.type(), indexName, - join.leftFields().getFirst().name(), + matchConfig.fieldName(), join.addedFields().stream().map(f -> (NamedExpression) f).toList(), join.source() ), @@ -613,6 +622,13 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan ); } + private record MatchConfig(String fieldName, int channel, DataType type) { + private MatchConfig(FieldAttribute match, 
Layout.ChannelAndType input) { + // Note, this handles TEXT fields with KEYWORD subfields, and we assume that has been validated earlier during planning + this(match.exactAttribute().name(), input.channel(), input.type()); + } + } + private ExpressionEvaluator.Factory toEvaluator(Expression exp, Layout layout) { return EvalMapper.toEvaluator(exp, layout); }