From 039949ee7d1d762bba364b2bb35771b6da632033 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Thu, 13 Jun 2024 11:24:11 +0200 Subject: [PATCH 01/20] Initial commit for PPL lookup Signed-off-by: Hendrik Saly --- .../org/opensearch/sql/analysis/Analyzer.java | 99 +++++++++++++ .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/dsl/AstDSL.java | 21 +++ .../org/opensearch/sql/ast/tree/Lookup.java | 49 +++++++ .../org/opensearch/sql/executor/Explain.java | 15 ++ .../sql/planner/DefaultImplementor.java | 15 ++ .../sql/planner/logical/LogicalLookup.java | 44 ++++++ .../logical/LogicalPlanNodeVisitor.java | 4 + .../sql/planner/physical/LookupOperator.java | 137 ++++++++++++++++++ .../sql/planner/physical/PhysicalPlanDSL.java | 17 +++ .../physical/PhysicalPlanNodeVisitor.java | 4 + .../opensearch/sql/ppl/LookupCommandIT.java | 64 ++++++++ .../OpenSearchExecutionProtector.java | 82 +++++++++++ .../plugin/config/OpenSearchPluginModule.java | 4 +- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 14 ++ .../opensearch/sql/ppl/parser/AstBuilder.java | 43 ++++++ .../sql/ppl/utils/ArgumentFactory.java | 8 + .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 14 ++ .../sql/ppl/parser/AstBuilderTest.java | 15 ++ .../ppl/parser/AstExpressionBuilderTest.java | 7 +- .../sql/ppl/utils/ArgumentFactoryTest.java | 52 +++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 5 + 23 files changed, 713 insertions(+), 6 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Lookup.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d5e8b93b13..1d8bdaf30e 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -50,6 +50,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; @@ -66,6 +67,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -89,6 +91,7 @@ import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalML; import org.opensearch.sql.planner.logical.LogicalMLCommons; import org.opensearch.sql.planner.logical.LogicalPaginate; @@ -507,6 +510,102 @@ public LogicalPlan visitDedupe(Dedupe node, AnalysisContext context) { consecutive); } + /** Build {@link LogicalLookup}. */ + @Override + public LogicalPlan visitLookup(Lookup node, AnalysisContext context) { + LogicalPlan child = node.getChild().get(0).accept(this, context); + List options = node.getOptions(); + // Todo, refactor the option. + Boolean appendOnly = (Boolean) options.get(0).getValue().getValue(); + + Table table = + dataSourceService + .getDataSource(DEFAULT_DATASOURCE_NAME) + .getStorageEngine() + .getTable(null, node.getIndexName()); + + if (table == null || !table.exists()) { + throw new SemanticCheckException( + String.format("no such lookup index %s", node.getIndexName())); + } + + return new LogicalLookup( + child, + node.getIndexName(), + analyzeLookupMatchFields(node.getMatchFieldList(), context), + appendOnly, + analyzeLookupCopyFields(node.getCopyFieldList(), context, table)); + } + + private ImmutableMap analyzeLookupMatchFields( + List inputMap, AnalysisContext context) { + ImmutableMap.Builder copyMapBuilder = + new ImmutableMap.Builder<>(); + for (Map resultMap : inputMap) { + Expression origin = expressionAnalyzer.analyze(resultMap.getOrigin(), context); + if (resultMap.getTarget() instanceof Field) { + ReferenceExpression target = + new ReferenceExpression( + ((Field) resultMap.getTarget()).getField().toString(), origin.type()); + ReferenceExpression originExpr = DSL.ref(origin.toString(), origin.type()); + TypeEnvironment curEnv = context.peek(); + curEnv.remove(originExpr); + curEnv.define(target); + copyMapBuilder.put(originExpr, target); + } else { + throw new SemanticCheckException( + String.format("the target expected to be field, but is %s", resultMap.getTarget())); + } + } + + return copyMapBuilder.build(); + } + + private ImmutableMap analyzeLookupCopyFields( + List inputMap, AnalysisContext context, Table table) { + + TypeEnvironment curEnv = context.peek(); + java.util.Map fieldTypes = table.getFieldTypes(); + + if (inputMap.isEmpty()) { + fieldTypes.forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + return ImmutableMap.builder().build(); + } + + ImmutableMap.Builder copyMapBuilder = + new ImmutableMap.Builder<>(); + for (Map resultMap : inputMap) { + if (resultMap.getOrigin() instanceof Field && resultMap.getTarget() instanceof Field) { + String fieldName = ((Field) resultMap.getOrigin()).getField().toString(); + ExprType ex = fieldTypes.get(fieldName); + + if (ex == null) { + throw new SemanticCheckException(String.format("no such field %s", fieldName)); + } + + ReferenceExpression origin = new ReferenceExpression(fieldName, ex); + + if (resultMap.getTarget().equals(resultMap.getOrigin())) { + + curEnv.define(origin); + copyMapBuilder.put(origin, origin); + } else { + ReferenceExpression target = + new ReferenceExpression(((Field) resultMap.getTarget()).getField().toString(), ex); + curEnv.define(target); + copyMapBuilder.put(origin, target); + } + } else { + throw new SemanticCheckException( + String.format( + "the origin and target expected to be field, but is %s/%s", + resultMap.getOrigin(), resultMap.getTarget())); + } + } + + return copyMapBuilder.build(); + } + /** Logical head is identical to {@link LogicalLimit}. */ public LogicalPlan visitHead(Head node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 973b10310b..16ebf45854 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; @@ -217,6 +218,10 @@ public T visitDedupe(Dedupe node, C context) { return visitChildren(node, context); } + public T visitLookup(Lookup node, C context) { + return visitChildren(node, context); + } + public T visitHead(Head node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 4f3056b0f7..9b435a627d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ast.dsl; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -49,6 +50,7 @@ import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.RareTopN; @@ -441,6 +443,25 @@ public static Dedupe dedupe(UnresolvedPlan input, List options, Field. return new Dedupe(input, options, Arrays.asList(fields)); } + public static Lookup lookup( + UnresolvedPlan input, + String indexName, + List matchFieldList, + List options, + List copyFieldList) { + return new Lookup(input, indexName, matchFieldList, options, copyFieldList); + } + + public static List fieldMap(String field, String asField, String... more) { + assert more == null || more.length % 2 == 0; + List list = new ArrayList(); + list.add(map(field, asField)); + for (int i = 0; i < more.length; i = i + 2) { + list.add(map(more[i], more[i + 1])); + } + return list; + } + public static Head head(UnresolvedPlan input, Integer size, Integer from) { return new Head(input, size, from); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Lookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/Lookup.java new file mode 100644 index 0000000000..d9b22cbd26 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Lookup.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Map; + +/** AST node represent Lookup operation. */ +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +@AllArgsConstructor +public class Lookup extends UnresolvedPlan { + private UnresolvedPlan child; + private final String indexName; + private final List matchFieldList; + private final List options; + private final List copyFieldList; + + @Override + public Lookup attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitLookup(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index 0f05b99383..d4b15a1cdc 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -22,6 +22,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.LookupOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanNodeVisitor; @@ -157,6 +158,20 @@ public ExplainResponseNode visitDedupe(DedupeOperator node, Object context) { "consecutive", node.getConsecutive()))); } + @Override + public ExplainResponseNode visitLookup(LookupOperator node, Object context) { + return explain( + node, + context, + explainNode -> + explainNode.setDescription( + ImmutableMap.of( + "copyfields", node.getCopyFieldMap(), + "matchfields", node.getMatchFieldMap(), + "indexname", node.getIndexName(), + "appendonly", node.getAppendOnly()))); + } + @Override public ExplainResponseNode visitRareTopN(RareTopNOperator node, Object context) { return explain( diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index b53d17b38f..96a3be1f69 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -13,6 +13,7 @@ import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; @@ -31,6 +32,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.LookupOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.ProjectOperator; @@ -74,6 +76,19 @@ public PhysicalPlan visitDedupe(LogicalDedupe node, C context) { node.getConsecutive()); } + @Override + public PhysicalPlan visitLookup(LogicalLookup node, C context) { + return new LookupOperator( + visitChild(node, context), + node.getIndexName(), + node.getMatchFieldMap(), + node.getAppendOnly(), + node.getCopyFieldMap(), + (a, b) -> { + throw new RuntimeException("not implemented by DefaultImplementor"); + }); + } + @Override public PhysicalPlan visitProject(LogicalProject node, C context) { return new ProjectOperator( diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java new file mode 100644 index 0000000000..6269b56e37 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.Arrays; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.ReferenceExpression; + +/** Logical Dedupe Plan. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalLookup extends LogicalPlan { + + private final String indexName; + private final Map matchFieldMap; + private final Map copyFieldMap; + private final Boolean appendOnly; + + /** Constructor of LogicalDedupe. */ + public LogicalLookup( + LogicalPlan child, + String indexName, + Map matchFieldMap, + Boolean appendOnly, + Map copyFieldMap) { + super(Arrays.asList(child)); + this.indexName = indexName; + this.copyFieldMap = copyFieldMap; + this.matchFieldMap = matchFieldMap; + this.appendOnly = appendOnly; + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitLookup(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 156db35306..c05f8c91f6 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -52,6 +52,10 @@ public R visitDedupe(LogicalDedupe plan, C context) { return visitNode(plan, context); } + public R visitLookup(LogicalLookup plan, C context) { + return visitNode(plan, context); + } + public R visitRename(LogicalRename plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java new file mode 100644 index 0000000000..ad7a82559d --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java @@ -0,0 +1,137 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NonNull; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.ReferenceExpression; + +/** Lookup operator. Perform lookup on another OpenSearch index and enrich the results. */ +@Getter +@EqualsAndHashCode(callSuper = false) +public class LookupOperator extends PhysicalPlan { + @Getter private final PhysicalPlan input; + @Getter private final String indexName; + @Getter private final Map matchFieldMap; + @Getter private final Map copyFieldMap; + @Getter private final Boolean appendOnly; + private final BiFunction, Map> lookup; + + /** Lookup Constructor. */ + @NonNull + public LookupOperator( + PhysicalPlan input, + String indexName, + Map matchFieldMap, + Boolean appendOnly, + Map copyFieldMap, + BiFunction, Map> lookup) { + this.input = input; + this.indexName = indexName; + this.matchFieldMap = matchFieldMap; + this.appendOnly = appendOnly; + this.copyFieldMap = copyFieldMap; + this.lookup = lookup; + } + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitLookup(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + return input.hasNext(); + } + + @Override + public ExprValue next() { + ExprValue inputValue = input.next(); + + if (STRUCT == inputValue.type()) { + Map matchMap = new HashMap<>(); + Map finalMap = new HashMap<>(); + + for (Map.Entry matchField : + matchFieldMap.entrySet()) { + Object val = inputValue.bindingTuples().resolve(matchField.getValue()).value(); + if (val != null) { + matchMap.put(matchField.getKey().toString(), val); + } else { + // System.out.println("No value in search results for " + matchField.getValue() + " + // field"); + // No value in search results, so we just return the input + return inputValue; + } + } + + finalMap.put("_match", matchMap); + + Map copyMap = new HashMap<>(); + + if (!copyFieldMap.isEmpty()) { + + for (Map.Entry copyField : + copyFieldMap.entrySet()) { + copyMap.put(String.valueOf(copyField.getKey()), String.valueOf(copyField.getValue())); + } + + finalMap.put("_copy", copyMap.keySet()); + } + + Map source = lookup.apply(indexName, finalMap); + + if (source == null || source.isEmpty()) { + // no lookup found or lookup is empty, so we just return the original input value + return inputValue; + } + + Map tupleValue = ExprValueUtils.getTupleValue(inputValue); + Map resultBuilder = new HashMap<>(); + resultBuilder.putAll(tupleValue); + + if (appendOnly) { + + for (Map.Entry sourceField : source.entrySet()) { + String u = copyMap.get(sourceField.getKey()); + resultBuilder.putIfAbsent( + u == null ? sourceField.getKey() : u.toString(), + ExprValueUtils.fromObjectValue(sourceField.getValue())); + } + } else { + // default + + for (Map.Entry sourceField : source.entrySet()) { + String u = copyMap.get(sourceField.getKey()); + resultBuilder.put( + u == null ? sourceField.getKey() : u.toString(), + ExprValueUtils.fromObjectValue(sourceField.getValue())); + } + } + + return ExprTupleValue.fromExprValueMap(resultBuilder); + + } else { + return inputValue; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 147f0e08dc..2ab3f08106 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -78,6 +78,23 @@ public static DedupeOperator dedupe( input, Arrays.asList(expressions), allowedDuplication, keepEmpty, consecutive); } + public static LookupOperator lookup( + PhysicalPlan input, + String indexName, + Map matchFieldMap, + Boolean appendOnly, + Map copyFieldMap) { + return new LookupOperator( + input, + indexName, + matchFieldMap, + appendOnly, + copyFieldMap, + (a, b) -> { + throw new RuntimeException("not implemented by PhysicalPlanDSL"); + }); + } + public WindowOperator window( PhysicalPlan input, NamedExpression windowFunction, WindowDefinition windowDefinition) { return new WindowOperator(input, windowFunction, windowDefinition); diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 99b5cc8020..0a58b018cd 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -64,6 +64,10 @@ public R visitDedupe(DedupeOperator node, C context) { return visitNode(node, context); } + public R visitLookup(LookupOperator node, C context) { + return visitNode(node, context); + } + public R visitValues(ValuesOperator node, C context) { return visitNode(node, context); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java new file mode 100644 index 0000000000..cb05285ea3 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class LookupCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testLookup() throws IOException { + JSONObject result = executeQuery(String.format("source=%s | lookup %s male", TEST_INDEX_BANK)); + verifyDataRows(result, rows(true), rows(false)); + } + + @Test + public void testConsecutiveDedup() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | dedup male consecutive=true | fields male", TEST_INDEX_BANK)); + verifyDataRows(result, rows(true), rows(false), rows(true), rows(false)); + } + + @Test + public void testAllowMoreDuplicates() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | dedup 2 male | fields male", TEST_INDEX_BANK)); + verifyDataRows(result, rows(true), rows(true), rows(false), rows(false)); + } + + @Test + public void testKeepEmptyDedup() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | dedup balance keepempty=true | fields firstname, balance", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows( + result, + rows("Amber JOHnny", 39225), + rows("Hattie", null), + rows("Nanette", 32838), + rows("Dale", 4180), + rows("Elinor", null), + rows("Virginia", null), + rows("Dillard", 48086)); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index 0905c2f4b4..cb86978168 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -5,8 +5,20 @@ package org.opensearch.sql.opensearch.executor.protector; +import java.util.Map; +import java.util.Set; +import java.util.function.BiFunction; +import javax.annotation.Nullable; +import lombok.AllArgsConstructor; import lombok.RequiredArgsConstructor; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.monitor.ResourceMonitor; +import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.planner.physical.ADOperator; import org.opensearch.sql.opensearch.planner.physical.MLCommonsOperator; import org.opensearch.sql.opensearch.planner.physical.MLOperator; @@ -16,6 +28,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.LookupOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.ProjectOperator; @@ -29,11 +42,16 @@ /** OpenSearch Execution Protector. */ @RequiredArgsConstructor +@AllArgsConstructor public class OpenSearchExecutionProtector extends ExecutionProtector { /** OpenSearch resource monitor. */ private final ResourceMonitor resourceMonitor; + @Nullable + /** OpenSearch client. Maybe null * */ + private OpenSearchClient openSearchClient; + public PhysicalPlan protect(PhysicalPlan physicalPlan) { return physicalPlan.accept(this, null); } @@ -116,6 +134,70 @@ public PhysicalPlan visitDedupe(DedupeOperator node, Object context) { node.getConsecutive()); } + @Override + public PhysicalPlan visitLookup(LookupOperator node, Object context) { + return new LookupOperator( + visitInput(node.getInput(), context), + node.getIndexName(), + node.getMatchFieldMap(), + node.getAppendOnly(), + node.getCopyFieldMap(), + lookup()); + } + + private BiFunction, Map> lookup() { + + if (openSearchClient == null) { + throw new RuntimeException( + "Can not perform lookup because openSearchClient was null. This is likely a bug."); + } + + return (indexName, inputMap) -> { + Map matchMap = (Map) inputMap.get("_match"); + Set copySet = (Set) inputMap.get("_copy"); + + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + + for (Map.Entry f : matchMap.entrySet()) { + BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); + + // Todo: Search with term and a match query? Or terms only? + orQueryBuilder.should(new TermQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.should(new MatchQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.minimumShouldMatch(1); + + // filter is the same as "must" but ignores scoring + boolQueryBuilder.filter(orQueryBuilder); + } + + SearchResponse result = + openSearchClient + .getNodeClient() + .search( + new SearchRequest(indexName) + .source( + SearchSourceBuilder.searchSource() + .fetchSource( + copySet == null ? null : copySet.toArray(new String[0]), null) + .query(boolQueryBuilder) + .size(2))) + .actionGet(); + + int hits = result.getHits().getHits().length; + + if (hits == 0) { + // null indicates no hits for the lookup found + return null; + } + + if (hits != 1) { + throw new RuntimeException("too many hits for " + indexName + " (" + hits + ")"); + } + + return result.getHits().getHits()[0].getSourceAsMap(); + }; + } + @Override public PhysicalPlan visitWindow(WindowOperator node, Object context) { return new WindowOperator( diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 33a785c498..8789c79c30 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -69,8 +69,8 @@ public ResourceMonitor resourceMonitor(Settings settings) { } @Provides - public ExecutionProtector protector(ResourceMonitor resourceMonitor) { - return new OpenSearchExecutionProtector(resourceMonitor); + public ExecutionProtector protector(ResourceMonitor resourceMonitor, OpenSearchClient client) { + return new OpenSearchExecutionProtector(resourceMonitor, client); } @Provides diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9f707c13cd..f9c8382004 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -57,6 +57,7 @@ NUM: 'NUM'; // ARGUMENT KEYWORDS KEEPEMPTY: 'KEEPEMPTY'; CONSECUTIVE: 'CONSECUTIVE'; +APPENDONLY: 'APPENDONLY'; DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; PARTITIONS: 'PARTITIONS'; ALLNUM: 'ALLNUM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 39fb7f53a6..953bf155df 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -38,6 +38,7 @@ commands | renameCommand | statsCommand | dedupCommand + | lookupCommand | sortCommand | evalCommand | headCommand @@ -85,6 +86,18 @@ dedupCommand : DEDUP (number = integerLiteral)? fieldList (KEEPEMPTY EQUAL keepempty = booleanLiteral)? (CONSECUTIVE EQUAL consecutive = booleanLiteral)? ; +matchFieldWithOptAs + : orignalMatchField = fieldExpression (AS asMatchField = fieldExpression)? + ; + +copyFieldWithOptAs + : orignalCopyField = fieldExpression (AS asCopyField = fieldExpression)? + ; + +lookupCommand + : LOOKUP tableSource matchFieldWithOptAs (COMMA matchFieldWithOptAs)* (APPENDONLY EQUAL appendonly = booleanLiteral)? (copyFieldWithOptAs (COMMA copyFieldWithOptAs)*)* + ; + sortCommand : SORT sortbyClause ; @@ -833,6 +846,7 @@ keywordsCanBeId | RENAME | STATS | DEDUP + | LOOKUP | SORT | EVAL | HEAD diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3c693fa0bd..30a448924e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FromClauseContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.HeadCommandContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.LookupCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RareCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RenameCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SearchFilterFromContext; @@ -54,6 +55,7 @@ import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Project; @@ -212,6 +214,47 @@ public UnresolvedPlan visitDedupCommand(DedupCommandContext ctx) { return new Dedupe(ArgumentFactory.getArgumentList(ctx), getFieldList(ctx.fieldList())); } + /** Lookup command */ + @Override + public UnresolvedPlan visitLookupCommand(LookupCommandContext ctx) { + ArgumentFactory.getArgumentList(ctx); + ctx.tableSource(); + ctx.copyFieldWithOptAs(); + ctx.matchFieldWithOptAs(); + return new Lookup( + ctx.tableSource().tableQualifiedName().getText(), + ctx.matchFieldWithOptAs().stream() + .map( + ct -> + new Map( + evaluateFieldExpressionContext(ct.orignalMatchField), + evaluateFieldExpressionContext(ct.asMatchField, ct.orignalMatchField))) + .collect(Collectors.toList()), + ArgumentFactory.getArgumentList(ctx), + ctx.copyFieldWithOptAs().stream() + .map( + ct -> + new Map( + evaluateFieldExpressionContext(ct.orignalCopyField), + evaluateFieldExpressionContext(ct.asCopyField, ct.orignalCopyField))) + .collect(Collectors.toList())); + } + + private UnresolvedExpression evaluateFieldExpressionContext( + OpenSearchPPLParser.FieldExpressionContext f) { + return internalVisitExpression(f); + } + + private UnresolvedExpression evaluateFieldExpressionContext( + OpenSearchPPLParser.FieldExpressionContext f0, + OpenSearchPPLParser.FieldExpressionContext f1) { + if (f0 == null) { + return internalVisitExpression(f1); + } else { + return internalVisitExpression(f0); + } + } + /** Head command visitor. */ @Override public UnresolvedPlan visitHeadCommand(HeadCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index f89ecf9c6e..44c90b4d89 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.LookupCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RareCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsCommandContext; @@ -80,6 +81,13 @@ public static List getArgumentList(DedupCommandContext ctx) { : new Argument("consecutive", new Literal(false, DataType.BOOLEAN))); } + public static List getArgumentList(LookupCommandContext ctx) { + return Arrays.asList( + ctx.appendonly != null + ? new Argument("appendonly", getArgumentValue(ctx.appendonly)) + : new Argument("appendonly", new Literal(false, DataType.BOOLEAN))); + } + /** * Get list of {@link Argument}. * diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d28e5d122b..649d090c9d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -213,6 +213,20 @@ public String visitDedupe(Dedupe node, String context) { child, fields, allowedDuplication, keepEmpty, consecutive); } + // Todo: do we need anonymization for lookups? + /* + @Override + public String visitLookup(Lookup node, String context) { + String child = node.getChild().get(0).accept(this, context); + String fields = visitFieldList(node.getFields()); + List options = node.getOptions(); + Boolean appendonly = (Boolean) options.get(0).getValue().getValue(); + + return StringUtils.format( + "%s | lookup %s %d appendonly=%b ...", + child, fields, appendonly); + }*/ + @Override public String visitHead(Head node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index ced266ed78..f8cbe95ef1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -21,11 +21,13 @@ import static org.opensearch.sql.ast.dsl.AstDSL.eval; import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; +import static org.opensearch.sql.ast.dsl.AstDSL.fieldMap; import static org.opensearch.sql.ast.dsl.AstDSL.filter; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.head; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.let; +import static org.opensearch.sql.ast.dsl.AstDSL.lookup; import static org.opensearch.sql.ast.dsl.AstDSL.map; import static org.opensearch.sql.ast.dsl.AstDSL.nullLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.parse; @@ -44,6 +46,7 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.Collections; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -393,6 +396,18 @@ public void testDedupCommandWithSortby() { defaultDedupArgs())); } + @Test + public void testLookupCommand() { + assertEqual( + "source=t | lookup a field", + lookup( + relation("t"), + "a", + fieldMap("field", "field"), + exprList(argument("appendonly", booleanLiteral(false))), + Collections.emptyList())); + } + @Test public void testHeadCommand() { assertEqual("source=t | head", head(relation("t"), 10, 0)); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 7bcb87d193..769b6bd0a9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -616,10 +616,9 @@ public void functionNameCanBeUsedAsIdentifier() { + " | LOG10 | LOG2 | MOD | PI |POW | POWER | RAND | ROUND | SIGN | SQRT | TRUNCATE " + "| ACOS | ASIN | ATAN | ATAN2 | COS | COT | DEGREES | RADIANS | SIN | TAN"); assertFunctionNameCouldBeId( - "SEARCH | DESCRIBE | SHOW | FROM | WHERE | FIELDS | RENAME | STATS " - + "| DEDUP | SORT | EVAL | HEAD | TOP | RARE | PARSE | METHOD | REGEX | PUNCT | GROK " - + "| PATTERN | PATTERNS | NEW_FIELD | KMEANS | AD | ML | SOURCE | INDEX | D | DESC " - + "| DATASOURCES"); + "SEARCH | DESCRIBE | SHOW | FROM | WHERE | FIELDS | RENAME | STATS | DEDUP | LOOKUP | SORT" + + " | EVAL | HEAD | TOP | RARE | PARSE | METHOD | REGEX | PUNCT | GROK | PATTERN |" + + " PATTERNS | NEW_FIELD | KMEANS | AD | ML | SOURCE | INDEX | D | DESC | DATASOURCES"); } void assertFunctionNameCouldBeId(String antlrFunctionName) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index 761dbe2997..102f9591fe 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -14,12 +14,15 @@ import static org.opensearch.sql.ast.dsl.AstDSL.dedupe; import static org.opensearch.sql.ast.dsl.AstDSL.exprList; import static org.opensearch.sql.ast.dsl.AstDSL.field; +import static org.opensearch.sql.ast.dsl.AstDSL.fieldMap; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.lookup; import static org.opensearch.sql.ast.dsl.AstDSL.projectWithArg; import static org.opensearch.sql.ast.dsl.AstDSL.relation; import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import java.util.Collections; import org.junit.Test; import org.opensearch.sql.ppl.parser.AstBuilderTest; @@ -102,4 +105,53 @@ public void testSortFieldArgument() { public void testNoArgConstructorForArgumentFactoryShouldPass() { new ArgumentFactory(); } + + @Test + public void testLookupCommandRequiredArguments() { + assertEqual( + "source=t | lookup a field", + lookup( + relation("t"), + "a", + fieldMap("field", "field"), + exprList(argument("appendonly", booleanLiteral(false))), + Collections.emptyList())); + } + + @Test + public void testLookupCommandFieldArguments() { + assertEqual( + "source=t | lookup a field AS field1,field2 AS field3 destfield AS destfield1, destfield2" + + " AS destfield3", + lookup( + relation("t"), + "a", + fieldMap("field", "field1", "field2", "field3"), + exprList(argument("appendonly", booleanLiteral(false))), + fieldMap("destfield", "destfield1", "destfield2", "destfield3"))); + } + + @Test + public void testLookupCommandAppendTrueArgument() { + assertEqual( + "source=t | lookup a field appendonly=true", + lookup( + relation("t"), + "a", + fieldMap("field", "field"), + exprList(argument("appendonly", booleanLiteral(true))), + Collections.emptyList())); + } + + @Test + public void testLookupCommandAppendFalseArgument() { + assertEqual( + "source=t | lookup a field appendonly=false", + lookup( + relation("t"), + "a", + fieldMap("field", "field"), + exprList(argument("appendonly", booleanLiteral(false))), + Collections.emptyList())); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index cd51ea07df..b7001adaab 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -89,6 +89,11 @@ public void testDedupCommand() { anonymize("source=t | dedup f1, f2")); } + @Test + public void testLookupCommand() { + assertEquals("source=t | lookup ", anonymize("source=t | lookup a field1, field2")); + } + @Test public void testHeadCommandWithNumber() { assertEquals("source=t | head 3", anonymize("source=t | head 3")); From 036e07ab797408c802741ca9d5e6979783d5478e Mon Sep 17 00:00:00 2001 From: Lukasz Soszynski Date: Tue, 18 Jun 2024 19:20:05 +0200 Subject: [PATCH 02/20] Tests related to lookup and Analyzer class. Signed-off-by: Lukasz Soszynski --- .../org/opensearch/sql/analysis/Analyzer.java | 39 +- .../sql/planner/logical/LogicalPlanDSL.java | 9 + .../opensearch/sql/analysis/AnalyzerTest.java | 348 ++++++++++++++++++ 3 files changed, 382 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 1d8bdaf30e..aeff7443dd 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -512,8 +512,8 @@ public LogicalPlan visitDedupe(Dedupe node, AnalysisContext context) { /** Build {@link LogicalLookup}. */ @Override - public LogicalPlan visitLookup(Lookup node, AnalysisContext context) { - LogicalPlan child = node.getChild().get(0).accept(this, context); + public LogicalPlan visitLookup(Lookup node, AnalysisContext queryContext) { + LogicalPlan child = node.getChild().get(0).accept(this, queryContext); List options = node.getOptions(); // Todo, refactor the option. Boolean appendOnly = (Boolean) options.get(0).getValue().getValue(); @@ -529,29 +529,40 @@ public LogicalPlan visitLookup(Lookup node, AnalysisContext context) { String.format("no such lookup index %s", node.getIndexName())); } + AnalysisContext lookupTableContext = new AnalysisContext(); + TypeEnvironment lookupTableEnvironment = lookupTableContext.peek(); + table + .getFieldTypes() + .forEach( + (name, type) -> + lookupTableEnvironment.define(new Symbol(Namespace.FIELD_NAME, name), type)); + ImmutableMap matchFieldMap = + analyzeLookupMatchFields(node.getMatchFieldList(), queryContext, lookupTableContext); + return new LogicalLookup( child, node.getIndexName(), - analyzeLookupMatchFields(node.getMatchFieldList(), context), + matchFieldMap, appendOnly, - analyzeLookupCopyFields(node.getCopyFieldList(), context, table)); + analyzeLookupCopyFields(node.getCopyFieldList(), queryContext, table)); } private ImmutableMap analyzeLookupMatchFields( - List inputMap, AnalysisContext context) { + List inputMap, AnalysisContext queryContext, AnalysisContext lookupTableContext) { ImmutableMap.Builder copyMapBuilder = new ImmutableMap.Builder<>(); for (Map resultMap : inputMap) { - Expression origin = expressionAnalyzer.analyze(resultMap.getOrigin(), context); + Expression origin = expressionAnalyzer.analyze(resultMap.getOrigin(), lookupTableContext); if (resultMap.getTarget() instanceof Field) { - ReferenceExpression target = - new ReferenceExpression( - ((Field) resultMap.getTarget()).getField().toString(), origin.type()); - ReferenceExpression originExpr = DSL.ref(origin.toString(), origin.type()); - TypeEnvironment curEnv = context.peek(); - curEnv.remove(originExpr); - curEnv.define(target); - copyMapBuilder.put(originExpr, target); + Expression targerExpression = + expressionAnalyzer.analyze(resultMap.getTarget(), queryContext); + ReferenceExpression targetReference = + DSL.ref(targerExpression.toString(), targerExpression.type()); + ReferenceExpression originReference = DSL.ref(origin.toString(), origin.type()); + TypeEnvironment curEnv = queryContext.peek(); + curEnv.remove(originReference); + curEnv.define(targetReference); + copyMapBuilder.put(originReference, targetReference); } else { throw new SemanticCheckException( String.format("the target expected to be field, but is %s", resultMap.getTarget())); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 2a886ba0ca..9fb242a3bc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -138,4 +138,13 @@ public LogicalPlan values(List... values) { public static LogicalPlan limit(LogicalPlan input, Integer limit, Integer offset) { return new LogicalLimit(input, limit, offset); } + + public static LogicalPlan lookup( + LogicalPlan input, + String indexName, + Map matchFieldMap, + boolean appendOnly, + Map copyFields) { + return new LogicalLookup(input, indexName, matchFieldMap, appendOnly, copyFields); + } } diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 8d935b11d2..5070d9fa49 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -70,6 +70,8 @@ import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; @@ -82,11 +84,14 @@ import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.Kmeans; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -105,10 +110,94 @@ import org.opensearch.sql.planner.logical.LogicalPlanDSL; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalRelation; +import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.datasource.DataSourceTable; +import org.opensearch.sql.storage.StorageEngine; +import org.opensearch.sql.storage.Table; class AnalyzerTest extends AnalyzerTestBase { + private static final String LOOKUP_TABLE_DEVICE_NAMES = "device_names"; + public static final String NO_SUCH_TABLE = "no_such_table"; + public static final String TABLE_DOES_NOT_EXIST = "does_not_exist"; + + public static Map lookupTableFieldTypes = + new ImmutableMap.Builder() + .put("id", INTEGER) + .put("dev_id", STRING) + .put("serial_number", LONG) + .put("vendor", STRING) + .put("ip_v4", STRING) + .put("firmware_version", LONG) + .put("reliability_factor", DOUBLE) + .put("comment", ExprCoreType.STRING) + .build(); + + private final Table deviceNamesLookupTable; + private Table tableDoesNotExist; + + public AnalyzerTest() { + this.deviceNamesLookupTable = + new Table() { + @Override + public boolean exists() { + return true; + } + + @Override + public void create(Map schema) { + throw new UnsupportedOperationException("Create table is not supported"); + } + + @Override + public Map getFieldTypes() { + return lookupTableFieldTypes; + } + + @Override + public PhysicalPlan implement(LogicalPlan plan) { + throw new UnsupportedOperationException(); + } + + public Map getReservedFieldTypes() { + return ImmutableMap.of("_test", STRING); + } + }; + this.tableDoesNotExist = + new Table() { + + @Override + public boolean exists() { + return false; + } + + @Override + public Map getFieldTypes() { + return Map.of(); + } + + @Override + public PhysicalPlan implement(LogicalPlan plan) { + throw new UnsupportedOperationException(); + } + }; + } + + protected StorageEngine storageEngine() { + return (dataSourceSchemaName, tableName) -> { + switch (tableName) { + case NO_SUCH_TABLE: + return null; + case TABLE_DOES_NOT_EXIST: + return tableDoesNotExist; + case LOOKUP_TABLE_DEVICE_NAMES: + return deviceNamesLookupTable; + default: + return table; + } + }; + } + @Test public void filter_relation() { assertAnalyzeEqual( @@ -1767,4 +1856,263 @@ public void visit_close_cursor() { () -> assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor())); } + + @Test + public void visit_lookup_same_field_name_in_query_and_lookup_index() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), + new ReferenceExpression("field_value2", STRING)), + false, + Collections.emptyMap()), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + Collections.emptyList())); + } + + @Test + public void visit_lookup_use_multiple_fields() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("comment", STRING), + new ReferenceExpression("comment", STRING), + new ReferenceExpression("dev_id", STRING), + new ReferenceExpression("field_value1", STRING)), + false, + Collections.emptyMap()), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of( + AstDSL.map("comment", "comment"), AstDSL.map("dev_id", "field_value1")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + Collections.emptyList())); + } + + @Test + public void visit_lookup_various_field_name_in_query_and_lookup_index() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("dev_id", STRING), + new ReferenceExpression("comment", STRING)), + true, + Collections.emptyMap()), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("dev_id", "comment")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(true))), + Collections.emptyList())); + } + + @ParameterizedTest + @ValueSource(strings = {NO_SUCH_TABLE, TABLE_DOES_NOT_EXIST}) + public void visit_lookup_should_report_error_when_lookup_table_does_not_exist(String tableName) { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + tableName, + ImmutableList.of(AstDSL.map("string_value", "comment")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_join_field_does_not_exist() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("no_such_field", "comment")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_lookup_table() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("no_such_field", "comment")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_query_table() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "no_such_field")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void + visit_lookup_should_error_when_join_field_does_not_exist_in_query_table_but_exist_lookup_table() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "dev_id")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void + visit_lookup_should_error_when_join_field_does_not_exist_in_lookup_table_but_exist_query_table() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("string_value", "field_value2")), + ImmutableList.of(argument("appendonly", booleanLiteral(true))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_target_expression_point_out_function() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map(field("ip_v4"), intLiteral(7))), + ImmutableList.of(argument("appendonly", booleanLiteral(false))), + emptyList()); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_copy_field() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), + new ReferenceExpression("field_value2", STRING)), + false, + ImmutableMap.of( + new ReferenceExpression("vendor", STRING), + new ReferenceExpression("maker", STRING))), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.map("vendor", "maker")))); + } + + @Test + public void visit_lookup_copy_multiple_fields() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), + new ReferenceExpression("field_value2", STRING)), + false, + ImmutableMap.of( + new ReferenceExpression("vendor", STRING), + new ReferenceExpression("maker", STRING), + new ReferenceExpression("serial_number", LONG), + new ReferenceExpression("serial", LONG))), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of( + AstDSL.map("vendor", "maker"), AstDSL.map("serial_number", "serial")))); + } + + @Test + public void visit_lookup_copy_field_when_origin_and_target_is_the_same() { + assertAnalyzeEqual( + LogicalPlanDSL.lookup( + LogicalPlanDSL.relation("schema", table), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), + new ReferenceExpression("field_value2", STRING)), + false, + ImmutableMap.of( + new ReferenceExpression("vendor", STRING), + new ReferenceExpression("vendor", STRING))), + AstDSL.lookup( + AstDSL.relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.map("vendor", "vendor")))); + } + + @Test + public void visit_lookup_should_report_error_when_copy_field_does_not_exist() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(AstDSL.map("no_such_field", "maker"))); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_copy_target_is_not_a_field() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(AstDSL.map(AstDSL.field("vendor"), AstDSL.intLiteral(8)))); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } + + @Test + public void visit_lookup_should_report_error_when_copy_source_is_not_a_field() { + Lookup lookup = + AstDSL.lookup( + relation("schema"), + LOOKUP_TABLE_DEVICE_NAMES, + ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), + ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(AstDSL.map(AstDSL.booleanLiteral(true), AstDSL.field("maker")))); + + assertThrows(SemanticCheckException.class, () -> analyze(lookup)); + } } From 1c2a8f6159a3e57977cc462caca097578c3e45bc Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 25 Jun 2024 10:51:41 +0200 Subject: [PATCH 03/20] Removed System.out leftover Signed-off-by: Hendrik Saly --- .../org/opensearch/sql/planner/physical/LookupOperator.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java index ad7a82559d..77e7149c7e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java @@ -77,8 +77,6 @@ public ExprValue next() { if (val != null) { matchMap.put(matchField.getKey().toString(), val); } else { - // System.out.println("No value in search results for " + matchField.getValue() + " - // field"); // No value in search results, so we just return the input return inputValue; } From 1de0bdf7908944df8a39d2630ec7115122bfc1ac Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 25 Jun 2024 10:53:31 +0200 Subject: [PATCH 04/20] Added implementation in QueryDataAnonymizer and fixed related tests Signed-off-by: Hendrik Saly --- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 34 +++++++++++++++---- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 13 ++++++- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 649d090c9d..888fe5fc4b 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -36,6 +36,7 @@ import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.RareTopN; import org.opensearch.sql.ast.tree.Relation; @@ -213,19 +214,40 @@ public String visitDedupe(Dedupe node, String context) { child, fields, allowedDuplication, keepEmpty, consecutive); } - // Todo: do we need anonymization for lookups? - /* @Override public String visitLookup(Lookup node, String context) { String child = node.getChild().get(0).accept(this, context); - String fields = visitFieldList(node.getFields()); + String lookupIndexName = node.getIndexName(); + ImmutableMap.Builder matchMapBuilder = new ImmutableMap.Builder<>(); + for (Map matchMap : node.getMatchFieldList()) { + matchMapBuilder.put( + visitExpression(matchMap.getOrigin()), + ((Field) matchMap.getTarget()).getField().toString()); + } + String matches = + matchMapBuilder.build().entrySet().stream() + .map(entry -> StringUtils.format("%s as %s", entry.getKey(), entry.getValue())) + .collect(Collectors.joining(",")); + + ImmutableMap.Builder copyMapBuilder = new ImmutableMap.Builder<>(); + for (Map copyMap : node.getCopyFieldList()) { + copyMapBuilder.put( + visitExpression(copyMap.getOrigin()), + ((Field) copyMap.getTarget()).getField().toString()); + } + String copies = + copyMapBuilder.build().entrySet().stream() + .map(entry -> StringUtils.format("%s as %s", entry.getKey(), entry.getValue())) + .collect(Collectors.joining(",")); + List options = node.getOptions(); Boolean appendonly = (Boolean) options.get(0).getValue().getValue(); return StringUtils.format( - "%s | lookup %s %d appendonly=%b ...", - child, fields, appendonly); - }*/ + "%s | lookup %s %s appendonly=%b %s", + child, lookupIndexName, matches, appendonly, copies) + .trim(); + } @Override public String visitHead(Head node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index b7001adaab..88e6db1ab4 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -91,7 +91,18 @@ public void testDedupCommand() { @Test public void testLookupCommand() { - assertEquals("source=t | lookup ", anonymize("source=t | lookup a field1, field2")); + assertEquals( + "source=t | lookup index field1 as field1,field2 as field2 appendonly=false", + anonymize("source=t | lookup index field1,field2")); + assertEquals( + "source=t | lookup index field1 as field1,field2 as field2 appendonly=true", + anonymize("source=t | lookup index field1,field2 appendonly=true")); + assertEquals( + "source=t | lookup index field1 as field12,field2 as field22 appendonly=false copyfield1 as" + + " copyfield1,copyfield2 as copyfield22", + anonymize( + "source=t | lookup index field1 as field12, field2 AS field22 copyfield1, copyfield2 as" + + " copyfield22")); } @Test From cda7a80bf4f5c64bd3b62961af3906518607ad89 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 25 Jun 2024 11:50:10 +0200 Subject: [PATCH 05/20] Add documentation Signed-off-by: Hendrik Saly --- docs/user/ppl/cmd/lookup.rst | 139 +++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 docs/user/ppl/cmd/lookup.rst diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst new file mode 100644 index 0000000000..8580161753 --- /dev/null +++ b/docs/user/ppl/cmd/lookup.rst @@ -0,0 +1,139 @@ +============= +lookup +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Use the ``lookup`` command to do a lookup from another index and add the fields and values from the lookup document to the search result. + +Syntax +============ +lookup [AS ] ["," [AS ]]... [appendonly=] [ [AS ]] ["," [AS ]]... + +* lookup-index: mandatory. the name of the lookup index. If more than one is provided, all of them must match. +* lookup-field: mandatory. the name of the lookup field. Must be existing in the lookup-index. It is used to match to a local field (in the current search) to get the lookup document. When there is no lookup document matching it is a no-op. If there is more than one an exception is thrown. +* local-lookup-field: optional. the name of a field in the current search to match against the lookup-field. **Default:** value of lookup-field. +* appendonly: optional. indicates if the values to copy over to the search result from the lookup document should overwrite existing values. If true no existing values are overwritten. **Default:** false. +* source-field: optional. the fields to copy over from the lookup document to the search result. If no such fields are given all fields are copied. **Default:** all fields +* local-source-field: optional. the final name of the field in the search result (if different from the field name in the lookup document). **Default:** value of source-field. + +Note: To check if there is a match between the lookup index and the current search result a term and a match query for the field value of lookup-field is performed. + +Example 1: Simple lookup +============================= + +The example shows a simple lookup to add the name of a person from a lookup index. + +PPL query:: + + os> source=accounts; + fetched rows / total rows = 2/2 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 13 | F | + +------------------+----------+ + + os> source=hr; + fetched rows / total rows = 2/2 + +------------------+----------+ + | account_number | name | + |------------------+----------| + | 1 | John | + | 13 | Alice | + +------------------+----------+ + + os> source=accounts | lookup hr account_number; + fetched rows / total rows = 2/2 + +------------------+----------+----------+ + | account_number | gender | name | + |------------------+----------|----------| + | 1 | M | John | + | 13 | F | Alice | + +------------------+----------+----------+ + + +Example 2: Lookup with different field names +============================================ + +The example show a lookup to add the name of a person from a lookup index with different field names. + +PPL query:: + + os> source=accounts; + fetched rows / total rows = 2/2 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 13 | F | + +------------------+----------+ + + os> source=hr; + fetched rows / total rows = 2/2 + +------------------+----------+ + | employee_number | name | + |------------------+----------| + | 1 | John | + | 13 | Alice | + +------------------+----------+ + + os> source=accounts | lookup hr employee_number AS account_number name AS given_name; + fetched rows / total rows = 2/2 + +------------------+----------+----------------+ + | account_number | gender | given_name | + |------------------+----------|----------------| + | 1 | M | John | + | 13 | F | Alice | + +------------------+----------+----------------+ + +Example 3: Full lookup example +============================== + +The example show a lookup to add the name of a person from a lookup index with different field names. + +PPL query:: + + os> source=accounts; + fetched rows / total rows = 3/3 + +------------------+----------+------------+ + | account_number | gender | department | + |------------------+----------+------------| + | 1 | M | finance | + | 13 | F | it | + | 20 | M | it | + +------------------+----------+------------+ + + os> source=hr; + fetched rows / total rows = 4/4 + +------------------+----------+------------+--------+ + | employee_number | name | dep | active | + |------------------+----------|------------|--------| + | 1 | John | finance | true | + | 13 | Alice | finance | false | + | 13 | Melinda | it | true | + | 19 | Jack | finance | true | + +------------------+----------+------------+--------+ + + os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=true name AS given_name, active AS is_active ; + fetched rows / total rows = 2/2 + +------------------+----------+----------------+------------+-----------+ + | account_number | gender | given_name | department | is_active | + |------------------+----------|----------------|------------|-----------| + | 1 | M | John | finance | true | + | 13 | F | Melinda | it | true | + | 20 | M | NULL | it | true | + +------------------+----------+----------------+------------+-----------+ + + +Limitation +========== +The ``lookup`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. From 488c9af32fd6c40be80bf64523d90d49be5d9714 Mon Sep 17 00:00:00 2001 From: Lukasz Soszynski Date: Tue, 25 Jun 2024 18:49:30 +0200 Subject: [PATCH 06/20] Added more lookup operator tests Signed-off-by: Lukasz Soszynski --- .../org/opensearch/sql/executor/Explain.java | 4 +- .../sql/planner/physical/LookupOperator.java | 42 +- .../sql/planner/physical/PhysicalPlanDSL.java | 13 +- .../opensearch/sql/executor/ExplainTest.java | 28 + .../sql/planner/DefaultImplementorTest.java | 98 ++- .../logical/LogicalPlanNodeVisitorTest.java | 13 +- .../planner/physical/LookupOperatorTest.java | 669 ++++++++++++++++++ .../physical/PhysicalPlanNodeVisitorTest.java | 25 +- .../opensearch/sql/utils/MatcherUtils.java | 75 ++ 9 files changed, 895 insertions(+), 72 deletions(-) create mode 100644 core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index d4b15a1cdc..bec6c5e47d 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -166,8 +166,8 @@ public ExplainResponseNode visitLookup(LookupOperator node, Object context) { explainNode -> explainNode.setDescription( ImmutableMap.of( - "copyfields", node.getCopyFieldMap(), - "matchfields", node.getMatchFieldMap(), + "copyfields", node.getCopyFieldMap().toString(), + "matchfields", node.getMatchFieldMap().toString(), "indexname", node.getIndexName(), "appendonly", node.getAppendOnly()))); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java index 77e7149c7e..7117d87f5d 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java @@ -15,6 +15,7 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; +import lombok.ToString; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -22,6 +23,7 @@ /** Lookup operator. Perform lookup on another OpenSearch index and enrich the results. */ @Getter +@ToString @EqualsAndHashCode(callSuper = false) public class LookupOperator extends PhysicalPlan { @Getter private final PhysicalPlan input; @@ -29,6 +31,8 @@ public class LookupOperator extends PhysicalPlan { @Getter private final Map matchFieldMap; @Getter private final Map copyFieldMap; @Getter private final Boolean appendOnly; + + @EqualsAndHashCode.Exclude private final BiFunction, Map> lookup; /** Lookup Constructor. */ @@ -96,37 +100,29 @@ public ExprValue next() { finalMap.put("_copy", copyMap.keySet()); } - Map source = lookup.apply(indexName, finalMap); + Map lookupResult = lookup.apply(indexName, finalMap); - if (source == null || source.isEmpty()) { + if (lookupResult == null || lookupResult.isEmpty()) { // no lookup found or lookup is empty, so we just return the original input value return inputValue; } - Map tupleValue = ExprValueUtils.getTupleValue(inputValue); - Map resultBuilder = new HashMap<>(); - resultBuilder.putAll(tupleValue); - - if (appendOnly) { - - for (Map.Entry sourceField : source.entrySet()) { - String u = copyMap.get(sourceField.getKey()); - resultBuilder.putIfAbsent( - u == null ? sourceField.getKey() : u.toString(), - ExprValueUtils.fromObjectValue(sourceField.getValue())); - } - } else { - // default - - for (Map.Entry sourceField : source.entrySet()) { - String u = copyMap.get(sourceField.getKey()); - resultBuilder.put( - u == null ? sourceField.getKey() : u.toString(), - ExprValueUtils.fromObjectValue(sourceField.getValue())); + Map tupleInputValue = ExprValueUtils.getTupleValue(inputValue); + Map resultTupleBuilder = new HashMap<>(); + resultTupleBuilder.putAll(tupleInputValue); + for (Map.Entry sourceOfAdditionalField : lookupResult.entrySet()) { + String lookedUpFieldName = sourceOfAdditionalField.getKey(); + Object lookedUpFieldValue = sourceOfAdditionalField.getValue(); + String finalFieldName = copyMap.getOrDefault(lookedUpFieldName, lookedUpFieldName); + ExprValue value = ExprValueUtils.fromObjectValue(lookedUpFieldValue); + if (appendOnly) { + resultTupleBuilder.putIfAbsent(finalFieldName, value); + } else { + resultTupleBuilder.put(finalFieldName, value); } } - return ExprTupleValue.fromExprValueMap(resultBuilder); + return ExprTupleValue.fromExprValueMap(resultTupleBuilder); } else { return inputValue; diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 2ab3f08106..9649b620ed 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import lombok.experimental.UtilityClass; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -83,16 +84,10 @@ public static LookupOperator lookup( String indexName, Map matchFieldMap, Boolean appendOnly, - Map copyFieldMap) { + Map copyFieldMap, + BiFunction, Map> lookupFunction) { return new LookupOperator( - input, - indexName, - matchFieldMap, - appendOnly, - copyFieldMap, - (a, b) -> { - throw new RuntimeException("not implemented by PhysicalPlanDSL"); - }); + input, indexName, matchFieldMap, appendOnly, copyFieldMap, lookupFunction); } public WindowOperator window( diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index 897347f22d..7c59a27f23 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -21,6 +21,7 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.eval; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.lookup; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.nested; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.project; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rareTopN; @@ -235,6 +236,33 @@ void can_explain_nested() { explain.apply(plan)); } + @Test + void can_explain_lookup() { + PhysicalPlan plan = + lookup( + tableScan, + "lookup_index_name", + Map.of(ref("lookup_index_field", STRING), ref("query_index_field", STRING)), + true, + Map.of(ref("lookup_index_field_name", STRING), ref("renamed_field", STRING)), + null); + assertEquals( + new ExplainResponse( + new ExplainResponseNode( + "LookupOperator", + Map.of( + "copyfields", + "{lookup_index_field_name=renamed_field}", + "matchfields", + "{lookup_index_field=query_index_field}", + "indexname", + "lookup_index_name", + "appendonly", + true), + singletonList(tableScan.explainNode()))), + explain.apply(plan)); + } + private static class FakeTableScan extends TableScanOperator { @Override public boolean hasNext() { diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 45d8f6c03c..423163dc0e 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -21,6 +21,7 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.eval; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.lookup; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.rareTopN; @@ -122,22 +123,27 @@ public void visit_should_return_default_physical_operator() { nested( limit( LogicalPlanDSL.dedupe( - rareTopN( - sort( - eval( - remove( - rename( - aggregation( - filter(values(emptyList()), filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), + lookup( + rareTopN( + sort( + eval( + remove( + rename( + aggregation( + filter(values(emptyList()), filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + "lookup_index_name", + Map.of(), + false, + Map.of()), dedupeField), limit, offset), @@ -152,24 +158,30 @@ public void visit_should_return_default_physical_operator() { PhysicalPlanDSL.nested( PhysicalPlanDSL.limit( PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.rareTopN( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - PhysicalPlanDSL.filter( - PhysicalPlanDSL.values(emptyList()), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), + PhysicalPlanDSL.lookup( + PhysicalPlanDSL.rareTopN( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + PhysicalPlanDSL.filter( + PhysicalPlanDSL.values(emptyList()), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + "lookup_index_name", + Map.of(), + false, + Map.of(), + null), dedupeField), limit, offset), @@ -278,4 +290,22 @@ public void visitPaginate_should_remove_it_from_tree() { new ProjectOperator(new ValuesOperator(List.of(List.of())), List.of(), List.of()); assertEquals(physicalPlanTree, logicalPlanTree.accept(implementor, null)); } + + @Test + public void visitLookup_should_build_LookupOperator() { + LogicalPlan values = values(List.of(DSL.literal("to be or not to be"))); + var logicalPlan = lookup(values, "lookup_index_name", Map.of(), false, Map.of()); + var expectedPhysicalPlan = + PhysicalPlanDSL.lookup( + new ValuesOperator(List.of(List.of(DSL.literal("to be or not to be")))), + "lookup_index_name", + Map.of(), + false, + Map.of(), + null); + + PhysicalPlan lookupOperator = logicalPlan.accept(implementor, null); + + assertEquals(expectedPhysicalPlan, lookupOperator); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index f212749f48..f5719fa5c8 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -125,6 +125,7 @@ public TableWriteOperator build(PhysicalPlan child) { LogicalPlan ad = new LogicalAD(relation, Map.of()); LogicalPlan ml = new LogicalML(relation, Map.of()); LogicalPlan paginate = new LogicalPaginate(42, List.of(relation)); + LogicalPlan lookup = new LogicalLookup(relation, "lookup_index", Map.of(), true, Map.of()); List> nestedArgs = List.of( @@ -163,7 +164,8 @@ public TableWriteOperator build(PhysicalPlan child) { paginate, nested, cursor, - closeCursor) + closeCursor, + lookup) .map(Arguments::of); } @@ -214,5 +216,14 @@ public Integer visitRareTopN(LogicalRareTopN plan, Object context) { .mapToInt(Integer::intValue) .sum(); } + + @Override + public Integer visitLookup(LogicalLookup plan, Object context) { + return 1 + + plan.getChild().stream() + .map(child -> child.accept(this, context)) + .mapToInt(Integer::intValue) + .sum(); + } } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java new file mode 100644 index 0000000000..2da1811e86 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java @@ -0,0 +1,669 @@ +package org.opensearch.sql.planner.physical; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.utils.MatcherUtils.containsNull; +import static org.opensearch.sql.utils.MatcherUtils.containsValue; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.BiFunction; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.stubbing.Answer; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.expression.ReferenceExpression; + +@ExtendWith(MockitoExtension.class) +public class LookupOperatorTest extends PhysicalPlanTestBase { + + public static final String LOOKUP_INDEX = "lookup_index"; + + public static final ImmutableList> LOOKUP_TABLE = + ImmutableList.of( + ImmutableMap.of( + "id", 1, "ip", "v4", "ip_v4", "112.111.162.4", "region", "USA", "class", "A"), + ImmutableMap.of( + "id", 2, "ip", "4", "ip_v4", "74.125.19.106", "region", "EU", "class", "A")); + + public static final ImmutableList> LOOKUP_TABLE_WITH_NULLS = + ImmutableList.of( + new HashMap<>( + ImmutableMap.of( + "id", 1, "ip", "v4", "ip_v4", "112.111.162.4", "region", "USA", "class", "A")) { + { + put("class", null); + } + }, + ImmutableMap.of( + "id", 2, "ip", "4", "ip_v4", "74.125.19.106", "region", "EU", "class", "A")); + + @Mock private BiFunction, Map> lookupFunction; + + @Test + public void lookup_empty_table() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip", Collections.emptyList())); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "112.111.162.4", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "74.125.19.106", + "action", + "POST", + "response", + 200, + "referer", + "www.google.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of("ip", "74.125.19.106", "action", "POST", "response", 500)))); + } + + @Test + public void lookup_append_only_true() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "112.111.162.4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("region", "USA"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + } + + @Test + public void lookup_append_only_false() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + false, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "v4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("region", "USA")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "4"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "4"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU")))); + } + + @Test + public void lookup_copy_one_field() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of( + new ReferenceExpression("class", STRING), + new ReferenceExpression("ip_address_class", STRING)), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "112.111.162.4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("ip_address_class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("ip_address_class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("ip_address_class", "A")))); + } + + @Test + public void lookup_copy_multiple_fields() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of( + new ReferenceExpression("class", STRING), + new ReferenceExpression("class", STRING), + new ReferenceExpression("id", INTEGER), + new ReferenceExpression("address_id", INTEGER)), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "112.111.162.4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("class", "A"), + containsValue("address_id", 1)))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("class", "A"), + containsValue("address_id", 2)))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("class", "A"), + containsValue("address_id", 2)))); + } + + @Test + public void lookup_empty_join_field() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + List queryResults = + new ImmutableList.Builder() + .addAll(inputs) + .add( + ExprValueUtils.tupleValue( + ImmutableMap.of("action", "GET", "response", 200, "referer", "www.amazon.com"))) + .build(); + PhysicalPlan plan = + new LookupOperator( + testScan(queryResults), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(6)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "112.111.162.4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("region", "USA"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of("action", "GET", "response", 200, "referer", "www.amazon.com")))); + } + + @Test + public void lookup_ignore_non_struct_input() { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE)); + ExprStringValue stringExpression = + new ExprStringValue("Expression of string type should be ignored"); + List queryResults = + new ImmutableList.Builder().addAll(inputs).add(stringExpression).build(); + PhysicalPlan plan = + new LookupOperator( + testScan(queryResults), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + true, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(6)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip", "112.111.162.4"), + containsValue("ip_v4", "112.111.162.4"), + containsValue("region", "USA"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip", "74.125.19.106"), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat(result, hasItem(stringExpression)); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void lookup_table_with_nulls(boolean appendOnly) { + when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) + .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE_WITH_NULLS)); + PhysicalPlan plan = + new LookupOperator( + new TestScan(), + LOOKUP_INDEX, + ImmutableMap.of( + new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), + appendOnly, + ImmutableMap.of(), + lookupFunction); + + List result = execute(plan); + + assertThat(result, hasSize(5)); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 200, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "ip", + "209.160.24.63", + "action", + "GET", + "response", + 404, + "referer", + "www.amazon.com")))); + assertThat( + result, + hasItem( + allOf( + containsValue("ip_v4", "112.111.162.4"), + containsValue("region", "USA"), + containsNull("class")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 200), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + assertThat( + result, + hasItem( + allOf( + containsValue("response", 500), + containsValue("ip_v4", "74.125.19.106"), + containsValue("region", "EU"), + containsValue("class", "A")))); + } + + private static @NotNull Answer> lookupTableQueryResults( + String lookupTableFieldName, List> lookupTableContent) { + return invocationOnMock -> { + String lookupTableName = invocationOnMock.getArgument(0); + if (!LOOKUP_INDEX.equals(lookupTableName)) { + return ImmutableMap.of(); + } + HashMap> parameters = invocationOnMock.getArgument(1); + String valueOfJoinFieldInLookupTable = + (String) parameters.get("_match").get(lookupTableFieldName); + if (Objects.isNull(valueOfJoinFieldInLookupTable)) { + return null; + } + return lookupTableContent.stream() + .filter(map -> valueOfJoinFieldInLookupTable.equals(map.get(lookupTableFieldName))) + .findAny() + .orElse(ImmutableMap.of()); + }; + } +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index c91ae8787c..de91f93b9a 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -17,6 +17,7 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.eval; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.lookup; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.project; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rareTopN; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.remove; @@ -64,7 +65,16 @@ public void print_physical_plan() { agg( rareTopN( filter( - limit(new TestScan(), 1, 1), + limit( + lookup( + new TestScan(), + "lookup_index", + Map.of(), + true, + Map.of(), + null), + 1, + 1), DSL.equal(DSL.ref("response", INTEGER), DSL.literal(10))), CommandType.TOP, ImmutableList.of(), @@ -84,7 +94,8 @@ public void print_physical_plan() { + "\t\t\tAggregation->\n" + "\t\t\t\tRareTopN->\n" + "\t\t\t\t\tFilter->\n" - + "\t\t\t\t\t\tLimit->", + + "\t\t\t\t\t\tLimit->\n" + + "\t\t\t\t\t\t\tLookup->", printer.print(plan)); } @@ -131,6 +142,8 @@ public static Stream getPhysicalPlanForTest() { PhysicalPlan cursorClose = new CursorCloseOperator(plan); + PhysicalPlan lookup = lookup(plan, "lookup_index", Map.of(), false, Map.of(), null); + return Stream.of( Arguments.of(filter, "filter"), Arguments.of(aggregation, "aggregation"), @@ -145,7 +158,8 @@ public static Stream getPhysicalPlanForTest() { Arguments.of(rareTopN, "rareTopN"), Arguments.of(limit, "limit"), Arguments.of(nested, "nested"), - Arguments.of(cursorClose, "cursorClose")); + Arguments.of(cursorClose, "cursorClose"), + Arguments.of(lookup, "Lookup")); } @ParameterizedTest(name = "{1}") @@ -219,6 +233,11 @@ public String visitLimit(LimitOperator node, Integer tabs) { return name(node, "Limit->", tabs); } + @Override + public String visitLookup(LookupOperator node, Integer tabs) { + return name(node, "Lookup->", tabs); + } + private String name(PhysicalPlan node, String current, int tabs) { String child = node.getChild().get(0).accept(this, tabs + 1); StringBuilder sb = new StringBuilder(); diff --git a/core/src/test/java/org/opensearch/sql/utils/MatcherUtils.java b/core/src/test/java/org/opensearch/sql/utils/MatcherUtils.java index 206f05a38a..6b0d5e2ac7 100644 --- a/core/src/test/java/org/opensearch/sql/utils/MatcherUtils.java +++ b/core/src/test/java/org/opensearch/sql/utils/MatcherUtils.java @@ -5,7 +5,9 @@ package org.opensearch.sql.utils; +import java.util.Objects; import org.hamcrest.Description; +import org.hamcrest.TypeSafeDiagnosingMatcher; import org.hamcrest.TypeSafeMatcher; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprCoreType; @@ -41,4 +43,77 @@ protected boolean matchesSafely(ExprValue value) { } }; } + + public static TypeSafeDiagnosingMatcher containsValue(String key, Object value) { + Objects.requireNonNull(key, "Key is required"); + Objects.requireNonNull(value, "Value is required"); + return new TypeSafeDiagnosingMatcher<>() { + + @Override + protected boolean matchesSafely(ExprValue item, Description mismatchDescription) { + ExprValue expressionForKey = item.keyValue(key); + if (Objects.isNull(expressionForKey)) { + mismatchDescription + .appendValue(item) + .appendText(" does not contain key ") + .appendValue(key); + return false; + } + Object givenValue = expressionForKey.value(); + if (value.equals(givenValue)) { + return true; + } + mismatchDescription + .appendText(" value for key ") + .appendValue(key) + .appendText(" was ") + .appendValue(givenValue); + return false; + } + + @Override + public void describeTo(Description description) { + description + .appendText("ExprValue should contain key ") + .appendValue(key) + .appendText(" with string value ") + .appendValue(value); + } + }; + } + + public static TypeSafeDiagnosingMatcher containsNull(String key) { + Objects.requireNonNull(key, "Key is required"); + return new TypeSafeDiagnosingMatcher<>() { + + @Override + protected boolean matchesSafely(ExprValue item, Description mismatchDescription) { + ExprValue expressionForKey = item.keyValue(key); + if (Objects.isNull(expressionForKey)) { + mismatchDescription + .appendValue(item) + .appendText(" does not contain key ") + .appendValue(key); + return false; + } + if (expressionForKey.isNull()) { + return true; + } + mismatchDescription + .appendText(" value for key ") + .appendValue(key) + .appendText(" was ") + .appendValue(expressionForKey.value()); + return false; + } + + @Override + public void describeTo(Description description) { + description + .appendText("ExprValue should contain key ") + .appendValue(key) + .appendText(" with null value "); + } + }; + } } From c2edb51210a2a805ecc8a61a1b951c9c67059567 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Fri, 28 Jun 2024 15:21:02 +0200 Subject: [PATCH 07/20] Fix typo Signed-off-by: Hendrik Saly Fix typo Signed-off-by: Hendrik Saly --- .../org/opensearch/sql/planner/logical/LogicalLookup.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java index 6269b56e37..3079b69941 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java @@ -12,7 +12,7 @@ import lombok.ToString; import org.opensearch.sql.expression.ReferenceExpression; -/** Logical Dedupe Plan. */ +/** Logical Lookup Plan. */ @Getter @ToString @EqualsAndHashCode(callSuper = true) @@ -23,7 +23,7 @@ public class LogicalLookup extends LogicalPlan { private final Map copyFieldMap; private final Boolean appendOnly; - /** Constructor of LogicalDedupe. */ + /** Constructor of LogicalLookup. */ public LogicalLookup( LogicalPlan child, String indexName, From 9da10e384fc93e1bf01d0dab1936d142c5276865 Mon Sep 17 00:00:00 2001 From: lukasz-soszynski-eliatra <110241464+lukasz-soszynski-eliatra@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:45:56 +0200 Subject: [PATCH 08/20] One unit test and integration tests for the lookup operator. Signed-off-by: Lukasz Soszynski --- .../sql/planner/DefaultImplementor.java | 2 +- .../sql/planner/logical/LogicalPlanDSL.java | 2 +- .../sql/planner/DefaultImplementorTest.java | 17 ++ .../sql/legacy/SQLIntegTestCase.java | 12 +- .../opensearch/sql/legacy/TestsConstants.java | 2 + .../opensearch/sql/ppl/LookupCommandIT.java | 187 +++++++++++++++--- .../iot_readings_index_mappings.json | 24 +++ .../iot_sensors_index_mappings.json | 36 ++++ .../src/test/resources/iot_readings.json | 13 ++ .../src/test/resources/iot_sensors.json | 6 + 10 files changed, 276 insertions(+), 25 deletions(-) create mode 100644 integ-test/src/test/resources/indexDefinitions/iot_readings_index_mappings.json create mode 100644 integ-test/src/test/resources/indexDefinitions/iot_sensors_index_mappings.json create mode 100644 integ-test/src/test/resources/iot_readings.json create mode 100644 integ-test/src/test/resources/iot_sensors.json diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index 96a3be1f69..6d19bb312f 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -85,7 +85,7 @@ public PhysicalPlan visitLookup(LogicalLookup node, C context) { node.getAppendOnly(), node.getCopyFieldMap(), (a, b) -> { - throw new RuntimeException("not implemented by DefaultImplementor"); + throw new UnsupportedOperationException("Lookup not implemented by DefaultImplementor"); }); } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 9fb242a3bc..f2dfdfe661 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -143,7 +143,7 @@ public static LogicalPlan lookup( LogicalPlan input, String indexName, Map matchFieldMap, - boolean appendOnly, + Boolean appendOnly, Map copyFields) { return new LogicalLookup(input, indexName, matchFieldMap, appendOnly, copyFields); } diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 423163dc0e..930eb63a03 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -48,6 +48,7 @@ import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.model.ExprBooleanValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.expression.DSL; @@ -59,6 +60,7 @@ import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.expression.window.ranking.RowNumberFunction; import org.opensearch.sql.planner.logical.LogicalCloseCursor; +import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; @@ -308,4 +310,19 @@ public void visitLookup_should_build_LookupOperator() { assertEquals(expectedPhysicalPlan, lookupOperator); } + + @Test + public void visitLookup_should_throw_unsupportedOperationException() { + LogicalLookup input = mock(LogicalLookup.class); + LogicalPlan dataSource = mock(LogicalPlan.class); + PhysicalPlan physicalSource = mock(PhysicalPlan.class); + when(dataSource.accept(implementor, null)).thenReturn(physicalSource); + when(input.getChild()).thenReturn(List.of(dataSource)); + PhysicalPlan lookupOperator = implementor.visitLookup(input, null); + when(physicalSource.next()).thenReturn(ExprValueUtils.tupleValue(Map.of("field", "value"))); + + var ex = assertThrows(UnsupportedOperationException.class, () -> lookupOperator.next()); + + assertEquals("Lookup not implemented by DefaultImplementor", ex.getMessage()); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 63c44bf831..8d46a3dfd3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -724,7 +724,17 @@ public enum Index { TestsConstants.TEST_INDEX_NESTED_WITH_NULLS, "multi_nested", getNestedTypeIndexMapping(), - "src/test/resources/nested_with_nulls.json"); + "src/test/resources/nested_with_nulls.json"), + IOT_READINGS( + TestsConstants.TEST_INDEX_IOT_READINGS, + "iot_readings", + getMappingFile("iot_readings_index_mapping.json"), + "src/test/resources/iot_readings.json"), + IOT_SENSORS( + TestsConstants.TEST_INDEX_IOT_SENSORS, + "iot_sensors", + getMappingFile("iot_sensors_index_mapping.json"), + "src/test/resources/iot_sensors.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 29bc9813fa..6243c78fe4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -57,6 +57,8 @@ public class TestsConstants { public static final String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; + public static final String TEST_INDEX_IOT_READINGS = TEST_INDEX + "_iot_readings"; + public static final String TEST_INDEX_IOT_SENSORS = TEST_INDEX + "_iot_sensors"; public static final String DATASOURCES = ".ql-datasources"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java index cb05285ea3..ade37b1241 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java @@ -5,8 +5,8 @@ package org.opensearch.sql.ppl; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_IOT_READINGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_IOT_SENSORS; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -18,47 +18,190 @@ public class LookupCommandIT extends PPLIntegTestCase { @Override public void init() throws IOException { - loadIndex(Index.BANK); - loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.IOT_READINGS); + loadIndex(Index.IOT_SENSORS); } @Test public void testLookup() throws IOException { - JSONObject result = executeQuery(String.format("source=%s | lookup %s male", TEST_INDEX_BANK)); - verifyDataRows(result, rows(true), rows(false)); + JSONObject result = + executeQuery( + String.format( + "source=%s | lookup %s did as device-id | sort @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); + verifyDataRows( + result, + rows( + 28.1, + "2015-01-20 15:31:32.406431", + 255, + "temperature-basement", + "meter", + 255, + "VendorOne"), + rows( + 27.8, + "2016-01-20 15:31:33.509334", + 256, + "temperature-living-room", + "temperature meter", + 256, + "VendorTwo"), + rows( + 27.4, + "2017-01-20 15:31:35.732436", + 257, + "temperature-bedroom", + "camcorder", + 257, + "VendorThree"), + rows( + 28.5, + "2018-01-20 15:32:32.406431", + 255, + "temperature-basement", + "meter", + 255, + "VendorOne"), + rows( + 27.9, + "2019-01-20 15:32:33.509334", + 256, + "temperature-living-room", + "temperature meter", + 256, + "VendorTwo"), + rows( + 27.4, + "2020-01-20 15:32:35.732436", + 257, + "temperature-bedroom", + "camcorder", + 257, + "VendorThree")); } @Test - public void testConsecutiveDedup() throws IOException { + public void testLookupSelectedAttribute() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | dedup male consecutive=true | fields male", TEST_INDEX_BANK)); - verifyDataRows(result, rows(true), rows(false), rows(true), rows(false)); + "source=%s | lookup %s did as device-id type, vendor | sort @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); + verifyDataRows( + result, + rows(28.1, "2015-01-20 15:31:32.406431", 255, "meter", "VendorOne"), + rows(27.8, "2016-01-20 15:31:33.509334", 256, "temperature meter", "VendorTwo"), + rows(27.4, "2017-01-20 15:31:35.732436", 257, "camcorder", "VendorThree"), + rows(28.5, "2018-01-20 15:32:32.406431", 255, "meter", "VendorOne"), + rows(27.9, "2019-01-20 15:32:33.509334", 256, "temperature meter", "VendorTwo"), + rows(27.4, "2020-01-20 15:32:35.732436", 257, "camcorder", "VendorThree")); } @Test - public void testAllowMoreDuplicates() throws IOException { + public void testLookupRenameSelectedAttributes() throws IOException { JSONObject result = - executeQuery(String.format("source=%s | dedup 2 male | fields male", TEST_INDEX_BANK)); - verifyDataRows(result, rows(true), rows(true), rows(false), rows(false)); + executeQuery( + String.format( + "source=%s | lookup %s did as device-id did as dev_id, type as kind, vendor | sort" + + " @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); + verifyDataRows( + result, + rows(28.1, "2015-01-20 15:31:32.406431", 255, 255, "meter", "VendorOne"), + rows(27.8, "2016-01-20 15:31:33.509334", 256, 256, "temperature meter", "VendorTwo"), + rows(27.4, "2017-01-20 15:31:35.732436", 257, 257, "camcorder", "VendorThree"), + rows(28.5, "2018-01-20 15:32:32.406431", 255, 255, "meter", "VendorOne"), + rows(27.9, "2019-01-20 15:32:33.509334", 256, 256, "temperature meter", "VendorTwo"), + rows(27.4, "2020-01-20 15:32:35.732436", 257, 257, "camcorder", "VendorThree")); + } + + @Test + public void testLookupSelectedMultipleAttributes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | lookup %s did as device-id type | sort @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); + verifyDataRows( + result, + rows(28.1, "2015-01-20 15:31:32.406431", 255, "meter"), + rows(27.8, "2016-01-20 15:31:33.509334", 256, "temperature meter"), + rows(27.4, "2017-01-20 15:31:35.732436", 257, "camcorder"), + rows(28.5, "2018-01-20 15:32:32.406431", 255, "meter"), + rows(27.9, "2019-01-20 15:32:33.509334", 256, "temperature meter"), + rows(27.4, "2020-01-20 15:32:35.732436", 257, "camcorder")); + } + + @Test + public void testLookupShouldAppendOnlyShouldBeFalseByDefault() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | rename temperature as vendor | lookup %s did as device-id | sort" + + " @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); + verifyDataRows( + result, + rows("2015-01-20 15:31:32.406431", 255, "VendorOne", "temperature-basement", "meter", 255), + rows( + "2016-01-20 15:31:33.509334", + 256, + "VendorTwo", + "temperature-living-room", + "temperature meter", + 256), + rows( + "2017-01-20 15:31:35.732436", + 257, + "VendorThree", + "temperature-bedroom", + "camcorder", + 257), + rows("2018-01-20 15:32:32.406431", 255, "VendorOne", "temperature-basement", "meter", 255), + rows( + "2019-01-20 15:32:33.509334", + 256, + "VendorTwo", + "temperature-living-room", + "temperature meter", + 256), + rows( + "2020-01-20 15:32:35.732436", + 257, + "VendorThree", + "temperature-bedroom", + "camcorder", + 257)); } @Test - public void testKeepEmptyDedup() throws IOException { + public void testLookupWithAppendOnlyFalse() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | dedup balance keepempty=true | fields firstname, balance", - TEST_INDEX_BANK_WITH_NULL_VALUES)); + "source=%s | rename temperature as vendor | lookup %s did as device-id appendonly =" + + " true | sort @timestamp", + TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows("Amber JOHnny", 39225), - rows("Hattie", null), - rows("Nanette", 32838), - rows("Dale", 4180), - rows("Elinor", null), - rows("Virginia", null), - rows("Dillard", 48086)); + rows("2015-01-20 15:31:32.406431", 255, 28.1, "temperature-basement", "meter", 255), + rows( + "2016-01-20 15:31:33.509334", + 256, + 27.8, + "temperature-living-room", + "temperature meter", + 256), + rows("2017-01-20 15:31:35.732436", 257, 27.4, "temperature-bedroom", "camcorder", 257), + rows("2018-01-20 15:32:32.406431", 255, 28.5, "temperature-basement", "meter", 255), + rows( + "2019-01-20 15:32:33.509334", + 256, + 27.9, + "temperature-living-room", + "temperature meter", + 256), + rows("2020-01-20 15:32:35.732436", 257, 27.4, "temperature-bedroom", "camcorder", 257)); } } diff --git a/integ-test/src/test/resources/indexDefinitions/iot_readings_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/iot_readings_index_mappings.json new file mode 100644 index 0000000000..a97c6c170e --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/iot_readings_index_mappings.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "properties": { + "device-id": { + "type": "long" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "temperature": { + "type": "float" + }, + "timestamp": { + "type": "date" + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/indexDefinitions/iot_sensors_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/iot_sensors_index_mappings.json new file mode 100644 index 0000000000..e9682a390e --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/iot_sensors_index_mappings.json @@ -0,0 +1,36 @@ +{ + "mappings": { + "properties": { + "did": { + "type": "long" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "vendor": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/iot_readings.json b/integ-test/src/test/resources/iot_readings.json new file mode 100644 index 0000000000..be7e542e87 --- /dev/null +++ b/integ-test/src/test/resources/iot_readings.json @@ -0,0 +1,13 @@ +{ "index" : { "_id" : "1" } } +{ "device-id":255, "temperature":28.1, "@timestamp":"2015-01-20T15:31:32.406431+00:00" } +{ "index" : { "_id" : "2" } } +{ "device-id":256, "temperature":27.8, "@timestamp":"2016-01-20T15:31:33.509334+00:00" } +{ "index" : { "_id" : "3" } } +{ "device-id":257, "temperature":27.4, "@timestamp":"2017-01-20T15:31:35.732436+00:00" } +{ "index" : { "_id" : "4" } } +{ "device-id":255, "temperature":28.5, "@timestamp":"2018-01-20T15:32:32.406431+00:00" } +{ "index" : { "_id" : "5" } } +{ "device-id":256, "temperature":27.9, "@timestamp":"2019-01-20T15:32:33.509334+00:00" } +{ "index" : { "_id" : "6" } } +{ "device-id":257, "temperature":27.4, "@timestamp":"2020-01-20T15:32:35.732436+00:00" } +{ "index" : { "_id" : "7" } } diff --git a/integ-test/src/test/resources/iot_sensors.json b/integ-test/src/test/resources/iot_sensors.json new file mode 100644 index 0000000000..a36dd0aaa4 --- /dev/null +++ b/integ-test/src/test/resources/iot_sensors.json @@ -0,0 +1,6 @@ +{ "index" : { "_id" : "1" } } +{ "did" : 255, "name":"temperature-basement", "vendor":"VendorOne", "type":"meter"} +{ "index" : { "_id" : "2" } } +{ "did" : 256, "name":"temperature-living-room", "vendor":"VendorTwo", "type":"temperature meter" } +{ "index" : { "_id" : "3" } } +{ "did" : 257, "name":"temperature-bedroom", "vendor":"VendorThree", "type":"camcorder"} From e9d8389d52f5a91a65db6bc5082c22c51cd9d590 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 9 Jul 2024 11:21:15 +0200 Subject: [PATCH 09/20] Added "appendonly" example to docs Signed-off-by: Hendrik Saly --- docs/user/ppl/cmd/lookup.rst | 66 ++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst index 8580161753..03cb089b5c 100644 --- a/docs/user/ppl/cmd/lookup.rst +++ b/docs/user/ppl/cmd/lookup.rst @@ -103,35 +103,49 @@ The example show a lookup to add the name of a person from a lookup index with d PPL query:: os> source=accounts; - fetched rows / total rows = 3/3 - +------------------+----------+------------+ - | account_number | gender | department | - |------------------+----------+------------| - | 1 | M | finance | - | 13 | F | it | - | 20 | M | it | - +------------------+----------+------------+ + fetched rows / total rows = 4/4 + +------------------+----------+------------+------------------+ + | account_number | gender | department | name | + |------------------+----------+------------+------------------| + | 1 | M | finance | John Miller | + | 13 | F | it | Melinda Williams | + | 20 | M | it | NULL | + | 21 | F | finance | Mandy Smith | + +------------------+----------+------------+------------------+ os> source=hr; + fetched rows / total rows = 5/5 + +------------------+--------------+------------+--------+ + | employee_number | name | dep | active | + |------------------+--------------|------------|--------| + | 1 | John n/a | finance | true | + | 13 | Alice n/a | finance | false | + | 13 | Melinda n/a | it | true | + | 19 | Jack n/a | finance | true | + | 21 | NULL | finance | false | + +------------------+--------------+------------+--------+ + + os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=true; fetched rows / total rows = 4/4 - +------------------+----------+------------+--------+ - | employee_number | name | dep | active | - |------------------+----------|------------|--------| - | 1 | John | finance | true | - | 13 | Alice | finance | false | - | 13 | Melinda | it | true | - | 19 | Jack | finance | true | - +------------------+----------+------------+--------+ - - os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=true name AS given_name, active AS is_active ; - fetched rows / total rows = 2/2 - +------------------+----------+----------------+------------+-----------+ - | account_number | gender | given_name | department | is_active | - |------------------+----------|----------------|------------|-----------| - | 1 | M | John | finance | true | - | 13 | F | Melinda | it | true | - | 20 | M | NULL | it | true | - +------------------+----------+----------------+------------+-----------+ + +------------------+----------+------------------+------------+-----------+---------+-----------------+ + | account_number | gender | name | department | active | dep | employee_number | + |------------------+----------|------------------|------------|-----------|---------|-----------------| + | 1 | M | John Miller | finance | true | finance | 1 | + | 13 | F | Melinda Williams | it | true | it | 13 | + | 20 | M | NULL | it | NULL | NULL | NULL | + | 21 | F | Mandy Smith | it | NULL | it | 21 | + +------------------+----------+------------------+------------+-----------+---------+-----------------+ + + os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=false; + fetched rows / total rows = 4/4 + +------------------+----------+------------------+------------+-----------+---------+-----------------+ + | account_number | gender | name | department | active | dep | employee_number | + |------------------+----------|------------------|------------|-----------|---------|-----------------| + | 1 | M | John n/a | finance | true | finance | 1 | + | 13 | F | Melinda /na | it | true | it | 13 | + | 20 | M | NULL | it | NULL | NULL | NULL | + | 21 | F | Mandy Smith | it | NULL | it | 21 | + +------------------+----------+------------------+------------+-----------+---------+-----------------+ Limitation From 0c4e48860b22bb942be589316b8e5a3457575b2b Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 16 Jul 2024 13:15:11 +0200 Subject: [PATCH 10/20] Move implementation from ExecutionProtector to OpenSearchIndex class Signed-off-by: Hendrik Saly --- .../sql/planner/DefaultImplementor.java | 15 ---- .../OpenSearchExecutionProtector.java | 72 +----------------- .../opensearch/storage/OpenSearchIndex.java | 74 +++++++++++++++++++ .../plugin/config/OpenSearchPluginModule.java | 4 +- 4 files changed, 77 insertions(+), 88 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index 6d19bb312f..b53d17b38f 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -13,7 +13,6 @@ import org.opensearch.sql.planner.logical.LogicalFetchCursor; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; -import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; @@ -32,7 +31,6 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; -import org.opensearch.sql.planner.physical.LookupOperator; import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.ProjectOperator; @@ -76,19 +74,6 @@ public PhysicalPlan visitDedupe(LogicalDedupe node, C context) { node.getConsecutive()); } - @Override - public PhysicalPlan visitLookup(LogicalLookup node, C context) { - return new LookupOperator( - visitChild(node, context), - node.getIndexName(), - node.getMatchFieldMap(), - node.getAppendOnly(), - node.getCopyFieldMap(), - (a, b) -> { - throw new UnsupportedOperationException("Lookup not implemented by DefaultImplementor"); - }); - } - @Override public PhysicalPlan visitProject(LogicalProject node, C context) { return new ProjectOperator( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index cb86978168..e7fb9df5cd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -5,20 +5,8 @@ package org.opensearch.sql.opensearch.executor.protector; -import java.util.Map; -import java.util.Set; -import java.util.function.BiFunction; -import javax.annotation.Nullable; -import lombok.AllArgsConstructor; import lombok.RequiredArgsConstructor; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.MatchQueryBuilder; -import org.opensearch.index.query.TermQueryBuilder; -import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.monitor.ResourceMonitor; -import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.planner.physical.ADOperator; import org.opensearch.sql.opensearch.planner.physical.MLCommonsOperator; import org.opensearch.sql.opensearch.planner.physical.MLOperator; @@ -42,16 +30,11 @@ /** OpenSearch Execution Protector. */ @RequiredArgsConstructor -@AllArgsConstructor public class OpenSearchExecutionProtector extends ExecutionProtector { /** OpenSearch resource monitor. */ private final ResourceMonitor resourceMonitor; - @Nullable - /** OpenSearch client. Maybe null * */ - private OpenSearchClient openSearchClient; - public PhysicalPlan protect(PhysicalPlan physicalPlan) { return physicalPlan.accept(this, null); } @@ -142,60 +125,7 @@ public PhysicalPlan visitLookup(LookupOperator node, Object context) { node.getMatchFieldMap(), node.getAppendOnly(), node.getCopyFieldMap(), - lookup()); - } - - private BiFunction, Map> lookup() { - - if (openSearchClient == null) { - throw new RuntimeException( - "Can not perform lookup because openSearchClient was null. This is likely a bug."); - } - - return (indexName, inputMap) -> { - Map matchMap = (Map) inputMap.get("_match"); - Set copySet = (Set) inputMap.get("_copy"); - - BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - - for (Map.Entry f : matchMap.entrySet()) { - BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); - - // Todo: Search with term and a match query? Or terms only? - orQueryBuilder.should(new TermQueryBuilder(f.getKey(), f.getValue().toString())); - orQueryBuilder.should(new MatchQueryBuilder(f.getKey(), f.getValue().toString())); - orQueryBuilder.minimumShouldMatch(1); - - // filter is the same as "must" but ignores scoring - boolQueryBuilder.filter(orQueryBuilder); - } - - SearchResponse result = - openSearchClient - .getNodeClient() - .search( - new SearchRequest(indexName) - .source( - SearchSourceBuilder.searchSource() - .fetchSource( - copySet == null ? null : copySet.toArray(new String[0]), null) - .query(boolQueryBuilder) - .size(2))) - .actionGet(); - - int hits = result.getHits().getHits().length; - - if (hits == 0) { - // null indicates no hits for the lookup found - return null; - } - - if (hits != 1) { - throw new RuntimeException("too many hits for " + indexName + " (" + hits + ")"); - } - - return result.getHits().getHits()[0].getSourceAsMap(); - }; + node.getLookup()); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index c6afdb8511..f250bbea15 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -9,9 +9,17 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import lombok.RequiredArgsConstructor; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -28,9 +36,11 @@ import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScanBuilder; import org.opensearch.sql.planner.DefaultImplementor; import org.opensearch.sql.planner.logical.LogicalAD; +import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalML; import org.opensearch.sql.planner.logical.LogicalMLCommons; import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.physical.LookupOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.Table; import org.opensearch.sql.storage.read.TableScanBuilder; @@ -204,5 +214,69 @@ public PhysicalPlan visitAD(LogicalAD node, OpenSearchIndexScan context) { public PhysicalPlan visitML(LogicalML node, OpenSearchIndexScan context) { return new MLOperator(visitChild(node, context), node.getArguments(), client.getNodeClient()); } + + @Override + public PhysicalPlan visitLookup(LogicalLookup node, OpenSearchIndexScan context) { + return new LookupOperator( + visitChild(node, context), + node.getIndexName(), + node.getMatchFieldMap(), + node.getAppendOnly(), + node.getCopyFieldMap(), + lookup()); + } + + BiFunction, Map> lookup() { + + if (client.getNodeClient() == null) { + throw new RuntimeException( + "Can not perform lookup because openSearchClient was null. This is likely a bug."); + } + + return (indexName, inputMap) -> { + Map matchMap = (Map) inputMap.get("_match"); + Set copySet = (Set) inputMap.get("_copy"); + + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + + for (Map.Entry f : matchMap.entrySet()) { + BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); + + // Todo: Search with term and a match query? Or terms only? + orQueryBuilder.should(new TermQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.should(new MatchQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.minimumShouldMatch(1); + + // filter is the same as "must" but ignores scoring + boolQueryBuilder.filter(orQueryBuilder); + } + + SearchResponse result = + client + .getNodeClient() + .search( + new SearchRequest(indexName) + .source( + SearchSourceBuilder.searchSource() + .fetchSource( + copySet == null ? null : copySet.toArray(new String[0]), null) + .query(boolQueryBuilder) + .size(2))) + .actionGet(); + + int hits = result.getHits().getHits().length; + + if (hits == 0) { + // null indicates no hits for the lookup found + return null; + } + + if (hits != 1) { + throw new RuntimeException("too many hits for " + indexName + " (" + hits + ")"); + } + + return result.getHits().getHits()[0].getSourceAsMap(); + }; + } } } diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 8789c79c30..33a785c498 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -69,8 +69,8 @@ public ResourceMonitor resourceMonitor(Settings settings) { } @Provides - public ExecutionProtector protector(ResourceMonitor resourceMonitor, OpenSearchClient client) { - return new OpenSearchExecutionProtector(resourceMonitor, client); + public ExecutionProtector protector(ResourceMonitor resourceMonitor) { + return new OpenSearchExecutionProtector(resourceMonitor); } @Provides From ad96d222655878118151dea5486ab6563c46002d Mon Sep 17 00:00:00 2001 From: Lukasz Soszynski Date: Fri, 19 Jul 2024 13:34:49 +0200 Subject: [PATCH 11/20] Corrections related to LookupCommandIT and tests related to OpenSearchIndex Signed-off-by: Lukasz Soszynski --- .../sql/planner/physical/LookupOperator.java | 3 +- .../sql/planner/DefaultImplementorTest.java | 115 +++----- .../opensearch/sql/ppl/LookupCommandIT.java | 148 +++++++---- .../opensearch/storage/OpenSearchIndex.java | 113 ++++---- .../OpenSearchExecutionProtectorTest.java | 103 ++++---- .../storage/OpenSearchIndexTest.java | 101 +++++-- .../storage/SingleRowQueryTest.java | 246 ++++++++++++++++++ 7 files changed, 590 insertions(+), 239 deletions(-) create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java index 7117d87f5d..c4b1ccd824 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java @@ -9,6 +9,7 @@ import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.BiFunction; @@ -108,7 +109,7 @@ public ExprValue next() { } Map tupleInputValue = ExprValueUtils.getTupleValue(inputValue); - Map resultTupleBuilder = new HashMap<>(); + Map resultTupleBuilder = new LinkedHashMap<>(); resultTupleBuilder.putAll(tupleInputValue); for (Map.Entry sourceOfAdditionalField : lookupResult.entrySet()) { String lookedUpFieldName = sourceOfAdditionalField.getKey(); diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 930eb63a03..45d8f6c03c 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -21,7 +21,6 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.eval; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; -import static org.opensearch.sql.planner.logical.LogicalPlanDSL.lookup; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.rareTopN; @@ -48,7 +47,6 @@ import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.model.ExprBooleanValue; -import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.expression.DSL; @@ -60,7 +58,6 @@ import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.expression.window.ranking.RowNumberFunction; import org.opensearch.sql.planner.logical.LogicalCloseCursor; -import org.opensearch.sql.planner.logical.LogicalLookup; import org.opensearch.sql.planner.logical.LogicalPaginate; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; @@ -125,27 +122,22 @@ public void visit_should_return_default_physical_operator() { nested( limit( LogicalPlanDSL.dedupe( - lookup( - rareTopN( - sort( - eval( - remove( - rename( - aggregation( - filter(values(emptyList()), filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - "lookup_index_name", - Map.of(), - false, - Map.of()), + rareTopN( + sort( + eval( + remove( + rename( + aggregation( + filter(values(emptyList()), filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), dedupeField), limit, offset), @@ -160,30 +152,24 @@ public void visit_should_return_default_physical_operator() { PhysicalPlanDSL.nested( PhysicalPlanDSL.limit( PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.lookup( - PhysicalPlanDSL.rareTopN( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - PhysicalPlanDSL.filter( - PhysicalPlanDSL.values(emptyList()), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - "lookup_index_name", - Map.of(), - false, - Map.of(), - null), + PhysicalPlanDSL.rareTopN( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + PhysicalPlanDSL.filter( + PhysicalPlanDSL.values(emptyList()), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), dedupeField), limit, offset), @@ -292,37 +278,4 @@ public void visitPaginate_should_remove_it_from_tree() { new ProjectOperator(new ValuesOperator(List.of(List.of())), List.of(), List.of()); assertEquals(physicalPlanTree, logicalPlanTree.accept(implementor, null)); } - - @Test - public void visitLookup_should_build_LookupOperator() { - LogicalPlan values = values(List.of(DSL.literal("to be or not to be"))); - var logicalPlan = lookup(values, "lookup_index_name", Map.of(), false, Map.of()); - var expectedPhysicalPlan = - PhysicalPlanDSL.lookup( - new ValuesOperator(List.of(List.of(DSL.literal("to be or not to be")))), - "lookup_index_name", - Map.of(), - false, - Map.of(), - null); - - PhysicalPlan lookupOperator = logicalPlan.accept(implementor, null); - - assertEquals(expectedPhysicalPlan, lookupOperator); - } - - @Test - public void visitLookup_should_throw_unsupportedOperationException() { - LogicalLookup input = mock(LogicalLookup.class); - LogicalPlan dataSource = mock(LogicalPlan.class); - PhysicalPlan physicalSource = mock(PhysicalPlan.class); - when(dataSource.accept(implementor, null)).thenReturn(physicalSource); - when(input.getChild()).thenReturn(List.of(dataSource)); - PhysicalPlan lookupOperator = implementor.visitLookup(input, null); - when(physicalSource.next()).thenReturn(ExprValueUtils.tupleValue(Map.of("field", "value"))); - - var ex = assertThrows(UnsupportedOperationException.class, () -> lookupOperator.next()); - - assertEquals("Lookup not implemented by DefaultImplementor", ex.getMessage()); - } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java index ade37b1241..d6a85f4687 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import java.io.IOException; +import java.math.BigDecimal; import org.json.JSONObject; import org.junit.jupiter.api.Test; @@ -32,49 +33,49 @@ public void testLookup() throws IOException { verifyDataRows( result, rows( - 28.1, - "2015-01-20 15:31:32.406431", + new BigDecimal("28.1"), 255, + "2015-01-20 15:31:32.406431", "temperature-basement", "meter", 255, "VendorOne"), rows( - 27.8, - "2016-01-20 15:31:33.509334", + new BigDecimal("27.8"), 256, + "2016-01-20 15:31:33.509334", "temperature-living-room", "temperature meter", 256, "VendorTwo"), rows( - 27.4, - "2017-01-20 15:31:35.732436", + new BigDecimal("27.4"), 257, + "2017-01-20 15:31:35.732436", "temperature-bedroom", "camcorder", 257, "VendorThree"), rows( - 28.5, - "2018-01-20 15:32:32.406431", + new BigDecimal("28.5"), 255, + "2018-01-20 15:32:32.406431", "temperature-basement", "meter", 255, "VendorOne"), rows( - 27.9, - "2019-01-20 15:32:33.509334", + new BigDecimal("27.9"), 256, + "2019-01-20 15:32:33.509334", "temperature-living-room", "temperature meter", 256, "VendorTwo"), rows( - 27.4, - "2020-01-20 15:32:35.732436", + new BigDecimal("27.4"), 257, + "2020-01-20 15:32:35.732436", "temperature-bedroom", "camcorder", 257, @@ -90,12 +91,23 @@ public void testLookupSelectedAttribute() throws IOException { TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows(28.1, "2015-01-20 15:31:32.406431", 255, "meter", "VendorOne"), - rows(27.8, "2016-01-20 15:31:33.509334", 256, "temperature meter", "VendorTwo"), - rows(27.4, "2017-01-20 15:31:35.732436", 257, "camcorder", "VendorThree"), - rows(28.5, "2018-01-20 15:32:32.406431", 255, "meter", "VendorOne"), - rows(27.9, "2019-01-20 15:32:33.509334", 256, "temperature meter", "VendorTwo"), - rows(27.4, "2020-01-20 15:32:35.732436", 257, "camcorder", "VendorThree")); + rows(new BigDecimal("28.1"), 255, "2015-01-20 15:31:32.406431", "meter", "VendorOne"), + rows( + new BigDecimal("27.8"), + 256, + "2016-01-20 15:31:33.509334", + "temperature meter", + "VendorTwo"), + rows(new BigDecimal("27.4"), 257, "2017-01-20 15:31:35.732436", "camcorder", "VendorThree"), + rows(new BigDecimal("28.5"), 255, "2018-01-20 15:32:32.406431", "meter", "VendorOne"), + rows( + new BigDecimal("27.9"), + 256, + "2019-01-20 15:32:33.509334", + "temperature meter", + "VendorTwo"), + rows( + new BigDecimal("27.4"), 257, "2020-01-20 15:32:35.732436", "camcorder", "VendorThree")); } @Test @@ -108,12 +120,36 @@ public void testLookupRenameSelectedAttributes() throws IOException { TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows(28.1, "2015-01-20 15:31:32.406431", 255, 255, "meter", "VendorOne"), - rows(27.8, "2016-01-20 15:31:33.509334", 256, 256, "temperature meter", "VendorTwo"), - rows(27.4, "2017-01-20 15:31:35.732436", 257, 257, "camcorder", "VendorThree"), - rows(28.5, "2018-01-20 15:32:32.406431", 255, 255, "meter", "VendorOne"), - rows(27.9, "2019-01-20 15:32:33.509334", 256, 256, "temperature meter", "VendorTwo"), - rows(27.4, "2020-01-20 15:32:35.732436", 257, 257, "camcorder", "VendorThree")); + rows(new BigDecimal("28.1"), 255, "2015-01-20 15:31:32.406431", 255, "meter", "VendorOne"), + rows( + new BigDecimal("27.8"), + 256, + "2016-01-20 15:31:33.509334", + 256, + "temperature meter", + "VendorTwo"), + rows( + new BigDecimal("27.4"), + 257, + "2017-01-20 15:31:35.732436", + 257, + "camcorder", + "VendorThree"), + rows(new BigDecimal("28.5"), 255, "2018-01-20 15:32:32.406431", 255, "meter", "VendorOne"), + rows( + new BigDecimal("27.9"), + 256, + "2019-01-20 15:32:33.509334", + 256, + "temperature meter", + "VendorTwo"), + rows( + new BigDecimal("27.4"), + 257, + "2020-01-20 15:32:35.732436", + 257, + "camcorder", + "VendorThree")); } @Test @@ -125,12 +161,12 @@ public void testLookupSelectedMultipleAttributes() throws IOException { TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows(28.1, "2015-01-20 15:31:32.406431", 255, "meter"), - rows(27.8, "2016-01-20 15:31:33.509334", 256, "temperature meter"), - rows(27.4, "2017-01-20 15:31:35.732436", 257, "camcorder"), - rows(28.5, "2018-01-20 15:32:32.406431", 255, "meter"), - rows(27.9, "2019-01-20 15:32:33.509334", 256, "temperature meter"), - rows(27.4, "2020-01-20 15:32:35.732436", 257, "camcorder")); + rows(new BigDecimal("28.1"), 255, "2015-01-20 15:31:32.406431", "meter"), + rows(new BigDecimal("27.8"), 256, "2016-01-20 15:31:33.509334", "temperature meter"), + rows(new BigDecimal("27.4"), 257, "2017-01-20 15:31:35.732436", "camcorder"), + rows(new BigDecimal("28.5"), 255, "2018-01-20 15:32:32.406431", "meter"), + rows(new BigDecimal("27.9"), 256, "2019-01-20 15:32:33.509334", "temperature meter"), + rows(new BigDecimal("27.4"), 257, "2020-01-20 15:32:35.732436", "camcorder")); } @Test @@ -143,32 +179,32 @@ public void testLookupShouldAppendOnlyShouldBeFalseByDefault() throws IOExceptio TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows("2015-01-20 15:31:32.406431", 255, "VendorOne", "temperature-basement", "meter", 255), + rows(255, "2015-01-20 15:31:32.406431", "VendorOne", "temperature-basement", "meter", 255), rows( - "2016-01-20 15:31:33.509334", 256, + "2016-01-20 15:31:33.509334", "VendorTwo", "temperature-living-room", "temperature meter", 256), rows( - "2017-01-20 15:31:35.732436", 257, + "2017-01-20 15:31:35.732436", "VendorThree", "temperature-bedroom", "camcorder", 257), - rows("2018-01-20 15:32:32.406431", 255, "VendorOne", "temperature-basement", "meter", 255), + rows(255, "2018-01-20 15:32:32.406431", "VendorOne", "temperature-basement", "meter", 255), rows( - "2019-01-20 15:32:33.509334", 256, + "2019-01-20 15:32:33.509334", "VendorTwo", "temperature-living-room", "temperature meter", 256), rows( - "2020-01-20 15:32:35.732436", 257, + "2020-01-20 15:32:35.732436", "VendorThree", "temperature-bedroom", "camcorder", @@ -185,23 +221,47 @@ public void testLookupWithAppendOnlyFalse() throws IOException { TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( result, - rows("2015-01-20 15:31:32.406431", 255, 28.1, "temperature-basement", "meter", 255), rows( - "2016-01-20 15:31:33.509334", + 255, + "2015-01-20 15:31:32.406431", + new BigDecimal("28.1"), + "temperature-basement", + "meter", + 255), + rows( 256, - 27.8, + "2016-01-20 15:31:33.509334", + new BigDecimal("27.8"), "temperature-living-room", "temperature meter", 256), - rows("2017-01-20 15:31:35.732436", 257, 27.4, "temperature-bedroom", "camcorder", 257), - rows("2018-01-20 15:32:32.406431", 255, 28.5, "temperature-basement", "meter", 255), rows( - "2019-01-20 15:32:33.509334", + 257, + "2017-01-20 15:31:35.732436", + new BigDecimal("27.4"), + "temperature-bedroom", + "camcorder", + 257), + rows( + 255, + "2018-01-20 15:32:32.406431", + new BigDecimal("28.5"), + "temperature-basement", + "meter", + 255), + rows( 256, - 27.9, + "2019-01-20 15:32:33.509334", + new BigDecimal("27.9"), "temperature-living-room", "temperature meter", 256), - rows("2020-01-20 15:32:35.732436", 257, 27.4, "temperature-bedroom", "camcorder", 257)); + rows( + 257, + "2020-01-20 15:32:35.732436", + new BigDecimal("27.4"), + "temperature-bedroom", + "camcorder", + 257)); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index f250bbea15..640d49355f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -9,16 +9,20 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.function.BiFunction; import java.util.function.Function; import lombok.RequiredArgsConstructor; +import org.jetbrains.annotations.Nullable; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.node.NodeClient; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; @@ -217,66 +221,79 @@ public PhysicalPlan visitML(LogicalML node, OpenSearchIndexScan context) { @Override public PhysicalPlan visitLookup(LogicalLookup node, OpenSearchIndexScan context) { + SingleRowQuery singleRowQuery = new SingleRowQuery(client); return new LookupOperator( visitChild(node, context), node.getIndexName(), node.getMatchFieldMap(), node.getAppendOnly(), node.getCopyFieldMap(), - lookup()); + lookup(singleRowQuery)); } - BiFunction, Map> lookup() { - - if (client.getNodeClient() == null) { - throw new RuntimeException( - "Can not perform lookup because openSearchClient was null. This is likely a bug."); - } - + BiFunction, Map> lookup( + SingleRowQuery singleRowQuery) { + Objects.requireNonNull(singleRowQuery, "SingleRowQuery is required to perform lookup"); return (indexName, inputMap) -> { Map matchMap = (Map) inputMap.get("_match"); Set copySet = (Set) inputMap.get("_copy"); - - BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - - for (Map.Entry f : matchMap.entrySet()) { - BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); - - // Todo: Search with term and a match query? Or terms only? - orQueryBuilder.should(new TermQueryBuilder(f.getKey(), f.getValue().toString())); - orQueryBuilder.should(new MatchQueryBuilder(f.getKey(), f.getValue().toString())); - orQueryBuilder.minimumShouldMatch(1); - - // filter is the same as "must" but ignores scoring - boolQueryBuilder.filter(orQueryBuilder); - } - - SearchResponse result = - client - .getNodeClient() - .search( - new SearchRequest(indexName) - .source( - SearchSourceBuilder.searchSource() - .fetchSource( - copySet == null ? null : copySet.toArray(new String[0]), null) - .query(boolQueryBuilder) - .size(2))) - .actionGet(); - - int hits = result.getHits().getHits().length; - - if (hits == 0) { - // null indicates no hits for the lookup found - return null; - } - - if (hits != 1) { - throw new RuntimeException("too many hits for " + indexName + " (" + hits + ")"); - } - - return result.getHits().getHits()[0].getSourceAsMap(); + return singleRowQuery.executeQuery(indexName, matchMap, copySet); }; } } + + static class SingleRowQuery { + + private final NodeClient nodeClient; + + public SingleRowQuery(OpenSearchClient openSearchClient) { + Objects.requireNonNull(openSearchClient, "Opensearch client is required to perform lookup"); + this.nodeClient = + Objects.requireNonNull( + openSearchClient.getNodeClient(), + "Can not perform lookup because openSearchClient was null. This is likely a bug."); + } + + public @Nullable Map executeQuery( + String indexName, Map matchMap, Set copySet) { + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + + for (Map.Entry f : matchMap.entrySet()) { + BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); + + // Todo: Search with term and a match query? Or terms only? + orQueryBuilder.should(new TermQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.should(new MatchQueryBuilder(f.getKey(), f.getValue().toString())); + orQueryBuilder.minimumShouldMatch(1); + + // filter is the same as "must" but ignores scoring + boolQueryBuilder.filter(orQueryBuilder); + } + + SearchResponse result = + nodeClient + .search( + new SearchRequest(indexName) + .source( + SearchSourceBuilder.searchSource() + .fetchSource( + copySet == null ? null : copySet.toArray(new String[0]), null) + .query(boolQueryBuilder) + .size(2))) + .actionGet(); + + SearchHit[] searchHits = result.getHits().getHits(); + int hits = searchHits.length; + if (hits == 0) { + // null indicates no hits for the lookup found + return null; + } + + if (hits != 1) { + throw new RuntimeException("too many hits for " + indexName + " (" + hits + ")"); + } + + return searchHits[0].getSourceAsMap(); + } + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index b2dc042110..7d5ec81127 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -81,6 +82,8 @@ class OpenSearchExecutionProtectorTest { @Mock private OpenSearchSettings settings; + @Mock private BiFunction, Map> lookupFunction; + private OpenSearchExecutionProtector executionProtector; @BeforeEach @@ -120,59 +123,71 @@ void test_protect_indexScan() { settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)); assertEquals( PhysicalPlanDSL.project( - PhysicalPlanDSL.limit( - PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.rareTopN( - resourceMonitor( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - filter( - resourceMonitor( - new OpenSearchIndexScan( - client, maxResultWindow, request)), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField)), - CommandType.TOP, - topExprs, - topField), - dedupeField), - limit, - offset), - include), - executionProtector.protect( - PhysicalPlanDSL.project( + PhysicalPlanDSL.lookup( PhysicalPlanDSL.limit( PhysicalPlanDSL.dedupe( PhysicalPlanDSL.rareTopN( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - filter( - new OpenSearchIndexScan( - client, maxResultWindow, request), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), + resourceMonitor( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + filter( + resourceMonitor( + new OpenSearchIndexScan( + client, maxResultWindow, request)), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField)), CommandType.TOP, topExprs, topField), dedupeField), limit, offset), + "lookup_index_name", + Map.of(), + false, + Map.of(), + lookupFunction), + include), + executionProtector.protect( + PhysicalPlanDSL.project( + PhysicalPlanDSL.lookup( + PhysicalPlanDSL.limit( + PhysicalPlanDSL.dedupe( + PhysicalPlanDSL.rareTopN( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + filter( + new OpenSearchIndexScan( + client, maxResultWindow, request), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topExprs, + topField), + dedupeField), + limit, + offset), + "lookup_index_name", + Map.of(), + false, + Map.of(), + lookupFunction), include))); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java index 3ddb07d86a..d7ce2dc7b5 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java @@ -28,8 +28,11 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.sort; import com.google.common.collect.ImmutableMap; +import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Set; +import java.util.function.BiFunction; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -37,7 +40,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.client.node.NodeClient; import org.opensearch.common.unit.TimeValue; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.common.setting.Settings; @@ -53,6 +58,8 @@ import org.opensearch.sql.opensearch.mapping.IndexMapping; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex.OpenSearchDefaultImplementor; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex.SingleRowQuery; import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; @@ -65,6 +72,9 @@ class OpenSearchIndexTest { public static final TimeValue SCROLL_TIMEOUT = new TimeValue(1); public static final OpenSearchRequest.IndexName INDEX_NAME = new OpenSearchRequest.IndexName("test"); + public static final String LOOKUP_INDEX_NAME = "lookup-index-name"; + public static final String LOOKUP_TABLE_FIELD = "lookup_table_field"; + public static final String QUERY_FIELD = "query_field"; @Mock private OpenSearchClient client; @@ -74,6 +84,8 @@ class OpenSearchIndexTest { @Mock private IndexMapping mapping; + @Mock private NodeClient nodeClient; + private OpenSearchIndex index; @BeforeEach @@ -222,6 +234,7 @@ void implementRelationOperatorWithOptimization() { void implementOtherLogicalOperators() { when(client.getIndexMaxResultWindows("test")).thenReturn(Map.of("test", 10000)); when(settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT)).thenReturn(200); + when(client.getNodeClient()).thenReturn(nodeClient); NamedExpression include = named("age", ref("age", INTEGER)); ReferenceExpression exclude = ref("name", STRING); ReferenceExpression dedupeField = ref("name", STRING); @@ -234,34 +247,80 @@ void implementOtherLogicalOperators() { LogicalPlan plan = project( - LogicalPlanDSL.dedupe( - sort( - eval( - remove(rename(index.createScanBuilder(), mappings), exclude), newEvalField), - sortField), - dedupeField), + LogicalPlanDSL.lookup( + LogicalPlanDSL.dedupe( + sort( + eval( + remove(rename(index.createScanBuilder(), mappings), exclude), + newEvalField), + sortField), + dedupeField), + LOOKUP_INDEX_NAME, + Map.of( + new ReferenceExpression(LOOKUP_TABLE_FIELD, STRING), + new ReferenceExpression(QUERY_FIELD, STRING)), + true, + Collections.emptyMap()), include); Integer maxResultWindow = index.getMaxResultWindow(); final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory); + + BiFunction, Map> anyBifunction = + new BiFunction<>() { + @Override + public Map apply(String s, Map stringObjectMap) { + return Map.of(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof BiFunction; + } + }; assertEquals( PhysicalPlanDSL.project( - PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - new OpenSearchIndexScan( - client, - QUERY_SIZE_LIMIT, - requestBuilder.build( - INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT)), - mappings), - exclude), - newEvalField), - sortField), - dedupeField), + PhysicalPlanDSL.lookup( + PhysicalPlanDSL.dedupe( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + new OpenSearchIndexScan( + client, + QUERY_SIZE_LIMIT, + requestBuilder.build( + INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT)), + mappings), + exclude), + newEvalField), + sortField), + dedupeField), + LOOKUP_INDEX_NAME, + Map.of( + new ReferenceExpression(LOOKUP_TABLE_FIELD, STRING), + new ReferenceExpression(QUERY_FIELD, STRING)), + true, + Collections.emptyMap(), + anyBifunction), include), index.implement(plan)); } + + @Test + public void lookupShouldExecuteQuery() { + OpenSearchDefaultImplementor implementor = new OpenSearchDefaultImplementor(client); + Map matchMap = Map.of("column name", "required value"); + Set copySet = Set.of("column_1", "column_2"); + Map parameters = Map.of("_match", matchMap, "_copy", copySet); + SingleRowQuery singleRowQuery = Mockito.mock(SingleRowQuery.class); + Map resultRow = Map.of("column_1", 1, "column_2", 2); + when(singleRowQuery.executeQuery("lookup_index_name", matchMap, copySet)).thenReturn(resultRow); + BiFunction, Map> lookup = + implementor.lookup(singleRowQuery); + + Map givenResult = lookup.apply("lookup_index_name", parameters); + + assertEquals(resultRow, givenResult); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java new file mode 100644 index 0000000000..dbbb09f705 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java @@ -0,0 +1,246 @@ +package org.opensearch.sql.opensearch.storage; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.emptyArray; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.node.NodeClient; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex.SingleRowQuery; + +@ExtendWith(MockitoExtension.class) +class SingleRowQueryTest { + + @Mock private OpenSearchClient openSearchClient; + @Mock private NodeClient nodeClient; + @Mock private ActionFuture searchFuture; + @Mock private SearchResponse response; + @Mock private SearchHits searchHits; + @Mock private SearchHit hit; + @Captor private ArgumentCaptor searchRequestCaptor; + + private SingleRowQuery singleRowQuery; + + @BeforeEach + public void beforeEach() { + when(openSearchClient.getNodeClient()).thenReturn(nodeClient); + singleRowQuery = new SingleRowQuery(openSearchClient); + } + + @AfterEach + public void shouldUseExactlyOneSearch() { + verify(nodeClient, times(1)).search(any()); + } + + @Test + void shouldReturnNullWhenRowDoesNotExist() { + Map predicates = Map.of("column_name", "value 1"); + Set projection = Set.of("returned column name"); + mockSearch(new SearchHit[0]); + + Map row = singleRowQuery.executeQuery("index_name", predicates, projection); + + assertThat(row, nullValue()); + } + + @Test + void shouldThrowExceptionWhenMoreThanOneRowIsFound() { + Map predicates = Map.of("column_name", "value 1"); + Set projection = Set.of("returned column name"); + mockSearch(new SearchHit[2]); + + RuntimeException ex = + assertThrows( + RuntimeException.class, + () -> singleRowQuery.executeQuery("index_name", predicates, projection)); + + assertThat(ex.getMessage(), Matchers.containsString("too many hits")); + } + + @ParameterizedTest + @ValueSource(strings = {"table_1", "other_table", "yet_another_table"}) + void shouldQueryCorrectTable(String tableName) { + Map predicates = Map.of("column_name", "value row criteria"); + Set projection = Set.of("returned column name"); + + mockSearch(new SearchHit[0]); + + Map row = singleRowQuery.executeQuery(tableName, predicates, projection); + + SearchRequest searchRequest = searchRequestCaptor.getValue(); + assertThat(searchRequest.indices(), equalTo(new String[] {tableName})); + assertThat(row, nullValue()); + } + + @ParameterizedTest + @CsvSource({ + "field,value row criteria,fetched_column_name_one", + "column,another value row criteria,another_fetched_column_name", + "attribute,yet another value row criteria,third_fetched_column_name", + "regular_name,one_2_three,this_is_the_fetched_column_name", + "extra_ordinary_column_name,I am an expected value,my_name_is_the_fetched_column", + "nice_column_name,last value row criteria,my_nice_fetched_column", + }) + public void shouldBuildOpenSearchQueryForOnePredicate( + String columnName, String valuePredicate, String projection) { + Map predicates = Map.of(columnName, valuePredicate); + mockSearch(new SearchHit[0]); + + Map row = + singleRowQuery.executeQuery("index_name", predicates, Set.of(projection)); + + SearchRequest searchRequest = searchRequestCaptor.getValue(); + BoolQueryBuilder filterQueryForSinglePredicate = new BoolQueryBuilder(); + filterQueryForSinglePredicate.should(new TermQueryBuilder(columnName, valuePredicate)); + filterQueryForSinglePredicate.should(new MatchQueryBuilder(columnName, valuePredicate)); + filterQueryForSinglePredicate.minimumShouldMatch(1); + BoolQueryBuilder expectedQuery = new BoolQueryBuilder(); + expectedQuery.filter(filterQueryForSinglePredicate); + + assertThat(searchRequest.source().query(), equalTo(expectedQuery)); + assertThat(searchRequest.source().size(), equalTo(2)); + assertThat(searchRequest.source().fetchSource().includes(), equalTo(new String[] {projection})); + assertThat(searchRequest.source().fetchSource().excludes(), emptyArray()); + assertThat(row, nullValue()); + } + + @ParameterizedTest + @CsvSource({ + "columnName1,columnName2,columnName3", + "columnName4,columnName5,columnName6", + "columnName,columnName8,columnName9", + "extraOrdinaryOne,eXtraOrdinaryTwo,extraOrdinaryThree" + }) + void shouldFetchVariousColumns(String columnOne, String columnTwo, String columnThree) { + Map predicates = Map.of("find_only_row", "with_value_abc"); + mockSearch(new SearchHit[0]); + + Map row = + singleRowQuery.executeQuery( + "index_name", predicates, Set.of(columnOne, columnTwo, columnThree)); + + assertThat(row, nullValue()); + } + + @ParameterizedTest + @MethodSource("variousPredicates") + void shouldUseComplexPredicate(Map predicates) { + Set projection = Set.of("returned column name"); + mockSearch(new SearchHit[0]); + + Map row = singleRowQuery.executeQuery("index_name", predicates, projection); + + SearchRequest searchRequest = searchRequestCaptor.getValue(); + BoolQueryBuilder expectedQuery = new BoolQueryBuilder(); + predicates.entrySet().stream() + .map( + entry -> { + String columnName = entry.getKey(); + Object value = entry.getValue(); + BoolQueryBuilder filterQueryForSinglePredicate = new BoolQueryBuilder(); + filterQueryForSinglePredicate.should(new TermQueryBuilder(columnName, value)); + filterQueryForSinglePredicate.should(new MatchQueryBuilder(columnName, value)); + filterQueryForSinglePredicate.minimumShouldMatch(1); + return filterQueryForSinglePredicate; + }) + .forEach(expectedQuery::filter); + + assertThat(searchRequest.source().query(), equalTo(expectedQuery)); + assertThat(row, nullValue()); + } + + static Stream variousPredicates() { + return Stream.of( + Arguments.of(Map.of("column_name_1", "value row criteria_12")), + Arguments.of( + Map.of( + "column_name_2", "value row criteria_23", "another_column_5", "another value_8")), + Arguments.of( + Map.of( + "column_name_3", + "value row criteria_34", + "another_column_6", + "another value_8", + "yet_another_column_11", + "yet another value_13")), + Arguments.of( + Map.of( + "column_name_4", + "value row criteria_45", + "another_column_7", + "another value_10", + "yet_another_column_12", + "yet another value_14", + "extra_column_15", + "extra value_16"))); + } + + @Test + public void shouldReturnRow() { + Map predicates = Map.of("column_name", "value row criteria"); + Set projection = Set.of("returned column name"); + Map searchResult = + Map.of("column_name", "value row criteria", "returned column name", "value 2"); + mockSearch(searchResult); + + Map row = singleRowQuery.executeQuery("index_name", predicates, projection); + + assertThat(row, equalTo(searchResult)); + } + + @Test + public void shouldTreatProjectionAsOptionalParameter() { + Map predicates = Map.of("column_name", "value row criteria"); + Set projection = null; + Map searchResult = + Map.of("column_name", "value row criteria", "returned column name", "value 2"); + mockSearch(searchResult); + + Map row = singleRowQuery.executeQuery("index_name", predicates, projection); + + assertThat(row, equalTo(searchResult)); + } + + private void mockSearch(Map searchResult) { + when(hit.getSourceAsMap()).thenReturn(searchResult); + mockSearch(new SearchHit[] {hit}); + } + + private void mockSearch(SearchHit[] searchResult) { + when(nodeClient.search(searchRequestCaptor.capture())).thenReturn(searchFuture); + when(searchFuture.actionGet()).thenReturn(response); + when(response.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(searchResult); + } +} From f2410e8698b064dc6dfb90cbb054349ab4a0c719 Mon Sep 17 00:00:00 2001 From: Lukasz Soszynski Date: Fri, 19 Jul 2024 13:42:02 +0200 Subject: [PATCH 12/20] Typo corrected. Signed-off-by: Lukasz Soszynski --- core/src/main/java/org/opensearch/sql/analysis/Analyzer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index aeff7443dd..28238dd55c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -554,10 +554,10 @@ private ImmutableMap analyzeLookupMatc for (Map resultMap : inputMap) { Expression origin = expressionAnalyzer.analyze(resultMap.getOrigin(), lookupTableContext); if (resultMap.getTarget() instanceof Field) { - Expression targerExpression = + Expression targetExpression = expressionAnalyzer.analyze(resultMap.getTarget(), queryContext); ReferenceExpression targetReference = - DSL.ref(targerExpression.toString(), targerExpression.type()); + DSL.ref(targetExpression.toString(), targetExpression.type()); ReferenceExpression originReference = DSL.ref(origin.toString(), origin.type()); TypeEnvironment curEnv = queryContext.peek(); curEnv.remove(originReference); From 127f22ba7769833d8af547d1b1e90c0a8a188099 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Tue, 23 Jul 2024 14:26:54 +0200 Subject: [PATCH 13/20] Enable doc test Signed-off-by: Hendrik Saly --- docs/category.json | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/category.json b/docs/category.json index e90c674a2e..a62e47f76d 100644 --- a/docs/category.json +++ b/docs/category.json @@ -9,6 +9,7 @@ "ppl_cli": [ "user/ppl/cmd/ad.rst", "user/ppl/cmd/dedup.rst", + "user/ppl/cmd/lookup.rst", "user/ppl/cmd/describe.rst", "user/ppl/cmd/showdatasources.rst", "user/ppl/cmd/information_schema.rst", From 0326c6c8e484736dd46a4b0a0b63eac942e4bb67 Mon Sep 17 00:00:00 2001 From: yang-db Date: Mon, 2 Dec 2024 11:59:03 -0800 Subject: [PATCH 14/20] update spotless issue Signed-off-by: YANGDB --- .../java/org/opensearch/sql/planner/DefaultImplementor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index f962c3e4bf..9a5cf03766 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -132,7 +132,8 @@ public PhysicalPlan visitValues(LogicalValues node, C context) { public PhysicalPlan visitLimit(LogicalLimit node, C context) { PhysicalPlan child = visitChild(node, context); // Optimize sort + limit to take ordered operator - if (child instanceof SortOperator sortChild) { + if (child instanceof SortOperator) { + SortOperator sortChild = (SortOperator) child; return new TakeOrderedOperator( sortChild.getInput(), node.getLimit(), node.getOffset(), sortChild.getSortList()); } From 1d2be37d7cb0f7efab9363069f283b669b9ce4c1 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Mon, 2 Dec 2024 12:21:55 -0800 Subject: [PATCH 15/20] update spotless issue Signed-off-by: YANGDB --- .../sql/legacy/SQLIntegTestCase.java | 20 +++--- .../storage/OpenSearchIndexTest.java | 64 +++++++++---------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 6a5584c636..4075ab8c1d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -741,16 +741,16 @@ public enum Index { "multi_nested", getNestedTypeIndexMapping(), "src/test/resources/nested_with_nulls.json"), - IOT_READINGS( - TestsConstants.TEST_INDEX_IOT_READINGS, - "iot_readings", - getMappingFile("iot_readings_index_mapping.json"), - "src/test/resources/iot_readings.json"), - IOT_SENSORS( - TestsConstants.TEST_INDEX_IOT_SENSORS, - "iot_sensors", - getMappingFile("iot_sensors_index_mapping.json"), - "src/test/resources/iot_sensors.json"), + IOT_READINGS( + TestsConstants.TEST_INDEX_IOT_READINGS, + "iot_readings", + getMappingFile("iot_readings_index_mapping.json"), + "src/test/resources/iot_readings.json"), + IOT_SENSORS( + TestsConstants.TEST_INDEX_IOT_SENSORS, + "iot_sensors", + getMappingFile("iot_sensors_index_mapping.json"), + "src/test/resources/iot_sensors.json"), GEOPOINTS( TestsConstants.TEST_INDEX_GEOPOINT, "dates", diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java index e176c21844..fcc49545a7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java @@ -271,43 +271,43 @@ void implementOtherLogicalOperators() { include); Integer maxResultWindow = index.getMaxResultWindow(); - BiFunction, Map> anyBifunction = - new BiFunction<>() { - @Override - public Map apply(String s, Map stringObjectMap) { - return Map.of(); - } - - @Override - public boolean equals(Object obj) { - return obj instanceof BiFunction; - } - }; - + BiFunction, Map> anyBifunction = + new BiFunction<>() { + @Override + public Map apply(String s, Map stringObjectMap) { + return Map.of(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof BiFunction; + } + }; + final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory, settings); assertEquals( - PhysicalPlanDSL.project( - PhysicalPlanDSL.lookup( - PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - new OpenSearchIndexScan( - client, - QUERY_SIZE_LIMIT, - requestBuilder.build( - INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT, client)), - mappings), - exclude), - newEvalField), - sortField), - dedupeField), + PhysicalPlanDSL.project( + PhysicalPlanDSL.lookup( + PhysicalPlanDSL.dedupe( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + new OpenSearchIndexScan( + client, + QUERY_SIZE_LIMIT, + requestBuilder.build( + INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT, client)), + mappings), + exclude), + newEvalField), + sortField), + dedupeField), LOOKUP_INDEX_NAME, Map.of( - new ReferenceExpression(LOOKUP_TABLE_FIELD, STRING), - new ReferenceExpression(QUERY_FIELD, STRING)), + new ReferenceExpression(LOOKUP_TABLE_FIELD, STRING), + new ReferenceExpression(QUERY_FIELD, STRING)), true, Collections.emptyMap(), anyBifunction), From 92e2abba22f2e2e02de747b2dd5f9b044cedd7c9 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Mon, 2 Dec 2024 13:09:42 -0800 Subject: [PATCH 16/20] rename appendonly into overwrite Signed-off-by: YANGDB --- .../org/opensearch/sql/analysis/Analyzer.java | 4 +-- .../org/opensearch/sql/executor/Explain.java | 2 +- .../sql/planner/logical/LogicalLookup.java | 6 ++-- .../sql/planner/physical/LookupOperator.java | 8 ++--- .../opensearch/sql/analysis/AnalyzerTest.java | 32 +++++++++---------- .../opensearch/sql/executor/ExplainTest.java | 2 +- .../planner/physical/LookupOperatorTest.java | 4 +-- .../OpenSearchExecutionProtector.java | 2 +- .../opensearch/storage/OpenSearchIndex.java | 2 +- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 2 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../sql/ppl/utils/ArgumentFactory.java | 6 ++-- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 6 ++-- .../sql/ppl/parser/AstBuilderTest.java | 2 +- .../sql/ppl/utils/ArgumentFactoryTest.java | 12 +++---- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 8 ++--- 16 files changed, 50 insertions(+), 50 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 28238dd55c..00af5c6167 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -516,7 +516,7 @@ public LogicalPlan visitLookup(Lookup node, AnalysisContext queryContext) { LogicalPlan child = node.getChild().get(0).accept(this, queryContext); List options = node.getOptions(); // Todo, refactor the option. - Boolean appendOnly = (Boolean) options.get(0).getValue().getValue(); + Boolean overwrite = (Boolean) options.get(0).getValue().getValue(); Table table = dataSourceService @@ -543,7 +543,7 @@ public LogicalPlan visitLookup(Lookup node, AnalysisContext queryContext) { child, node.getIndexName(), matchFieldMap, - appendOnly, + overwrite, analyzeLookupCopyFields(node.getCopyFieldList(), queryContext, table)); } diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index 802ce35ebf..f628474c39 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -183,7 +183,7 @@ public ExplainResponseNode visitLookup(LookupOperator node, Object context) { "copyfields", node.getCopyFieldMap().toString(), "matchfields", node.getMatchFieldMap().toString(), "indexname", node.getIndexName(), - "appendonly", node.getAppendOnly()))); + "overwrite", node.getOverwrite()))); } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java index 3079b69941..7b0db5bc72 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalLookup.java @@ -21,20 +21,20 @@ public class LogicalLookup extends LogicalPlan { private final String indexName; private final Map matchFieldMap; private final Map copyFieldMap; - private final Boolean appendOnly; + private final Boolean overwrite; /** Constructor of LogicalLookup. */ public LogicalLookup( LogicalPlan child, String indexName, Map matchFieldMap, - Boolean appendOnly, + Boolean overwrite, Map copyFieldMap) { super(Arrays.asList(child)); this.indexName = indexName; this.copyFieldMap = copyFieldMap; this.matchFieldMap = matchFieldMap; - this.appendOnly = appendOnly; + this.overwrite = overwrite; } @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java index c4b1ccd824..4c9e3bfa65 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/LookupOperator.java @@ -31,7 +31,7 @@ public class LookupOperator extends PhysicalPlan { @Getter private final String indexName; @Getter private final Map matchFieldMap; @Getter private final Map copyFieldMap; - @Getter private final Boolean appendOnly; + @Getter private final Boolean overwrite; @EqualsAndHashCode.Exclude private final BiFunction, Map> lookup; @@ -42,13 +42,13 @@ public LookupOperator( PhysicalPlan input, String indexName, Map matchFieldMap, - Boolean appendOnly, + Boolean overwrite, Map copyFieldMap, BiFunction, Map> lookup) { this.input = input; this.indexName = indexName; this.matchFieldMap = matchFieldMap; - this.appendOnly = appendOnly; + this.overwrite = overwrite; this.copyFieldMap = copyFieldMap; this.lookup = lookup; } @@ -116,7 +116,7 @@ public ExprValue next() { Object lookedUpFieldValue = sourceOfAdditionalField.getValue(); String finalFieldName = copyMap.getOrDefault(lookedUpFieldName, lookedUpFieldName); ExprValue value = ExprValueUtils.fromObjectValue(lookedUpFieldValue); - if (appendOnly) { + if (overwrite) { resultTupleBuilder.putIfAbsent(finalFieldName, value); } else { resultTupleBuilder.put(finalFieldName, value); diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 5070d9fa49..cce8eacb21 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -1872,7 +1872,7 @@ public void visit_lookup_same_field_name_in_query_and_lookup_index() { AstDSL.relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(false))), Collections.emptyList())); } @@ -1894,7 +1894,7 @@ public void visit_lookup_use_multiple_fields() { LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of( AstDSL.map("comment", "comment"), AstDSL.map("dev_id", "field_value1")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(false))), Collections.emptyList())); } @@ -1913,7 +1913,7 @@ public void visit_lookup_various_field_name_in_query_and_lookup_index() { AstDSL.relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("dev_id", "comment")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(true))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(true))), Collections.emptyList())); } @@ -1925,7 +1925,7 @@ public void visit_lookup_should_report_error_when_lookup_table_does_not_exist(St relation("schema"), tableName, ImmutableList.of(AstDSL.map("string_value", "comment")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -1938,7 +1938,7 @@ public void visit_lookup_should_report_error_when_join_field_does_not_exist() { relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("no_such_field", "comment")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -1951,7 +1951,7 @@ public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_l relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("no_such_field", "comment")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -1964,7 +1964,7 @@ public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_q relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "no_such_field")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -1978,7 +1978,7 @@ public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_q relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "dev_id")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -1992,7 +1992,7 @@ public void visit_lookup_should_report_error_when_join_field_does_not_exist_in_q relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("string_value", "field_value2")), - ImmutableList.of(argument("appendonly", booleanLiteral(true))), + ImmutableList.of(argument("overwrite", booleanLiteral(true))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -2005,7 +2005,7 @@ public void visit_lookup_should_report_error_when_target_expression_point_out_fu relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map(field("ip_v4"), intLiteral(7))), - ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(argument("overwrite", booleanLiteral(false))), emptyList()); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -2028,7 +2028,7 @@ public void visit_lookup_copy_field() { AstDSL.relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(false))), ImmutableList.of(AstDSL.map("vendor", "maker")))); } @@ -2051,7 +2051,7 @@ public void visit_lookup_copy_multiple_fields() { AstDSL.relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(false))), ImmutableList.of( AstDSL.map("vendor", "maker"), AstDSL.map("serial_number", "serial")))); } @@ -2073,7 +2073,7 @@ public void visit_lookup_copy_field_when_origin_and_target_is_the_same() { AstDSL.relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(AstDSL.argument("appendonly", AstDSL.booleanLiteral(false))), + ImmutableList.of(AstDSL.argument("overwrite", AstDSL.booleanLiteral(false))), ImmutableList.of(AstDSL.map("vendor", "vendor")))); } @@ -2084,7 +2084,7 @@ public void visit_lookup_should_report_error_when_copy_field_does_not_exist() { relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(argument("overwrite", booleanLiteral(false))), ImmutableList.of(AstDSL.map("no_such_field", "maker"))); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -2097,7 +2097,7 @@ public void visit_lookup_should_report_error_when_copy_target_is_not_a_field() { relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(argument("overwrite", booleanLiteral(false))), ImmutableList.of(AstDSL.map(AstDSL.field("vendor"), AstDSL.intLiteral(8)))); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); @@ -2110,7 +2110,7 @@ public void visit_lookup_should_report_error_when_copy_source_is_not_a_field() { relation("schema"), LOOKUP_TABLE_DEVICE_NAMES, ImmutableList.of(AstDSL.map("ip_v4", "field_value2")), - ImmutableList.of(argument("appendonly", booleanLiteral(false))), + ImmutableList.of(argument("overwrite", booleanLiteral(false))), ImmutableList.of(AstDSL.map(AstDSL.booleanLiteral(true), AstDSL.field("maker")))); assertThrows(SemanticCheckException.class, () -> analyze(lookup)); diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index a2002f9191..fcbdcae9da 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -278,7 +278,7 @@ void can_explain_lookup() { "{lookup_index_field=query_index_field}", "indexname", "lookup_index_name", - "appendonly", + "overwrite", true), singletonList(tableScan.explainNode()))), explain.apply(plan)); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java index 2da1811e86..c7ab22dfde 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java @@ -580,7 +580,7 @@ public void lookup_ignore_non_struct_input() { @ParameterizedTest @ValueSource(booleans = {true, false}) - public void lookup_table_with_nulls(boolean appendOnly) { + public void lookup_table_with_nulls(boolean overwrite) { when(lookupFunction.apply(eq(LOOKUP_INDEX), anyMap())) .thenAnswer(lookupTableQueryResults("ip_v4", LOOKUP_TABLE_WITH_NULLS)); PhysicalPlan plan = @@ -589,7 +589,7 @@ public void lookup_table_with_nulls(boolean appendOnly) { LOOKUP_INDEX, ImmutableMap.of( new ReferenceExpression("ip_v4", STRING), new ReferenceExpression("ip", STRING)), - appendOnly, + overwrite, ImmutableMap.of(), lookupFunction); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index ece0b0205a..68ccaa4cdf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -124,7 +124,7 @@ public PhysicalPlan visitLookup(LookupOperator node, Object context) { visitInput(node.getInput(), context), node.getIndexName(), node.getMatchFieldMap(), - node.getAppendOnly(), + node.getOverwrite(), node.getCopyFieldMap(), node.getLookup()); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index 3103218fb1..d742996c73 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -231,7 +231,7 @@ public PhysicalPlan visitLookup(LogicalLookup node, OpenSearchIndexScan context) visitChild(node, context), node.getIndexName(), node.getMatchFieldMap(), - node.getAppendOnly(), + node.getOverwrite(), node.getCopyFieldMap(), lookup(singleRowQuery)); } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index f9c8382004..d930fdbe3c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -57,7 +57,7 @@ NUM: 'NUM'; // ARGUMENT KEYWORDS KEEPEMPTY: 'KEEPEMPTY'; CONSECUTIVE: 'CONSECUTIVE'; -APPENDONLY: 'APPENDONLY'; +OVERWRITE: 'OVERWRITE'; DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; PARTITIONS: 'PARTITIONS'; ALLNUM: 'ALLNUM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index a05729c5b2..630725fd66 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -95,7 +95,7 @@ copyFieldWithOptAs ; lookupCommand - : LOOKUP tableSource matchFieldWithOptAs (COMMA matchFieldWithOptAs)* (APPENDONLY EQUAL appendonly = booleanLiteral)? (copyFieldWithOptAs (COMMA copyFieldWithOptAs)*)* + : LOOKUP tableSource matchFieldWithOptAs (COMMA matchFieldWithOptAs)* (OVERWRITE EQUAL overwrite = booleanLiteral)? (copyFieldWithOptAs (COMMA copyFieldWithOptAs)*)* ; sortCommand diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 44c90b4d89..44b150fd3d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -83,9 +83,9 @@ public static List getArgumentList(DedupCommandContext ctx) { public static List getArgumentList(LookupCommandContext ctx) { return Arrays.asList( - ctx.appendonly != null - ? new Argument("appendonly", getArgumentValue(ctx.appendonly)) - : new Argument("appendonly", new Literal(false, DataType.BOOLEAN))); + ctx.overwrite != null + ? new Argument("overwrite", getArgumentValue(ctx.overwrite)) + : new Argument("overwrite", new Literal(false, DataType.BOOLEAN))); } /** diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 888fe5fc4b..1062299a7d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -241,11 +241,11 @@ public String visitLookup(Lookup node, String context) { .collect(Collectors.joining(",")); List options = node.getOptions(); - Boolean appendonly = (Boolean) options.get(0).getValue().getValue(); + Boolean overwrite = (Boolean) options.get(0).getValue().getValue(); return StringUtils.format( - "%s | lookup %s %s appendonly=%b %s", - child, lookupIndexName, matches, appendonly, copies) + "%s | lookup %s %s overwrite=%b %s", + child, lookupIndexName, matches, overwrite, copies) .trim(); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index f8cbe95ef1..2b72fa33db 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -404,7 +404,7 @@ public void testLookupCommand() { relation("t"), "a", fieldMap("field", "field"), - exprList(argument("appendonly", booleanLiteral(false))), + exprList(argument("overwrite", booleanLiteral(false))), Collections.emptyList())); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index 102f9591fe..c0c71f862a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -114,7 +114,7 @@ public void testLookupCommandRequiredArguments() { relation("t"), "a", fieldMap("field", "field"), - exprList(argument("appendonly", booleanLiteral(false))), + exprList(argument("overwrite", booleanLiteral(false))), Collections.emptyList())); } @@ -127,31 +127,31 @@ public void testLookupCommandFieldArguments() { relation("t"), "a", fieldMap("field", "field1", "field2", "field3"), - exprList(argument("appendonly", booleanLiteral(false))), + exprList(argument("overwrite", booleanLiteral(false))), fieldMap("destfield", "destfield1", "destfield2", "destfield3"))); } @Test public void testLookupCommandAppendTrueArgument() { assertEqual( - "source=t | lookup a field appendonly=true", + "source=t | lookup a field overwrite=true", lookup( relation("t"), "a", fieldMap("field", "field"), - exprList(argument("appendonly", booleanLiteral(true))), + exprList(argument("overwrite", booleanLiteral(true))), Collections.emptyList())); } @Test public void testLookupCommandAppendFalseArgument() { assertEqual( - "source=t | lookup a field appendonly=false", + "source=t | lookup a field overwrite=false", lookup( relation("t"), "a", fieldMap("field", "field"), - exprList(argument("appendonly", booleanLiteral(false))), + exprList(argument("overwrite", booleanLiteral(false))), Collections.emptyList())); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 88e6db1ab4..2121c6ce51 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -92,13 +92,13 @@ public void testDedupCommand() { @Test public void testLookupCommand() { assertEquals( - "source=t | lookup index field1 as field1,field2 as field2 appendonly=false", + "source=t | lookup index field1 as field1,field2 as field2 overwrite=false", anonymize("source=t | lookup index field1,field2")); assertEquals( - "source=t | lookup index field1 as field1,field2 as field2 appendonly=true", - anonymize("source=t | lookup index field1,field2 appendonly=true")); + "source=t | lookup index field1 as field1,field2 as field2 overwrite=true", + anonymize("source=t | lookup index field1,field2 overwrite=true")); assertEquals( - "source=t | lookup index field1 as field12,field2 as field22 appendonly=false copyfield1 as" + "source=t | lookup index field1 as field12,field2 as field22 overwrite=false copyfield1 as" + " copyfield1,copyfield2 as copyfield22", anonymize( "source=t | lookup index field1 as field12, field2 AS field22 copyfield1, copyfield2 as" From bc74c0c85ffe5acaf787766deff202b72ced8643 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Mon, 2 Dec 2024 13:16:43 -0800 Subject: [PATCH 17/20] add missing header licenses Signed-off-by: YANGDB --- .../opensearch/sql/planner/physical/LookupOperatorTest.java | 4 ++++ .../sql/opensearch/storage/SingleRowQueryTest.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java index c7ab22dfde..382028216c 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/LookupOperatorTest.java @@ -1,3 +1,7 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ package org.opensearch.sql.planner.physical; import static org.hamcrest.MatcherAssert.assertThat; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java index dbbb09f705..bd8369878d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/SingleRowQueryTest.java @@ -1,3 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.opensearch.storage; import static org.hamcrest.MatcherAssert.assertThat; From 8e6ec1bdbc1dbbef31a7b1c331cd24cbfed344df Mon Sep 17 00:00:00 2001 From: YANGDB Date: Mon, 2 Dec 2024 13:16:43 -0800 Subject: [PATCH 18/20] update appendonly with overwrite for test names Signed-off-by: YANGDB --- .../opensearch/sql/planner/logical/LogicalPlanDSL.java | 4 ++-- .../opensearch/sql/planner/physical/PhysicalPlanDSL.java | 4 ++-- docs/user/ppl/cmd/lookup.rst | 8 ++++---- .../test/java/org/opensearch/sql/ppl/LookupCommandIT.java | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index f2dfdfe661..2ab7aeb533 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -143,8 +143,8 @@ public static LogicalPlan lookup( LogicalPlan input, String indexName, Map matchFieldMap, - Boolean appendOnly, + Boolean overwrite, Map copyFields) { - return new LogicalLookup(input, indexName, matchFieldMap, appendOnly, copyFields); + return new LogicalLookup(input, indexName, matchFieldMap, overwrite, copyFields); } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index acb7df9f0b..ed95cd8dbd 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -88,11 +88,11 @@ public static LookupOperator lookup( PhysicalPlan input, String indexName, Map matchFieldMap, - Boolean appendOnly, + Boolean overwrite, Map copyFieldMap, BiFunction, Map> lookupFunction) { return new LookupOperator( - input, indexName, matchFieldMap, appendOnly, copyFieldMap, lookupFunction); + input, indexName, matchFieldMap, overwrite, copyFieldMap, lookupFunction); } public WindowOperator window( diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst index 03cb089b5c..3e64fd46f1 100644 --- a/docs/user/ppl/cmd/lookup.rst +++ b/docs/user/ppl/cmd/lookup.rst @@ -15,12 +15,12 @@ Description Syntax ============ -lookup [AS ] ["," [AS ]]... [appendonly=] [ [AS ]] ["," [AS ]]... +lookup [AS ] ["," [AS ]]... [overwrite=] [ [AS ]] ["," [AS ]]... * lookup-index: mandatory. the name of the lookup index. If more than one is provided, all of them must match. * lookup-field: mandatory. the name of the lookup field. Must be existing in the lookup-index. It is used to match to a local field (in the current search) to get the lookup document. When there is no lookup document matching it is a no-op. If there is more than one an exception is thrown. * local-lookup-field: optional. the name of a field in the current search to match against the lookup-field. **Default:** value of lookup-field. -* appendonly: optional. indicates if the values to copy over to the search result from the lookup document should overwrite existing values. If true no existing values are overwritten. **Default:** false. +* overwrite: optional. indicates if the values to copy over to the search result from the lookup document should overwrite existing values. If true no existing values are overwritten. **Default:** false. * source-field: optional. the fields to copy over from the lookup document to the search result. If no such fields are given all fields are copied. **Default:** all fields * local-source-field: optional. the final name of the field in the search result (if different from the field name in the lookup document). **Default:** value of source-field. @@ -125,7 +125,7 @@ PPL query:: | 21 | NULL | finance | false | +------------------+--------------+------------+--------+ - os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=true; + os> source=accounts | lookup hr employee_number AS account_number, dep AS department overwrite=true; fetched rows / total rows = 4/4 +------------------+----------+------------------+------------+-----------+---------+-----------------+ | account_number | gender | name | department | active | dep | employee_number | @@ -136,7 +136,7 @@ PPL query:: | 21 | F | Mandy Smith | it | NULL | it | 21 | +------------------+----------+------------------+------------+-----------+---------+-----------------+ - os> source=accounts | lookup hr employee_number AS account_number, dep AS department appendonly=false; + os> source=accounts | lookup hr employee_number AS account_number, dep AS department overwrite=false; fetched rows / total rows = 4/4 +------------------+----------+------------------+------------+-----------+---------+-----------------+ | account_number | gender | name | department | active | dep | employee_number | diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java index d6a85f4687..1353cf7ddf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LookupCommandIT.java @@ -170,7 +170,7 @@ public void testLookupSelectedMultipleAttributes() throws IOException { } @Test - public void testLookupShouldAppendOnlyShouldBeFalseByDefault() throws IOException { + public void testLookupShouldOverwriteShouldBeFalseByDefault() throws IOException { JSONObject result = executeQuery( String.format( @@ -212,11 +212,11 @@ public void testLookupShouldAppendOnlyShouldBeFalseByDefault() throws IOExceptio } @Test - public void testLookupWithAppendOnlyFalse() throws IOException { + public void testLookupWithOverwriteFalse() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | rename temperature as vendor | lookup %s did as device-id appendonly =" + "source=%s | rename temperature as vendor | lookup %s did as device-id overwrite =" + " true | sort @timestamp", TEST_INDEX_IOT_READINGS, TEST_INDEX_IOT_SENSORS)); verifyDataRows( From c13192e84d55d1ce96500416d2a65df07c87c711 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Tue, 3 Dec 2024 09:24:56 -0800 Subject: [PATCH 19/20] update :spotlessApply Signed-off-by: YANGDB --- .../org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 1062299a7d..97f9d9723b 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -244,8 +244,7 @@ public String visitLookup(Lookup node, String context) { Boolean overwrite = (Boolean) options.get(0).getValue().getValue(); return StringUtils.format( - "%s | lookup %s %s overwrite=%b %s", - child, lookupIndexName, matches, overwrite, copies) + "%s | lookup %s %s overwrite=%b %s", child, lookupIndexName, matches, overwrite, copies) .trim(); } From 37fec28ef85c34ace2a118dbef1c85e4fbb0b199 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Tue, 3 Dec 2024 12:38:03 -0800 Subject: [PATCH 20/20] replace lookup.rst usage of `accounts` index with `account_data` index name Signed-off-by: YANGDB --- docs/user/ppl/cmd/lookup.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst index 3e64fd46f1..75ae380421 100644 --- a/docs/user/ppl/cmd/lookup.rst +++ b/docs/user/ppl/cmd/lookup.rst @@ -33,7 +33,7 @@ The example shows a simple lookup to add the name of a person from a lookup inde PPL query:: - os> source=accounts; + os> source=account_data; fetched rows / total rows = 2/2 +------------------+----------+ | account_number | gender | @@ -51,7 +51,7 @@ PPL query:: | 13 | Alice | +------------------+----------+ - os> source=accounts | lookup hr account_number; + os> source=account_data | lookup hr account_number; fetched rows / total rows = 2/2 +------------------+----------+----------+ | account_number | gender | name | @@ -68,7 +68,7 @@ The example show a lookup to add the name of a person from a lookup index with d PPL query:: - os> source=accounts; + os> source=account_data; fetched rows / total rows = 2/2 +------------------+----------+ | account_number | gender | @@ -86,7 +86,7 @@ PPL query:: | 13 | Alice | +------------------+----------+ - os> source=accounts | lookup hr employee_number AS account_number name AS given_name; + os> source=account_data | lookup hr employee_number AS account_number name AS given_name; fetched rows / total rows = 2/2 +------------------+----------+----------------+ | account_number | gender | given_name | @@ -102,7 +102,7 @@ The example show a lookup to add the name of a person from a lookup index with d PPL query:: - os> source=accounts; + os> source=account_data; fetched rows / total rows = 4/4 +------------------+----------+------------+------------------+ | account_number | gender | department | name | @@ -125,7 +125,7 @@ PPL query:: | 21 | NULL | finance | false | +------------------+--------------+------------+--------+ - os> source=accounts | lookup hr employee_number AS account_number, dep AS department overwrite=true; + os> source=account_data | lookup hr employee_number AS account_number, dep AS department overwrite=true; fetched rows / total rows = 4/4 +------------------+----------+------------------+------------+-----------+---------+-----------------+ | account_number | gender | name | department | active | dep | employee_number | @@ -136,7 +136,7 @@ PPL query:: | 21 | F | Mandy Smith | it | NULL | it | 21 | +------------------+----------+------------------+------------+-----------+---------+-----------------+ - os> source=accounts | lookup hr employee_number AS account_number, dep AS department overwrite=false; + os> source=account_data | lookup hr employee_number AS account_number, dep AS department overwrite=false; fetched rows / total rows = 4/4 +------------------+----------+------------------+------------+-----------+---------+-----------------+ | account_number | gender | name | department | active | dep | employee_number |