From 6a020581414b10fc84679079b460a839d5677430 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 12 Sep 2024 19:59:48 +0000 Subject: [PATCH] ispresent implemented as function (#651) Signed-off-by: Lukasz Soszynski (cherry picked from commit 4563bda70d6a6bf55f532e9e468156cba135334d) Signed-off-by: github-actions[bot] --- .../FlintSparkPPLBuiltinFunctionITSuite.scala | 23 ++++++++++++++++++- ppl-spark-integration/README.md | 2 ++ .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 1 + .../function/BuiltinFunctionName.java | 1 + .../sql/ppl/parser/AstExpressionBuilder.java | 1 + .../ppl/utils/BuiltinFunctionTranslator.java | 1 + ...anStringFunctionsTranslatorTestSuite.scala | 15 ++++++++++++ 8 files changed, 44 insertions(+), 1 deletion(-) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index c9bf8a926..152d3f003 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -475,7 +475,7 @@ class FlintSparkPPLBuiltinFunctionITSuite assert(results.sorted.sameElements(expectedResults.sorted)) } - test("test boolean condition functions - isnull isnotnull ifnull nullif") { + test("test boolean condition functions - isnull isnotnull ifnull nullif ispresent") { val frameIsNull = sql(s""" | source = $testNullTable | where isnull(name) | fields age | """.stripMargin) @@ -513,6 +513,27 @@ class FlintSparkPPLBuiltinFunctionITSuite val expectedResults4: Array[Row] = Array(Row("John", 25), Row("Jane", null), Row(null, 10), Row("Jake", 70), Row("Hello", 30)) assert(results4.sameElements(expectedResults4)) + + val frameIsPresent = sql(s""" + | source = $testNullTable | where 
ispresent(name) | fields name + | """.stripMargin) + + val results5: Array[Row] = frameIsPresent.collect() + val expectedResults5: Array[Row] = Array(Row("John"), Row("Jane"), Row("Jake"), Row("Hello")) + assert(results5.sameElements(expectedResults5)) + + val frameEvalIsPresent = sql(s""" + | source = $testNullTable | eval hasName = ispresent(name) | fields name, hasName + | """.stripMargin) + + val results6: Array[Row] = frameEvalIsPresent.collect() + val expectedResults6: Array[Row] = Array( + Row("John", true), + Row("Jane", true), + Row(null, false), + Row("Jake", true), + Row("Hello", true)) + assert(results6.sameElements(expectedResults6)) } test("test typeof function") { diff --git a/ppl-spark-integration/README.md b/ppl-spark-integration/README.md index c31171127..fa668041d 100644 --- a/ppl-spark-integration/README.md +++ b/ppl-spark-integration/README.md @@ -242,6 +242,7 @@ See the next samples of PPL queries : - `source = table | where a < 1 | fields a,b,c` - `source = table | where b != 'test' | fields a,b,c` - `source = table | where c = 'test' | fields a,b,c | head 3` + - `source = table | where ispresent(b)` **Filters With Logical Conditions** - `source = table | where c = 'test' AND a = 1 | fields a,b,c` @@ -259,6 +260,7 @@ Assumptions: `a`, `b`, `c` are existing fields in `table` - `source = table | eval f = a * 2 | eval h = f * 2 | fields a,f,h` - `source = table | eval f = a * 2, h = f * 2 | fields a,f,h` (Spark 3.4.0+ required) - `source = table | eval f = a * 2, h = b | stats avg(f) by h` + - `source = table | eval f = ispresent(a)` Limitation: Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" - `source = table | eval a = 10 | fields a,b,c` diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index d202f5ff6..fc3e23514 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ 
b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -331,6 +331,7 @@ CAST: 'CAST'; LIKE: 'LIKE'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; +ISPRESENT: 'ISPRESENT'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index f4065be6d..ae313fdc5 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -626,6 +626,7 @@ conditionFunctionBase | ISNOTNULL | IFNULL | NULLIF + | ISPRESENT ; systemFunctionName diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index eb22164b9..bcb805c25 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -205,6 +205,7 @@ public enum BuiltinFunctionName { IF(FunctionName.of("if")), NULLIF(FunctionName.of("nullif")), ISNULL(FunctionName.of("isnull")), + ISPRESENT(FunctionName.of("ispresent")), ROW_NUMBER(FunctionName.of("row_number")), RANK(FunctionName.of("rank")), diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 352853398..b957ae357 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -61,6 +61,7 @@ public class AstExpressionBuilder extends OpenSearchPPLParserBaseVisitor() .put("isnull", IS_NULL.getName().getFunctionName()) .put("isnotnull", IS_NOT_NULL.getName().getFunctionName()) + 
.put("ispresent", IS_NOT_NULL.getName().getFunctionName()) .build(); @Override diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java index 53c6673a8..cb2246db8 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java @@ -66,6 +66,7 @@ public interface BuiltinFunctionTranslator { //condition functions .put(IS_NULL, "isnull") .put(IS_NOT_NULL, "isnotnull") + .put(BuiltinFunctionName.ISPRESENT, "isnotnull") .build(); static Expression builtinFunction(org.opensearch.sql.ast.expression.Function function, List args) { diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanStringFunctionsTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanStringFunctionsTranslatorTestSuite.scala index 0d3c12b82..fd6fd8866 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanStringFunctionsTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanStringFunctionsTranslatorTestSuite.scala @@ -222,4 +222,19 @@ class PPLLogicalPlanStringFunctionsTranslatorTestSuite val expectedPlan = Project(projectList, filterPlan) comparePlans(expectedPlan, logPlan, false) } + + test("test ispresent") { + val context = new CatalystPlanContext + val logPlan = + planTransformer.visit(plan(pplParser, "source=t a = ispresent(b)", false), context) + + val table = UnresolvedRelation(Seq("t")) + val filterExpr = EqualTo( + UnresolvedAttribute("a"), + UnresolvedFunction("isnotnull", seq(UnresolvedAttribute("b")), isDistinct = false)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedStar(None)) 
+ val expectedPlan = Project(projectList, filterPlan) + comparePlans(expectedPlan, logPlan, false) + } }