From 6a265608ce8473882103a713f0ba39de90975004 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 7 Oct 2024 10:44:52 +0200 Subject: [PATCH] Implement isblank Signed-off-by: Hendrik Saly --- docs/ppl-lang/PPL-Example-Commands.md | 2 + docs/ppl-lang/ppl-eval-command.md | 1 + docs/ppl-lang/ppl-where-command.md | 1 + .../FlintSparkPPLBuiltinFunctionITSuite.scala | 90 +++++++++++++++++++ .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 3 +- 6 files changed, 97 insertions(+), 1 deletion(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index fbe5f6ace..4395f7e23 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -37,6 +37,7 @@ _- **Limitation: new field added by eval command with a function cannot be dropp - `source = table | where ispresent(b)` - `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3` - `source = table | where isempty(a)` +- `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` ```sql @@ -82,6 +83,7 @@ Assumptions: `a`, `b`, `c` are existing fields in `table` - `source = table | eval f = ispresent(a)` - `source = table | eval r = coalesce(a, b, c) | fields r` - `source = table | eval e = isempty(a) | fields e` +- `source = table | eval e = isblank(a) | fields e` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md index aa86220db..cd0898c1b 100644 --- a/docs/ppl-lang/ppl-eval-command.md +++ b/docs/ppl-lang/ppl-eval-command.md @@ -76,6 +76,7 @@ Assumptions: `a`, `b`, `c` are existing fields in `table` - `source = table | eval f = ispresent(a)` - `source = table | eval r = coalesce(a, b, c) | fields r` - `source = table | eval e = isempty(a) | fields e` +- `source = table | eval e = isblank(a) | fields e` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md index f6f069f11..94ddc1f5c 100644 --- a/docs/ppl-lang/ppl-where-command.md +++ b/docs/ppl-lang/ppl-where-command.md @@ -39,6 +39,7 @@ PPL query: - `source = table | where ispresent(b)` - `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3` - `source = table | where isempty(a)` +- `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` - `source = table | eval status_category = diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index be894ad49..4c35549df 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -382,6 +382,96 @@ class FlintSparkPPLBuiltinFunctionITSuite comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) } + test("test string functions - isblank eval") { + val frame = sql(s""" + | source = $testNullTable | head 1 | eval a = isblank('full'), b = isblank(''), c = isblank(' ') | fields a, b, c + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = Array(Row(false, true, true)) + assert(results.sameElements(expectedResults)) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test_null")) + val localLimit = LocalLimit(Literal(1), table) + val globalLimit = GlobalLimit(Literal(1), localLimit) + + // val projectList = Seq(UnresolvedStar(None)) + + val caseOne = CaseWhen( + Seq( + ( + EqualTo( + UnresolvedFunction( + "length", + Seq(UnresolvedFunction("trim", Seq(Literal("full")), isDistinct = false)), + isDistinct = false), + Literal(0)), + Literal(true))), + Literal(false)) + val aliasOne = Alias(caseOne, "a")() + + val caseTwo = CaseWhen( + Seq( + ( + EqualTo( + UnresolvedFunction( + "length", + Seq(UnresolvedFunction("trim", Seq(Literal("")), isDistinct = false)), + isDistinct = false), + Literal(0)), + Literal(true))), + Literal(false)) + val aliasTwo = Alias(caseTwo, "b")() + + val caseThree = CaseWhen( + Seq( + ( + EqualTo( + UnresolvedFunction( + "length", + Seq(UnresolvedFunction("trim", Seq(Literal(" ")), isDistinct = false)), + isDistinct = false), + Literal(0)), + Literal(true))), + Literal(false)) + val aliasThree = Alias(caseThree, "c")() + + val projectList = Seq(UnresolvedStar(None), aliasOne, aliasTwo, aliasThree) + val innerProject = Project(projectList, globalLimit) + + val expectedPlan = Project( + Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b"), UnresolvedAttribute("c")), + innerProject) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test string functions - isblank where") { + val frame = sql(s""" + | source = $testNullTable | where isblank('I am not blank'); + | """.stripMargin) + val results: Array[Row] = frame.collect() + assert(results.length == 0) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test_null")) + val caseIsEmpty = CaseWhen( + Seq( + ( + EqualTo( + UnresolvedFunction( + "length", + Seq(UnresolvedFunction("trim", Seq(Literal("I am not blank")), isDistinct = false)), + isDistinct = false), + Literal(0)), + Literal(true))), + Literal(false)) + val filterPlan = Filter(caseIsEmpty, table) + val expectedPlan = Project(Seq(UnresolvedStar(None)), filterPlan) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + test("test math functions - abs with field") { val frame = sql(s""" | source = $testTable |where abs(age) = 30 | fields name, age diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 7af3e2109..64eaf415d 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -350,6 +350,7 @@ REPLACE: 'REPLACE'; REVERSE: 'REVERSE'; CAST: 'CAST'; ISEMPTY: 'ISEMPTY'; +ISBLANK: 'ISBLANK'; // BOOL FUNCTIONS LIKE: 'LIKE'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 12ec4ed26..06b3166f0 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -376,7 +376,7 @@ booleanExpression ; isEmptyExpression - : ISEMPTY LT_PRTHS functionArg RT_PRTHS + : (ISEMPTY | ISBLANK) LT_PRTHS functionArg RT_PRTHS ; caseFunction @@ -745,6 +745,7 @@ textFunctionName | REPLACE | REVERSE | ISEMPTY + | ISBLANK ; positionFunctionName