From 21af1a9228f797ff4c620a95cbd39a96e89d1f6b Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Sun, 6 Oct 2024 19:09:51 +0200 Subject: [PATCH 01/15] Implement between Signed-off-by: Hendrik Saly --- .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 1 + .../opensearch/sql/ppl/CatalystQueryPlanVisitor.java | 12 ++++++++++++ .../sql/ppl/parser/AstExpressionBuilder.java | 7 +++++++ 4 files changed, 21 insertions(+) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 3b5f605df..82fdeb42f 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -392,6 +392,7 @@ LIKE: 'LIKE'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; ISPRESENT: 'ISPRESENT'; +BETWEEN: 'BETWEEN'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 12aa1332d..d1213de2a 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -402,6 +402,7 @@ logicalExpression | left = logicalExpression OR right = logicalExpression # logicalOr | left = logicalExpression XOR right = logicalExpression # logicalXor | booleanExpression # booleanExpr + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between ; comparisonExpression diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java index fabc42580..441287ddb 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java @@ -16,7 +16,9 @@ import org.apache.spark.sql.catalyst.expressions.Exists$; import org.apache.spark.sql.catalyst.expressions.Expression; import org.apache.spark.sql.catalyst.expressions.In$; +import org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual; import org.apache.spark.sql.catalyst.expressions.InSubquery$; +import org.apache.spark.sql.catalyst.expressions.LessThanOrEqual; import org.apache.spark.sql.catalyst.expressions.ListQuery$; import org.apache.spark.sql.catalyst.expressions.NamedExpression; import org.apache.spark.sql.catalyst.expressions.Predicate; @@ -35,6 +37,7 @@ import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Between; import org.opensearch.sql.ast.expression.BinaryExpression; import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Compare; @@ -867,5 +870,14 @@ public Expression visitExistsSubquery(ExistsSubquery node, CatalystPlanContext c Option.empty()); return context.getNamedParseExpressions().push(existsSubQuery); } + + @Override + public Expression visitBetween(Between node, CatalystPlanContext context) { + Expression value = analyze(node.getValue(), context); + Expression lower = analyze(node.getLowerBound(), context); + Expression upper = analyze(node.getUpperBound(), context); + context.retainAllNamedParseExpressions(p -> p); + return context.getNamedParseExpressions().push(new org.apache.spark.sql.catalyst.expressions.And(new GreaterThanOrEqual(value, lower), new LessThanOrEqual(value, upper))); + } } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 20c55a401..6a0c80c16 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -17,6 +17,7 @@ import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.AttributeList; +import org.opensearch.sql.ast.expression.Between; import org.opensearch.sql.ast.expression.Case; import org.opensearch.sql.ast.expression.Compare; import org.opensearch.sql.ast.expression.DataType; @@ -277,6 +278,12 @@ public UnresolvedExpression visitConvertedDataType(OpenSearchPPLParser.Converted return new Literal(ctx.getText(), DataType.STRING); } + @Override + public UnresolvedExpression visitBetween(OpenSearchPPLParser.BetweenContext ctx) { + UnresolvedExpression betweenExpr = new Between(visit(ctx.expr1),visit(ctx.expr2),visit(ctx.expr3)); + return ctx.NOT() != null ? new Not(betweenExpr) : betweenExpr; + } + private Function buildFunction( String functionName, List args) { return new Function( From 98d68fe21373062b180d5532f8a2f86666a2ec8a Mon Sep 17 00:00:00 2001 From: Jens Schmidt Date: Wed, 16 Oct 2024 07:06:59 +0200 Subject: [PATCH 02/15] add integration test for between command to the straight and NOT usage Signed-off-by: Jens Schmidt --- .../ppl/FlintSparkPPLBetweenITSuite.scala | 65 +++++++++++++++++++ ...BetweenExpressionTranslatorTestSuite.scala | 55 ++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala create mode 100644 ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala new file mode 100644 index 000000000..ae142d944 --- /dev/null +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.streaming.StreamTest + +class FlintSparkPPLBetweenITSuite + extends QueryTest + with LogicalPlanTestUtils + with FlintPPLSuite + with StreamTest { + + /** Test table and index name */ + private val testTable = "spark_catalog.default.flint_ppl_test" + + override def beforeAll(): Unit = { + super.beforeAll() + + // Create test table + createPartitionedStateCountryTable(testTable) + } + + protected override def afterEach(): Unit = { + super.afterEach() + // Stop all streaming jobs if any + spark.streams.active.foreach { job => + job.stop() + job.awaitTermination() + } + } + + test("test between should return records between two values") { + val frame = sql(s""" + | source = $testTable | where age between 20 and 30 + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 3) + assert(frame.columns.length == 6) + + results.foreach(row => { + val age = row.getAs[Int]("age") + assert(age >= 20 && age <= 30, s"Age $age is not between 20 and 30") + }) + } + + test("test between should return records NOT between two values") { + val frame = sql(s""" + | source = $testTable | where age NOT between 20 and 30 + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 1) + assert(frame.columns.length == 6) + + results.foreach(row => { + val age = row.getAs[Int]("age") + assert(age < 20 || age > 30, s"Age $age is not between 20 and 30") + }) + } +} diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala new file mode 100644 index 000000000..d694ebce7 --- /dev/null +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.opensearch.flint.spark.ppl.PlaneUtils.plan +import org.opensearch.sql.ppl.{CatalystPlanContext, CatalystQueryPlanVisitor} +import org.scalatest.matchers.should.Matchers + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{And, GreaterThanOrEqual, LessThanOrEqual, Literal} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ + +class PPLLogicalPlanBetweenExpressionTranslatorTestSuite + extends SparkFunSuite + with PlanTest + with LogicalPlanTestUtils + with Matchers { + + private val planTransformer = new CatalystQueryPlanVisitor() + private val pplParser = new PPLSyntaxParser() + + test("test between expression") { + // if successful build ppl logical plan and translate to catalyst logical plan + val context = new CatalystPlanContext + val logPlan = { + planTransformer.visit( + plan( + pplParser, + "source = table | where datetime_field >= '2024-09-10' and datetime_field <= '2024-09-15'"), + context) + } + // SQL: SELECT * FROM table WHERE datetime_field BETWEEN '2024-09-10' AND '2024-09-15' + val star = Seq(UnresolvedStar(None)) + + val datetime_field = UnresolvedAttribute("datetime_field") + val tableRelation = UnresolvedRelation(Seq("table")) + + val lowerBound = Literal("2024-09-10") + val upperBound = Literal("2024-09-15") + val betweenCondition = And( + GreaterThanOrEqual(datetime_field, lowerBound), + LessThanOrEqual(datetime_field, upperBound)) + + val filterPlan = Filter(betweenCondition, tableRelation) + val expectedPlan = Project(star, filterPlan) + + comparePlans(expectedPlan, logPlan, false) + } + +} From 08acaa2fd629d768b0329875eaebda845561f240 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Wed, 16 Oct 2024 08:29:56 +0200 Subject: [PATCH 03/15] Add docs Signed-off-by: Hendrik Saly --- docs/ppl-lang/PPL-Example-Commands.md | 2 ++ docs/ppl-lang/ppl-where-command.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 9ca6cf258..e4707efb5 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -56,6 +56,8 @@ _- **Limitation: new field added by eval command with a function cannot be dropp - `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` - `source = table | where a not in (1, 2, 3) | fields a,b,c` +- `source = table | where a between 1 and 4` +- `source = table | where b not between '2024-09-10' and '2025-09-10'` ```sql source = table | eval status_category = diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md index 94ddc1f5c..b93ef301e 100644 --- a/docs/ppl-lang/ppl-where-command.md +++ b/docs/ppl-lang/ppl-where-command.md @@ -41,6 +41,8 @@ PPL query: - `source = table | where isempty(a)` - `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` +- `source = table | where a between 1 and 4` +- `source = table | where b not between '2024-09-10' and '2025-09-10'` - `source = table | eval status_category = case(a >= 200 AND a < 300, 'Success', From 55965329a06704e3352e31b3e14a047b3bf57720 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Wed, 16 Oct 2024 08:35:59 +0200 Subject: [PATCH 04/15] Add proposed syntax to ppl planning Signed-off-by: Hendrik Saly --- docs/ppl-lang/planning/ppl-between.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/ppl-lang/planning/ppl-between.md diff --git a/docs/ppl-lang/planning/ppl-between.md b/docs/ppl-lang/planning/ppl-between.md new file mode 100644 index 000000000..df68f4086 --- /dev/null +++ b/docs/ppl-lang/planning/ppl-between.md @@ -0,0 +1,20 @@ +## between syntax proposal + +1. **Proposed syntax** + - `... | where expr1 [NOT] BETWEEN expr2 AND expr3` + - evaluate if expr1 is [not] in between expr2 and expr3 + - `... | where a between 1 and 4` + - `... | where b not between '2024-09-10' and '2025-09-10'` + +2. **Proposed impl** + - forward to sparks built-in function of between + +### New syntax definition in ANTLR + +```ANTLR + +logicalExpression + ... + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between + +``` \ No newline at end of file From 62c467db9ac7178071b96961ae321691974068e0 Mon Sep 17 00:00:00 2001 From: Jens Schmidt Date: Wed, 9 Oct 2024 14:08:29 +0200 Subject: [PATCH 05/15] adjust gitignore Signed-off-by: Jens Schmidt --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index bc1705ce3..2e066403e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,10 @@ logs/ *.zip gen/ +/add-opensearch-data.sh +/rebase.sh +/run-opensearch.sh +/run-spark.sh +/sync_fork.sh +/commands.scala +/opensearch-3.0.0/ From 811ab1896c76c7414d3951c8651ea5cc03bb02fe Mon Sep 17 00:00:00 2001 From: Jens Schmidt Date: Wed, 9 Oct 2024 14:09:27 +0200 Subject: [PATCH 06/15] adjust gitignore: add spark-bin Signed-off-by: Jens Schmidt --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2e066403e..713f86318 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ gen/ /sync_fork.sh /commands.scala /opensearch-3.0.0/ +/spark-bin/ From 3d705c58aa920610162e952cb3412cdb3cc350f5 Mon Sep 17 00:00:00 2001 From: Jens Schmidt Date: Wed, 9 Oct 2024 14:14:04 +0200 Subject: [PATCH 07/15] clean .gitignore: remove local adjustments Signed-off-by: Jens Schmidt --- .gitignore | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.gitignore b/.gitignore index 713f86318..bc1705ce3 100644 --- a/.gitignore +++ b/.gitignore @@ -21,11 +21,3 @@ logs/ *.zip gen/ -/add-opensearch-data.sh -/rebase.sh -/run-opensearch.sh -/run-spark.sh -/sync_fork.sh -/commands.scala -/opensearch-3.0.0/ -/spark-bin/ From 84b43b22f8f2847f7eabca03e13c9fabb2593879 Mon Sep 17 00:00:00 2001 From: Jens Schmidt Date: Fri, 18 Oct 2024 10:12:14 +0200 Subject: [PATCH 08/15] update integration test to use between keyword Signed-off-by: Jens Schmidt --- .../PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala index d694ebce7..6defcb766 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBetweenExpressionTranslatorTestSuite.scala @@ -31,7 +31,7 @@ class PPLLogicalPlanBetweenExpressionTranslatorTestSuite planTransformer.visit( plan( pplParser, - "source = table | where datetime_field >= '2024-09-10' and datetime_field <= '2024-09-15'"), + "source = table | where datetime_field between '2024-09-10' and '2024-09-15'"), context) } // SQL: SELECT * FROM table WHERE datetime_field BETWEEN '2024-09-10' AND '2024-09-15' From 2bef42229ada188cbec006a78a3182344e327ded Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 16:01:37 +0100 Subject: [PATCH 09/15] Move to comparisonExpression Signed-off-by: Hendrik Saly --- ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index d1213de2a..18a4c57e0 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -402,12 +402,12 @@ logicalExpression | left = logicalExpression OR right = logicalExpression # logicalOr | left = logicalExpression XOR right = logicalExpression # logicalXor | booleanExpression # booleanExpr - | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between ; comparisonExpression : left = valueExpression comparisonOperator right = valueExpression # compareExpr | valueExpression NOT? IN valueList # inExpr + | expr1 = functionArg BETWEEN expr2 = functionArg AND expr3 = functionArg # between ; valueExpressionList From f95f870e0e25a03a00acf83d1fc5eed5a4389efa Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 16:03:30 +0100 Subject: [PATCH 10/15] Added to keywordsCanBeId Signed-off-by: Hendrik Saly --- ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 | 1 + 1 file changed, 1 insertion(+) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 18a4c57e0..f120fde04 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -1115,4 +1115,5 @@ keywordsCanBeId | FULL | SEMI | ANTI + | BETWEEN ; From 1490db0a0b627f50808464a11ae9742b75b42bfe Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 16:12:29 +0100 Subject: [PATCH 11/15] Update docs Signed-off-by: Hendrik Saly --- docs/ppl-lang/PPL-Example-Commands.md | 4 ++-- docs/ppl-lang/ppl-where-command.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index e4707efb5..82843cd80 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -56,8 +56,8 @@ _- **Limitation: new field added by eval command with a function cannot be dropp - `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` - `source = table | where a not in (1, 2, 3) | fields a,b,c` -- `source = table | where a between 1 and 4` -- `source = table | where b not between '2024-09-10' and '2025-09-10'` +- `source = table | where a between 1 and 4` - Note: This returns a >= 1 and a <= 4, i.e. [1, 4] +- `source = table | where b not between '2024-09-10' and '2025-09-10'` - Note: This returns b >= '2024-09-10' and b <= '2025-09-10' ```sql source = table | eval status_category = diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md index b93ef301e..89a7e61fa 100644 --- a/docs/ppl-lang/ppl-where-command.md +++ b/docs/ppl-lang/ppl-where-command.md @@ -41,8 +41,8 @@ PPL query: - `source = table | where isempty(a)` - `source = table | where isblank(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` -- `source = table | where a between 1 and 4` -- `source = table | where b not between '2024-09-10' and '2025-09-10'` +- `source = table | where a between 1 and 4` - Note: This returns a >= 1 and a <= 4, i.e. [1, 4] +- `source = table | where b not between '2024-09-10' and '2025-09-10'` - Note: This returns b >= '2024-09-10' and b <= '2025-09-10' - `source = table | eval status_category = case(a >= 200 AND a < 300, 'Success', From a209f3138b803c4eea6e4f31f13042ca5fd415b9 Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 16:25:08 +0100 Subject: [PATCH 12/15] Add additional tests Signed-off-by: Hendrik Saly --- .../ppl/FlintSparkPPLBetweenITSuite.scala | 58 ++++++++++++++++++- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala index ae142d944..bbf532304 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala @@ -16,12 +16,14 @@ class FlintSparkPPLBetweenITSuite /** Test table and index name */ private val testTable = "spark_catalog.default.flint_ppl_test" + private val timeSeriesTestTable = "spark_catalog.default.flint_ppl_timeseries_test" override def beforeAll(): Unit = { super.beforeAll() - // Create test table + // Create test tables createPartitionedStateCountryTable(testTable) + createTimeSeriesTable(timeSeriesTestTable) } protected override def afterEach(): Unit = { @@ -33,7 +35,7 @@ class FlintSparkPPLBetweenITSuite } } - test("test between should return records between two values") { + test("test between should return records between two integer values") { val frame = sql(s""" | source = $testTable | where age between 20 and 30 | """.stripMargin) @@ -48,7 +50,22 @@ class FlintSparkPPLBetweenITSuite }) } - test("test between should return records NOT between two values") { + test("test between should return records between two integer computed values") { + val frame = sql(s""" + | source = $testTable | where age between 20 + 1 and 30 - 1 + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 3) + assert(frame.columns.length == 6) + + results.foreach(row => { + val age = row.getAs[Int]("age") + assert(age >= 20 && age <= 30, s"Age $age is not between 20 and 30") + }) + } + + test("test between should return records NOT between two integer values") { val frame = sql(s""" | source = $testTable | where age NOT between 20 and 30 | """.stripMargin) @@ -62,4 +79,39 @@ class FlintSparkPPLBetweenITSuite assert(age < 20 || age > 30, s"Age $age is not between 20 and 30") }) } + + test("test between should return records between two date values") { + val frame = sql(s""" + | source = $timeSeriesTestTable | where timestamp between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 2) + assert(frame.columns.length == 4) + + results.foreach(row => { + val ts = row.getAs[String]("timestamp") + assert( + ts >= "2023-10-01 00:01:00" && ts <= "2023-10-01 00:01:00", + s"Timestamp $ts is not between '2023-10-01 00:01:00' and '2023-10-01 00:10:00'") + }) + } + + test("test between should return records NOT between two date values") { + val frame = sql(s""" + | source = $timeSeriesTestTable | where timestamp NOT between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 4) + assert(frame.columns.length == 4) + + results.foreach(row => { + val ts = row.getAs[String]("timestamp") + assert( + ts < "2023-10-01 00:01:00" || ts > "2023-10-01 00:01:00", + s"Timestamp $ts is not between '2023-10-01 00:01:00' and '2023-10-01 00:10:00'") + }) + + } } From fadd2d28a879afe7420c05e06b754fd022b5c06e Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 16:39:22 +0100 Subject: [PATCH 13/15] Move to comparisonExpression -2- Signed-off-by: Hendrik Saly --- .../org/opensearch/sql/ppl/parser/AstExpressionBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 6a0c80c16..5a196f311 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -281,7 +281,7 @@ public UnresolvedExpression visitConvertedDataType(OpenSearchPPLParser.Converted @Override public UnresolvedExpression visitBetween(OpenSearchPPLParser.BetweenContext ctx) { UnresolvedExpression betweenExpr = new Between(visit(ctx.expr1),visit(ctx.expr2),visit(ctx.expr3)); - return ctx.NOT() != null ? new Not(betweenExpr) : betweenExpr; + return betweenExpr; } private Function buildFunction( From f8be3a7ef90ed5d3a2ab618eee9fa3ff0d06af8d Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 19:39:11 +0100 Subject: [PATCH 14/15] Fix docs Signed-off-by: Hendrik Saly --- docs/ppl-lang/planning/ppl-between.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/ppl-lang/planning/ppl-between.md b/docs/ppl-lang/planning/ppl-between.md index df68f4086..6c8e300e8 100644 --- a/docs/ppl-lang/planning/ppl-between.md +++ b/docs/ppl-lang/planning/ppl-between.md @@ -3,11 +3,8 @@ 1. **Proposed syntax** - `... | where expr1 [NOT] BETWEEN expr2 AND expr3` - evaluate if expr1 is [not] in between expr2 and expr3 - - `... | where a between 1 and 4` - - `... | where b not between '2024-09-10' and '2025-09-10'` - -2. **Proposed impl** - - forward to sparks built-in function of between + - `... | where a between 1 and 4` - Note: This returns a >= 1 and a <= 4, i.e. [1, 4] + - `... | where b not between '2024-09-10' and '2025-09-10'` - Note: This returns b >= '2024-09-10' and b <= '2025-09-10' ### New syntax definition in ANTLR From a0f6af825f20d2d2a13a70f5068ccdf4e16b41dc Mon Sep 17 00:00:00 2001 From: Hendrik Saly Date: Mon, 28 Oct 2024 20:04:17 +0100 Subject: [PATCH 15/15] Added IT tests Signed-off-by: Hendrik Saly --- .../ppl/FlintSparkPPLBetweenITSuite.scala | 37 ++++++++++++++----- .../src/main/antlr4/OpenSearchPPLParser.g4 | 2 +- .../sql/ppl/parser/AstExpressionBuilder.java | 2 +- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala index bbf532304..ce0be1409 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBetweenITSuite.scala @@ -5,6 +5,8 @@ package org.opensearch.flint.spark.ppl +import java.sql.Timestamp + import org.apache.spark.sql.QueryTest import org.apache.spark.sql.streaming.StreamTest @@ -56,12 +58,12 @@ class FlintSparkPPLBetweenITSuite | """.stripMargin) val results = frame.collect() - assert(results.length == 3) + assert(results.length == 1) assert(frame.columns.length == 6) results.foreach(row => { val age = row.getAs[Int]("age") - assert(age >= 20 && age <= 30, s"Age $age is not between 20 and 30") + assert(age >= 21 && age <= 29, s"Age $age is not between 21 and 29") }) } @@ -80,9 +82,24 @@ class FlintSparkPPLBetweenITSuite }) } + test("test between should return records where NOT between two integer values") { + val frame = sql(s""" + | source = $testTable | where NOT age between 20 and 30 + | """.stripMargin) + + val results = frame.collect() + assert(results.length == 1) + assert(frame.columns.length == 6) + + results.foreach(row => { + val age = row.getAs[Int]("age") + assert(age < 20 || age > 30, s"Age $age is not between 20 and 30") + }) + } + test("test between should return records between two date values") { val frame = sql(s""" - | source = $timeSeriesTestTable | where timestamp between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' + | source = $timeSeriesTestTable | where time between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' | """.stripMargin) val results = frame.collect() @@ -90,26 +107,28 @@ class FlintSparkPPLBetweenITSuite assert(frame.columns.length == 4) results.foreach(row => { - val ts = row.getAs[String]("timestamp") + val ts = row.getAs[Timestamp]("time") assert( - ts >= "2023-10-01 00:01:00" && ts <= "2023-10-01 00:01:00", + !ts.before(Timestamp.valueOf("2023-10-01 00:01:00")) || !ts.after( + Timestamp.valueOf("2023-10-01 00:01:00")), s"Timestamp $ts is not between '2023-10-01 00:01:00' and '2023-10-01 00:10:00'") }) } test("test between should return records NOT between two date values") { val frame = sql(s""" - | source = $timeSeriesTestTable | where timestamp NOT between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' + | source = $timeSeriesTestTable | where time NOT between '2023-10-01 00:01:00' and '2023-10-01 00:10:00' | """.stripMargin) val results = frame.collect() - assert(results.length == 4) + assert(results.length == 3) assert(frame.columns.length == 4) results.foreach(row => { - val ts = row.getAs[String]("timestamp") + val ts = row.getAs[Timestamp]("time") assert( - ts < "2023-10-01 00:01:00" || ts > "2023-10-01 00:01:00", + ts.before(Timestamp.valueOf("2023-10-01 00:01:00")) || ts.after( + Timestamp.valueOf("2023-10-01 00:01:00")), s"Timestamp $ts is not between '2023-10-01 00:01:00' and '2023-10-01 00:10:00'") }) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index f120fde04..48984b3a5 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -407,7 +407,7 @@ logicalExpression comparisonExpression : left = valueExpression comparisonOperator right = valueExpression # compareExpr | valueExpression NOT? IN valueList # inExpr - | expr1 = functionArg BETWEEN expr2 = functionArg AND expr3 = functionArg # between + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between ; valueExpressionList diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 5a196f311..6a0c80c16 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -281,7 +281,7 @@ public UnresolvedExpression visitConvertedDataType(OpenSearchPPLParser.Converted @Override public UnresolvedExpression visitBetween(OpenSearchPPLParser.BetweenContext ctx) { UnresolvedExpression betweenExpr = new Between(visit(ctx.expr1),visit(ctx.expr2),visit(ctx.expr3)); - return betweenExpr; + return ctx.NOT() != null ? new Not(betweenExpr) : betweenExpr; } private Function buildFunction(