From b44dad7883da0e40297bc3c70ea79c94a0d709e1 Mon Sep 17 00:00:00 2001 From: currantw Date: Mon, 16 Dec 2024 08:48:38 -0800 Subject: [PATCH] Initial implementation --- docs/ppl-lang/PPL-Example-Commands.md | 2 +- docs/ppl-lang/ppl-parse-command.md | 2 +- .../spark/ppl/FlintSparkPPLParseITSuite.scala | 2 +- .../src/main/antlr4/OpenSearchPPLLexer.g4 | 7 ------- .../src/main/antlr4/OpenSearchPPLParser.g4 | 14 +------------- .../sql/ppl/parser/AstExpressionBuilder.java | 12 +++--------- .../opensearch/sql/ppl/utils/ArgumentFactory.java | 11 +---------- .../PPLLogicalPlanParseTranslatorTestSuite.scala | 11 +++++++---- 8 files changed, 15 insertions(+), 46 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 5a61992de..0afee9255 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -274,7 +274,7 @@ source = table | where ispresent(a) | - `source=accounts | parse email '.+@(?.+)' | stats count() by host` - `source=accounts | parse email '.+@(?.+)' | eval eval_result=1 | fields host, eval_result` - `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` -- `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` +- `source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort streetNumber | fields streetNumber, street` - Limitation: [see limitations](ppl-parse-command.md#limitations) #### **Grok** diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md index 0e000756e..434ee07c2 100644 --- a/docs/ppl-lang/ppl-parse-command.md +++ b/docs/ppl-lang/ppl-parse-command.md @@ -58,7 +58,7 @@ The example shows how to sort street numbers that are higher than 500 in ``addre PPL query: - os> source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; + os> source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort streetNumber | fields streetNumber, street ; fetched rows / total rows = 3/3 +----------------+----------------+ | streetNumber | street | diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala index e69999a8e..f091596d0 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala @@ -216,7 +216,7 @@ class FlintSparkPPLParseITSuite test("test parse email & host expressions including cast and sort commands") { val frame = sql(s""" - | source = $testTable| parse street_address '(?\\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street + | source = $testTable| parse street_address '(?\\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort streetNumber | fields streetNumber, street | """.stripMargin) // Retrieve the results val results: Array[Row] = frame.collect() diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index d15f5c8e3..8ebe1f3f3 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -82,13 +82,6 @@ DATASOURCES: 'DATASOURCES'; USING: 'USING'; WITH: 'WITH'; -// FIELD KEYWORDS -AUTO: 'AUTO'; -STR: 'STR'; -IP: 'IP'; -NUM: 'NUM'; - - // FIELDSUMMARY keywords FIELDSUMMARY: 'FIELDSUMMARY'; INCLUDEFIELDS: 'INCLUDEFIELDS'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 2466a3d23..4b170d50d 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -515,15 +515,7 @@ wcFieldList ; sortField - : (PLUS | MINUS)? sortFieldExpression - ; - -sortFieldExpression - : fieldExpression - | AUTO LT_PRTHS fieldExpression RT_PRTHS - | STR LT_PRTHS fieldExpression RT_PRTHS - | IP LT_PRTHS fieldExpression RT_PRTHS - | NUM LT_PRTHS fieldExpression RT_PRTHS + : (PLUS | MINUS)? fieldExpression ; fieldExpression @@ -1095,10 +1087,6 @@ keywordsCanBeId | INDEX | DESC | DATASOURCES - | AUTO - | STR - | IP - | NUM | FROM | PATTERN | NEW_FIELD diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 1fe57d13e..590d547a2 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -45,8 +45,6 @@ import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; import org.opensearch.sql.ast.expression.subquery.InSubquery; import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.utils.ArgumentFactory; @@ -58,7 +56,6 @@ import java.util.stream.IntStream; import java.util.stream.Stream; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.EQUAL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LENGTH; @@ -80,7 +77,7 @@ public class AstExpressionBuilder extends OpenSearchPPLParserBaseVisitor timestampFunctionArguments( OpenSearchPPLParser.TimestampFunctionCallContext ctx) { - List args = - Arrays.asList( + return Arrays.asList( new Literal(ctx.timestampFunction().simpleDateTimePart().getText(), DataType.STRING), visitFunctionArg(ctx.timestampFunction().firstArg), visitFunctionArg(ctx.timestampFunction().secondArg)); - return args; } private QualifiedName visitIdentifiers(List ctx) { diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 43f696bcd..255147190 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -82,16 +82,7 @@ public static List getArgumentList(OpenSearchPPLParser.SortFieldContex return Arrays.asList( ctx.MINUS() != null ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN)), - ctx.sortFieldExpression().AUTO() != null - ? new Argument("type", new Literal("auto", DataType.STRING)) - : ctx.sortFieldExpression().IP() != null - ? new Argument("type", new Literal("ip", DataType.STRING)) - : ctx.sortFieldExpression().NUM() != null - ? new Argument("type", new Literal("num", DataType.STRING)) - : ctx.sortFieldExpression().STR() != null - ? new Argument("type", new Literal("str", DataType.STRING)) - : new Argument("type", new Literal(null, DataType.NULL))); + : new Argument("asc", new Literal(true, DataType.BOOLEAN))); } /** diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala index 1d00b9484..61f02cae3 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala @@ -13,9 +13,10 @@ import org.scalatest.matchers.should.Matchers import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.ScalaReflection.universe.Star import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Cast, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, LocalLimit, Project, Sort} +import org.apache.spark.sql.types.{DataType, IntegerType} class PPLLogicalPlanParseTranslatorTestSuite extends SparkFunSuite @@ -120,13 +121,13 @@ class PPLLogicalPlanParseTranslatorTestSuite assert(compareByString(expectedPlan) === compareByString(logPlan)) } - test("test parse email & host expressions including cast and sort commands") { + test("test parse street number & address expressions including cast and sort commands") { val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, - "source=t | parse address '(?\\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street"), + "source=t | parse address '(?\\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort streetNumber | fields streetNumber, street"), context) val addressAttribute = UnresolvedAttribute("address") @@ -147,13 +148,15 @@ class PPLLogicalPlanParseTranslatorTestSuite Literal("2")), "street")() + val castExpression = Cast(streetNumberAttribute, IntegerType) + val expectedPlan = Project( Seq(streetNumberAttribute, streetAttribute), Sort( Seq(SortOrder(streetNumberAttribute, Ascending, NullsFirst, Seq.empty)), global = true, Filter( - GreaterThan(streetNumberAttribute, Literal(500)), + GreaterThan(castExpression, Literal(500)), Project( Seq(addressAttribute, streetNumberExpression, streetExpression, UnresolvedStar(None)), UnresolvedRelation(Seq("t"))))))