From a5f35bc8cb22fc2da25f7e5056ec96cf11431958 Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 19 Dec 2024 08:32:40 -0800 Subject: [PATCH] Revert changes, and update to mark syntax as deprecated and add TODOs. Signed-off-by: currantw --- docs/ppl-lang/PPL-Example-Commands.md | 3 ++- docs/ppl-lang/ppl-parse-command.md | 4 +++- .../spark/ppl/FlintSparkPPLParseITSuite.scala | 2 +- .../src/main/antlr4/OpenSearchPPLLexer.g4 | 13 ++++++++--- .../src/main/antlr4/OpenSearchPPLParser.g4 | 22 ++++++++++++++++--- .../sql/ppl/parser/AstExpressionBuilder.java | 4 +++- .../sql/ppl/utils/ArgumentFactory.java | 11 +++++++++- ...LLogicalPlanParseTranslatorTestSuite.scala | 15 ++++++++----- 8 files changed, 57 insertions(+), 17 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 0450d05a4..779212fd8 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -274,7 +274,8 @@ source = table | where ispresent(a) | - `source=accounts | parse email '.+@(?.+)' | stats count() by host` - `source=accounts | parse email '.+@(?.+)' | eval eval_result=1 | fields host, eval_result` - `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` -- `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort streetNumber | fields streetNumber, street` +- `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` + - **Note**: The `sort num` syntax above is deprecated. To sort numerically, cast to a numerical data type - e.g. `sort cast(streetNumber as integer)`. See [#963](https://github.com/opensearch-project/opensearch-spark/issues/963) for more details. - Limitation: [see limitations](ppl-parse-command.md#limitations) #### **Grok** diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md index 434ee07c2..6100564e7 100644 --- a/docs/ppl-lang/ppl-parse-command.md +++ b/docs/ppl-lang/ppl-parse-command.md @@ -58,7 +58,7 @@ The example shows how to sort street numbers that are higher than 500 in ``addre PPL query: - os> source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort streetNumber | fields streetNumber, street ; + os> source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; fetched rows / total rows = 3/3 +----------------+----------------+ | streetNumber | street | @@ -68,6 +68,8 @@ PPL query: | 880 | Holmes Lane | +----------------+----------------+ +**Note**: The `sort num` syntax is deprecated. To sort numerically, cast to a numerical data type - e.g. `sort cast(streetNumber as integer)`. See [#963](https://github.com/opensearch-project/opensearch-spark/issues/963) for more details. + ### Limitations There are a few limitations with parse command: diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala index 5d38ad32d..e69999a8e 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLParseITSuite.scala @@ -216,7 +216,7 @@ class FlintSparkPPLParseITSuite test("test parse email & host expressions including cast and sort commands") { val frame = sql(s""" - | source = $testTable| parse street_address '(?\\d+) (?.+)' | where streetNumber > 500 | sort streetNumber | fields streetNumber, street + | source = $testTable| parse street_address '(?\\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street | """.stripMargin) // Retrieve the results val results: Array[Row] = frame.collect() diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index eccf07ae9..6c72d5a56 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -82,6 +82,13 @@ DATASOURCES: 'DATASOURCES'; USING: 'USING'; WITH: 'WITH'; +// SORT FIELD KEYWORDS +// TODO #963: Remove unimplemented sort syntax +AUTO: 'AUTO'; +STR: 'STR'; +IP: 'IP'; +NUM: 'NUM'; + // FIELDSUMMARY keywords FIELDSUMMARY: 'FIELDSUMMARY'; INCLUDEFIELDS: 'INCLUDEFIELDS'; @@ -378,11 +385,11 @@ JSON_ARRAY: 'JSON_ARRAY'; JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH'; TO_JSON_STRING: 'TO_JSON_STRING'; JSON_EXTRACT: 'JSON_EXTRACT'; -JSON_DELETE : 'JSON_DELETE'; JSON_KEYS: 'JSON_KEYS'; JSON_VALID: 'JSON_VALID'; -JSON_APPEND: 'JSON_APPEND'; -//JSON_EXTEND : 'JSON_EXTEND'; +//JSON_APPEND: 'JSON_APPEND'; +//JSON_DELETE: 'JSON_DELETE'; +//JSON_EXTEND: 'JSON_EXTEND'; //JSON_SET: 'JSON_SET'; //JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH'; //JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index e6414706f..12fd7626c 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -515,7 +515,17 @@ wcFieldList ; sortField - : (PLUS | MINUS)? fieldExpression + : (PLUS | MINUS)? sortFieldExpression + ; + +sortFieldExpression + : fieldExpression + + // TODO #963: Remove unimplemented sort syntax + | AUTO LT_PRTHS fieldExpression RT_PRTHS + | STR LT_PRTHS fieldExpression RT_PRTHS + | IP LT_PRTHS fieldExpression RT_PRTHS + | NUM LT_PRTHS fieldExpression RT_PRTHS ; fieldExpression @@ -867,10 +877,10 @@ jsonFunctionName | JSON_ARRAY_LENGTH | TO_JSON_STRING | JSON_EXTRACT - | JSON_DELETE - | JSON_APPEND | JSON_KEYS | JSON_VALID +// | JSON_APPEND +// | JSON_DELETE // | JSON_EXTEND // | JSON_SET // | JSON_ARRAY_ALL_MATCH @@ -1169,4 +1179,10 @@ keywordsCanBeId | BETWEEN | CIDRMATCH | trendlineType + // SORT FIELD KEYWORDS + // TODO #963: Remove unimplemented sort syntax + | AUTO + | STR + | IP + | NUM ; diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 590d547a2..edc6a7923 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -180,8 +180,10 @@ public UnresolvedExpression visitWcFieldExpression(OpenSearchPPLParser.WcFieldEx @Override public UnresolvedExpression visitSortField(OpenSearchPPLParser.SortFieldContext ctx) { + + // TODO #963: Remove unimplemented sort syntax return new Field((QualifiedName) - visit(ctx.fieldExpression().qualifiedName()), + visit(ctx.sortFieldExpression().fieldExpression().qualifiedName()), ArgumentFactory.getArgumentList(ctx)); } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 255147190..43f696bcd 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -82,7 +82,16 @@ public static List getArgumentList(OpenSearchPPLParser.SortFieldContex return Arrays.asList( ctx.MINUS() != null ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN))); + : new Argument("asc", new Literal(true, DataType.BOOLEAN)), + ctx.sortFieldExpression().AUTO() != null + ? new Argument("type", new Literal("auto", DataType.STRING)) + : ctx.sortFieldExpression().IP() != null + ? new Argument("type", new Literal("ip", DataType.STRING)) + : ctx.sortFieldExpression().NUM() != null + ? new Argument("type", new Literal("num", DataType.STRING)) + : ctx.sortFieldExpression().STR() != null + ? new Argument("type", new Literal("str", DataType.STRING)) + : new Argument("type", new Literal(null, DataType.NULL))); } /** diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala index 4cbdaa070..0fe0db6da 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala @@ -122,22 +122,24 @@ class PPLLogicalPlanParseTranslatorTestSuite test("test parse email & host expressions including cast and sort commands") { val context = new CatalystPlanContext - val logPlan = - planTransformer.visit( - plan( - pplParser, - "source=t | parse address '(?\\d+) (?.+)' | where streetNumber > 500 | sort streetNumber | fields streetNumber, street"), - context) + + // TODO #963: Remove unimplemented sort syntax + val query = + "source=t | parse address '(?\\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street" + + val logPlan = planTransformer.visit(plan(pplParser, query), context) val addressAttribute = UnresolvedAttribute("address") val streetNumberAttribute = UnresolvedAttribute("streetNumber") val streetAttribute = UnresolvedAttribute("street") + val streetNumberExpression = Alias( RegExpExtract( addressAttribute, Literal("(?\\d+) (?.+)"), Literal("1")), "streetNumber")() + val streetExpression = Alias( RegExpExtract( addressAttribute, @@ -155,6 +157,7 @@ class PPLLogicalPlanParseTranslatorTestSuite Project( Seq(addressAttribute, streetNumberExpression, streetExpression, UnresolvedStar(None)), UnresolvedRelation(Seq("t")))))) + assert(compareByString(expectedPlan) === compareByString(logPlan)) }