From 3639399fbb31c50d8891dda03b1cf9d79fbba88a Mon Sep 17 00:00:00 2001 From: currantw Date: Thu, 19 Dec 2024 22:33:56 -0800 Subject: [PATCH] Update to use `eval` for casting Signed-off-by: currantw --- docs/ppl-lang/PPL-Example-Commands.md | 2 +- docs/ppl-lang/ppl-parse-command.md | 2 +- ...LLogicalPlanParseTranslatorTestSuite.scala | 43 +++++++++++-------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 9e5acc077..f9819436b 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -274,7 +274,7 @@ source = table | where ispresent(a) | - `source=accounts | parse email '.+@(?<host>.+)' | stats count() by host` - `source=accounts | parse email '.+@(?<host>.+)' | eval eval_result=1 | fields host, eval_result` - `source=accounts | parse email '.+@(?<host>.+)' | where age > 45 | sort - age | fields age, email, host` -- `source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as integer) > 500 | sort cast(streetNumber as integer) | fields streetNumber, street` +- `source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | eval streetNumberInt = cast(streetNumber as integer) | where streetNumberInt > 500 | sort streetNumberInt | fields streetNumber, street` - **Note**: The `sort num` syntax is deprecated. To sort numerically, cast to a numerical data type - e.g. `sort cast(streetNumber as integer)`. See [#963](https://github.com/opensearch-project/opensearch-spark/issues/963) for more details. 
- Limitation: [see limitations](ppl-parse-command.md#limitations) diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md index 14d7a4bab..f5a65184e 100644 --- a/docs/ppl-lang/ppl-parse-command.md +++ b/docs/ppl-lang/ppl-parse-command.md @@ -58,7 +58,7 @@ The example shows how to sort street numbers that are higher than 500 in ``addre PPL query: - os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as integer) > 500 | sort cast(streetNumber as integer) | fields streetNumber, street ; + os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | eval streetNumberInt = cast(streetNumber as integer) | where streetNumberInt > 500 | sort streetNumberInt | fields streetNumber, street ; fetched rows / total rows = 3/3 +----------------+----------------+ | streetNumber | street | diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala index eadd550a3..ab4ef1ad4 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanParseTranslatorTestSuite.scala @@ -13,9 +13,10 @@ import org.scalatest.matchers.should.Matchers import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.ScalaReflection.universe.Star import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Cast, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, 
SortOrder} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, LocalLimit, Project, Sort} +import org.apache.spark.sql.types.IntegerType class PPLLogicalPlanParseTranslatorTestSuite extends SparkFunSuite @@ -125,38 +126,44 @@ class PPLLogicalPlanParseTranslatorTestSuite // TODO #963: Remove unimplemented sort syntax val query = - "source=t | parse address '(?<streetNumber>\\d+) (?<street>.+)' | where streetNumber > 500 | sort cast(streetNumber as integer) | fields streetNumber, street" + "source=t" + + " | parse address '(?<streetNumber>\\d+) (?<street>.+)'" + + " | eval streetNumberInt = cast(streetNumber as integer)" + + " | where streetNumberInt > 500" + + " | sort streetNumberInt" + + " | fields streetNumber, street" val logPlan = planTransformer.visit(plan(pplParser, query), context) val addressAttribute = UnresolvedAttribute("address") val streetNumberAttribute = UnresolvedAttribute("streetNumber") val streetAttribute = UnresolvedAttribute("street") + val streetNumberIntAttribute = UnresolvedAttribute("streetNumberInt") - val streetNumberExpression = Alias( - RegExpExtract( - addressAttribute, - Literal("(?<streetNumber>\\d+) (?<street>.+)"), - Literal("1")), - "streetNumber")() + val regexLiteral = Literal("(?<streetNumber>\\d+) (?<street>.+)") + val streetNumberExpression = + Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("1")), "streetNumber")() + val streetExpression = + Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("2")), "street")() - val streetExpression = Alias( - RegExpExtract( - addressAttribute, - Literal("(?<streetNumber>\\d+) (?<street>.+)"), - Literal("2")), - "street")() + val castExpression = Cast(streetNumberAttribute, IntegerType) val expectedPlan = Project( Seq(streetNumberAttribute, streetAttribute), Sort( - Seq(SortOrder(streetNumberAttribute, Ascending, NullsFirst, Seq.empty)), + Seq(SortOrder(streetNumberIntAttribute, Ascending, NullsFirst, Seq.empty)), global = true, Filter( - GreaterThan(streetNumberAttribute, Literal(500)), + 
GreaterThan(streetNumberIntAttribute, Literal(500)), Project( - Seq(addressAttribute, streetNumberExpression, streetExpression, UnresolvedStar(None)), - UnresolvedRelation(Seq("t")))))) + Seq(UnresolvedStar(None), Alias(castExpression, "streetNumberInt")()), + Project( + Seq( + addressAttribute, + streetNumberExpression, + streetExpression, + UnresolvedStar(None)), + UnresolvedRelation(Seq("t"))))))) assert(compareByString(expectedPlan) === compareByString(logPlan)) }