Skip to content

Commit

Permalink
Update to use eval for casting
Browse files Browse the repository at this point in the history
Signed-off-by: currantw <[email protected]>
  • Loading branch information
currantw committed Dec 20, 2024
1 parent 9a76e17 commit 3639399
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 20 deletions.
2 changes: 1 addition & 1 deletion docs/ppl-lang/PPL-Example-Commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ source = table | where ispresent(a) |
- `source=accounts | parse email '.+@(?<host>.+)' | stats count() by host`
- `source=accounts | parse email '.+@(?<host>.+)' | eval eval_result=1 | fields host, eval_result`
- `source=accounts | parse email '.+@(?<host>.+)' | where age > 45 | sort - age | fields age, email, host`
- `source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as integer) > 500 | sort cast(streetNumber as integer) | fields streetNumber, street`
- `source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | eval streetNumberInt = cast(streetNumber as integer) | where streetNumberInt > 500 | sort streetNumberInt | fields streetNumber, street`
- **Note**: The `sort num` syntax is deprecated. To sort numerically, first cast the field to a numerical data type with `eval`, then sort on the resulting field - e.g. `eval streetNumberInt = cast(streetNumber as integer) | sort streetNumberInt`. See [#963](https://github.com/opensearch-project/opensearch-spark/issues/963) for more details.
- Limitation: [see limitations](ppl-parse-command.md#limitations)

Expand Down
2 changes: 1 addition & 1 deletion docs/ppl-lang/ppl-parse-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ The example shows how to sort street numbers that are higher than 500 in ``addre

PPL query:

os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as integer) > 500 | sort cast(streetNumber as integer) | fields streetNumber, street ;
os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | eval streetNumberInt = cast(streetNumber as integer) | where streetNumberInt > 500 | sort streetNumberInt | fields streetNumber, street ;
fetched rows / total rows = 3/3
+----------------+----------------+
| streetNumber | street |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ import org.scalatest.matchers.should.Matchers
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.ScalaReflection.universe.Star
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar}
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder}
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Cast, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, LocalLimit, Project, Sort}
import org.apache.spark.sql.types.IntegerType

class PPLLogicalPlanParseTranslatorTestSuite
extends SparkFunSuite
Expand Down Expand Up @@ -125,38 +126,44 @@ class PPLLogicalPlanParseTranslatorTestSuite

// TODO #963: Remove unimplemented sort syntax
val query =
"source=t | parse address '(?<streetNumber>\\d+) (?<street>.+)' | where streetNumber > 500 | sort cast(streetNumber as integer) | fields streetNumber, street"
"source=t" +
" | parse address '(?<streetNumber>\\d+) (?<street>.+)'" +
" | eval streetNumberInt = cast(streetNumber as integer)" +
" | where streetNumberInt > 500" +
" | sort streetNumberInt" +
" | fields streetNumber, street"

val logPlan = planTransformer.visit(plan(pplParser, query), context)

val addressAttribute = UnresolvedAttribute("address")
val streetNumberAttribute = UnresolvedAttribute("streetNumber")
val streetAttribute = UnresolvedAttribute("street")
val streetNumberIntAttribute = UnresolvedAttribute("streetNumberInt")

val streetNumberExpression = Alias(
RegExpExtract(
addressAttribute,
Literal("(?<streetNumber>\\d+) (?<street>.+)"),
Literal("1")),
"streetNumber")()
val regexLiteral = Literal("(?<streetNumber>\\d+) (?<street>.+)")
val streetNumberExpression =
Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("1")), "streetNumber")()
val streetExpression =
Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("2")), "street")()

val streetExpression = Alias(
RegExpExtract(
addressAttribute,
Literal("(?<streetNumber>\\d+) (?<street>.+)"),
Literal("2")),
"street")()
val castExpression = Cast(streetNumberAttribute, IntegerType)

val expectedPlan = Project(
Seq(streetNumberAttribute, streetAttribute),
Sort(
Seq(SortOrder(streetNumberAttribute, Ascending, NullsFirst, Seq.empty)),
Seq(SortOrder(streetNumberIntAttribute, Ascending, NullsFirst, Seq.empty)),
global = true,
Filter(
GreaterThan(streetNumberAttribute, Literal(500)),
GreaterThan(streetNumberIntAttribute, Literal(500)),
Project(
Seq(addressAttribute, streetNumberExpression, streetExpression, UnresolvedStar(None)),
UnresolvedRelation(Seq("t"))))))
Seq(UnresolvedStar(None), Alias(castExpression, "streetNumberInt")()),
Project(
Seq(
addressAttribute,
streetNumberExpression,
streetExpression,
UnresolvedStar(None)),
UnresolvedRelation(Seq("t")))))))

assert(compareByString(expectedPlan) === compareByString(logPlan))
}
Expand Down

0 comments on commit 3639399

Please sign in to comment.