-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. 
one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * Adding support for Rare & Top PPL top [N] <field-list> [by-clause] N: number of results to return. Default: 10 field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- rare <field-list> [by-clause] field-list: mandatory. comma-delimited list of field names. by-clause: optional. one or more fields to group the results by. ------------------------------------------------------------------------------------------- commands: - #461 - #536 * update scala fmt style * add additional support for `rare` & `top` commands options * add additional support for `rare` & `top` commands options including top N ... 
* update scalafmtAll style format * remove unrelated agg test --------- (cherry picked from commit 4af03c2) Signed-off-by: YANGDB <[email protected]> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
601c5a4
commit fad5fc6
Showing
8 changed files
with
553 additions
and
18 deletions.
There are no files selected for viewing
216 changes: 216 additions & 0 deletions
216
...src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLTopAndRareITSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.ppl | ||
|
||
import org.apache.spark.sql.{QueryTest, Row} | ||
import org.apache.spark.sql.catalyst.TableIdentifier | ||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} | ||
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Descending, Literal, NamedExpression, SortOrder} | ||
import org.apache.spark.sql.catalyst.plans.logical._ | ||
import org.apache.spark.sql.execution.command.DescribeTableCommand | ||
import org.apache.spark.sql.streaming.StreamTest | ||
|
||
/**
 * Integration tests for the PPL `rare` and `top` commands:
 *
 *   - `rare <field-list> [by-clause]` — least frequent values first.
 *   - `top [N] <field-list> [by-clause]` — most frequent values first; optional
 *     result limit N.
 *
 * Each test executes the query end-to-end against a test table and also checks
 * the unresolved Catalyst logical plan produced by the PPL translation
 * (COUNT aggregate, sort on the aggregated field, and — for `top N` — a
 * local/global limit pair).
 */
class FlintSparkPPLTopAndRareITSuite
    extends QueryTest
    with LogicalPlanTestUtils
    with FlintPPLSuite
    with StreamTest {

  /** Test table and index name */
  private val testTable = "spark_catalog.default.flint_ppl_test"

  override def beforeAll(): Unit = {
    super.beforeAll()

    // Create test table
    createPartitionedMultiRowAddressTable(testTable)
  }

  protected override def afterEach(): Unit = {
    super.afterEach()
    // Stop all streaming jobs if any
    spark.streams.active.foreach { job =>
      job.stop()
      job.awaitTermination()
    }
  }

  test("create ppl rare address field query test") {
    val frame = sql(s"""
         | source = $testTable| rare address
         | """.stripMargin)

    // Retrieve the results
    val results: Array[Row] = frame.collect()
    assert(results.length == 3)

    // `rare` surfaces the least frequent value first.
    val expectedRow = Row(1, "Vancouver")
    assert(
      results.head == expectedRow,
      s"Expected least frequent result to be $expectedRow, but got ${results.head}")

    // Retrieve the logical plan
    val logicalPlan: LogicalPlan = frame.queryExecution.logical
    // Define the expected logical plan: COUNT(address) grouped by address,
    // sorted descending on address, projected through `*`.
    val addressField = UnresolvedAttribute("address")
    val projectList: Seq[NamedExpression] = Seq(UnresolvedStar(None))

    val aggregateExpressions = Seq(
      Alias(
        UnresolvedFunction(Seq("COUNT"), Seq(addressField), isDistinct = false),
        "count(address)")(),
      addressField)
    val aggregatePlan =
      Aggregate(
        Seq(addressField),
        aggregateExpressions,
        UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")))
    val sortedPlan: LogicalPlan =
      Sort(
        Seq(SortOrder(UnresolvedAttribute("address"), Descending)),
        global = true,
        aggregatePlan)
    val expectedPlan = Project(projectList, sortedPlan)
    comparePlans(expectedPlan, logicalPlan, false)
  }

  test("create ppl rare address by age field query test") {
    val frame = sql(s"""
         | source = $testTable| rare address by age
         | """.stripMargin)

    // Retrieve the results
    val results: Array[Row] = frame.collect()
    assert(results.length == 5)

    val expectedRow = Row(1, "Vancouver", 60)
    assert(
      results.head == expectedRow,
      s"Expected least frequent result to be $expectedRow, but got ${results.head}")

    // Retrieve the logical plan
    val logicalPlan: LogicalPlan = frame.queryExecution.logical
    val addressField = UnresolvedAttribute("address")
    val ageField = UnresolvedAttribute("age")
    val ageAlias = Alias(ageField, "age")()

    val projectList: Seq[NamedExpression] = Seq(UnresolvedStar(None))

    val countExpr = Alias(
      UnresolvedFunction(Seq("COUNT"), Seq(addressField), isDistinct = false),
      "count(address)")()

    // by-clause fields join the grouping keys alongside the aggregated field.
    val aggregateExpressions = Seq(countExpr, addressField, ageAlias)
    val aggregatePlan =
      Aggregate(
        Seq(addressField, ageAlias),
        aggregateExpressions,
        UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")))

    val sortedPlan: LogicalPlan =
      Sort(
        Seq(SortOrder(UnresolvedAttribute("address"), Descending)),
        global = true,
        aggregatePlan)

    val expectedPlan = Project(projectList, sortedPlan)
    comparePlans(expectedPlan, logicalPlan, false)
  }

  test("create ppl top address field query test") {
    val frame = sql(s"""
         | source = $testTable| top address
         | """.stripMargin)

    // Retrieve the results
    val results: Array[Row] = frame.collect()
    assert(results.length == 3)

    // Both values tie at count 2, so compare the top two rows as a set.
    val expectedRows = Set(Row(2, "Portland"), Row(2, "Seattle"))
    val actualRows = results.take(2).toSet

    // Compare the sets
    assert(
      actualRows == expectedRows,
      s"The first two results do not match the expected rows. Expected: $expectedRows, Actual: $actualRows")

    // Retrieve the logical plan
    val logicalPlan: LogicalPlan = frame.queryExecution.logical
    // Define the expected logical plan — identical to `rare` except for the
    // ascending sort direction.
    val addressField = UnresolvedAttribute("address")
    val projectList: Seq[NamedExpression] = Seq(UnresolvedStar(None))

    val aggregateExpressions = Seq(
      Alias(
        UnresolvedFunction(Seq("COUNT"), Seq(addressField), isDistinct = false),
        "count(address)")(),
      addressField)
    val aggregatePlan =
      Aggregate(
        Seq(addressField),
        aggregateExpressions,
        UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")))
    val sortedPlan: LogicalPlan =
      Sort(
        Seq(SortOrder(UnresolvedAttribute("address"), Ascending)),
        global = true,
        aggregatePlan)
    val expectedPlan = Project(projectList, sortedPlan)
    comparePlans(expectedPlan, logicalPlan, false)
  }

  test("create ppl top 3 countries by occupation field query test") {
    val newTestTable = "spark_catalog.default.new_flint_ppl_test"
    createOccupationTable(newTestTable)

    val frame = sql(s"""
         | source = $newTestTable| top 3 country by occupation
         | """.stripMargin)

    // Retrieve the results
    val results: Array[Row] = frame.collect()
    assert(results.length == 3)

    val expectedRows = Set(
      Row(1, "Canada", "Doctor"),
      Row(1, "Canada", "Scientist"),
      Row(1, "Canada", "Unemployed"))
    val actualRows = results.take(3).toSet

    // Compare the sets (fixed message: three rows are compared here, not two)
    assert(
      actualRows == expectedRows,
      s"The first three results do not match the expected rows. Expected: $expectedRows, Actual: $actualRows")

    // Retrieve the logical plan
    val logicalPlan: LogicalPlan = frame.queryExecution.logical

    val countryField = UnresolvedAttribute("country")
    val occupationField = UnresolvedAttribute("occupation")
    val occupationFieldAlias = Alias(occupationField, "occupation")()

    val countExpr = Alias(
      UnresolvedFunction(Seq("COUNT"), Seq(countryField), isDistinct = false),
      "count(country)")()
    val aggregateExpressions = Seq(countExpr, countryField, occupationFieldAlias)
    val aggregatePlan =
      Aggregate(
        Seq(countryField, occupationFieldAlias),
        aggregateExpressions,
        UnresolvedRelation(Seq("spark_catalog", "default", "new_flint_ppl_test")))

    val sortedPlan: LogicalPlan =
      Sort(
        Seq(SortOrder(UnresolvedAttribute("country"), Ascending)),
        global = true,
        aggregatePlan)

    // `top 3` adds a LocalLimit/GlobalLimit pair above the sorted aggregate.
    val planWithLimit =
      GlobalLimit(Literal(3), LocalLimit(Literal(3), sortedPlan))
    val expectedPlan = Project(Seq(UnresolvedStar(None)), planWithLimit)
    comparePlans(expectedPlan, logicalPlan, false)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,8 @@ commands | |
| dedupCommand | ||
| sortCommand | ||
| headCommand | ||
| topCommand | ||
| rareCommand | ||
| evalCommand | ||
; | ||
|
||
|
23 changes: 23 additions & 0 deletions
23
ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/RareAggregation.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.ast.tree; | ||
|
||
import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
/** Logical plan node of Rare (Aggregation) command, the interface for building aggregation actions in queries. */ | ||
public class RareAggregation extends Aggregation { | ||
/** Aggregation Constructor without span and argument. */ | ||
public RareAggregation( | ||
List<UnresolvedExpression> aggExprList, | ||
List<UnresolvedExpression> sortExprList, | ||
List<UnresolvedExpression> groupExprList) { | ||
super(aggExprList, sortExprList, groupExprList, null, Collections.emptyList()); | ||
} | ||
|
||
} |
32 changes: 32 additions & 0 deletions
32
ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/TopAggregation.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.ast.tree; | ||
|
||
import org.opensearch.sql.ast.expression.Literal; | ||
import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
/** Logical plan node of Top (Aggregation) command, the interface for building aggregation actions in queries. */ | ||
public class TopAggregation extends Aggregation { | ||
private final Optional<Literal> results; | ||
|
||
/** Aggregation Constructor without span and argument. */ | ||
public TopAggregation( | ||
Optional<Literal> results, | ||
List<UnresolvedExpression> aggExprList, | ||
List<UnresolvedExpression> sortExprList, | ||
List<UnresolvedExpression> groupExprList) { | ||
super(aggExprList, sortExprList, groupExprList, null, Collections.emptyList()); | ||
this.results = results; | ||
} | ||
|
||
public Optional<Literal> getResults() { | ||
return results; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.