-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support flatten with alias #927
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -347,4 +347,30 @@ class FlintSparkPPLFlattenITSuite | |||
val expectedPlan = Project(Seq(UnresolvedStar(None)), flattenMultiValue) | ||||
comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) | ||||
} | ||||
|
||||
test("flatten struct nested table using alias") { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a test for duplicated aliases:
And
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Case 1 will be similar to this test except that it aliased Line 298 in b050da3
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I mean that will the duplicated
|
||||
val frame = sql(s""" | ||||
| source = $structNestedTable | ||||
| | flatten struct_col | ||||
| | flatten field1 as subfield_1 | ||||
| | flatten struct_col2 as (field1, field2_2) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a test case: it should fail
|
||||
| | flatten field1 as subfield_2 | ||||
| """.stripMargin) | ||||
|
||||
assert( | ||||
frame.columns.sameElements( | ||||
Array("int_col", "field2", "subfield_1", "field2_2", "subfield_2"))) | ||||
val results: Array[Row] = frame.collect() | ||||
val expectedResults: Array[Row] = | ||||
Array( | ||||
Row(30, 123, "value1", 23, "valueA"), | ||||
Row(40, 123, "value5", 33, "valueB"), | ||||
Row(30, 823, "value4", 83, "valueC"), | ||||
Row(40, 456, "value2", 46, "valueD"), | ||||
Row(50, 789, "value3", 89, "valueE")) | ||||
// Compare the results | ||||
implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, Int](_.getAs[Int](0)) | ||||
assert(results.sorted.sameElements(expectedResults.sorted)) | ||||
} | ||||
|
||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -259,7 +259,7 @@ expandCommand | |
; | ||
|
||
flattenCommand | ||
: FLATTEN fieldExpression | ||
: FLATTEN fieldExpression (AS alias = identifierSeq)? | ||
; | ||
|
||
trendlineCommand | ||
|
@@ -1032,6 +1032,11 @@ qualifiedName | |
: ident (DOT ident)* # identsAsQualifiedName | ||
; | ||
|
||
identifierSeq | ||
: qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq | ||
| LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq | ||
Comment on lines
+1036
to
+1037
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This grammar allows four patterns:
Is
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not restrict to two, as you said, both 4 patterns are all allowed. For most of the case, both is ok with or without (), but having () is safer to avoid ambiguous. For example, in SQL, when having sql like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes. That's my point. How about only remains
or
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @YANG-DB any thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @LantaoJin I personally dont mind the extra verbose - so Im good with @qianheng-aws suggestion... |
||
; | ||
|
||
tableQualifiedName | ||
: tableIdent (DOT ident)* # identsAsTableQualifiedName | ||
; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,9 +13,9 @@ import org.scalatest.matchers.should.Matchers | |
|
||
import org.apache.spark.SparkFunSuite | ||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} | ||
import org.apache.spark.sql.catalyst.expressions.{Alias, Descending, GeneratorOuter, Literal, NullsLast, RegExpExtract, SortOrder} | ||
import org.apache.spark.sql.catalyst.expressions.{Alias, GeneratorOuter, Literal, RegExpExtract} | ||
import org.apache.spark.sql.catalyst.plans.PlanTest | ||
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DataFrameDropColumns, Generate, GlobalLimit, LocalLimit, Project, Sort} | ||
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DataFrameDropColumns, Generate, Project} | ||
import org.apache.spark.sql.types.IntegerType | ||
|
||
class PPLLogicalPlanFlattenCommandTranslatorTestSuite | ||
|
@@ -153,4 +153,45 @@ class PPLLogicalPlanFlattenCommandTranslatorTestSuite | |
comparePlans(expectedPlan, logPlan, checkAnalysis = false) | ||
} | ||
|
||
test("test flatten with one alias") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @qianheng-aws plz add additional flatten with aliases queries unit tests There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @YANG-DB we already have but named "test flatten with alias list" |
||
val context = new CatalystPlanContext | ||
val logPlan = | ||
planTransformer.visit( | ||
plan(pplParser, "source=relation | flatten field_with_array as col1"), | ||
context) | ||
|
||
val relation = UnresolvedRelation(Seq("relation")) | ||
val flattenGenerator = new FlattenGenerator(UnresolvedAttribute("field_with_array")) | ||
val outerGenerator = GeneratorOuter(flattenGenerator) | ||
val generate = | ||
Generate(outerGenerator, seq(), true, None, Seq(UnresolvedAttribute("col1")), relation) | ||
val dropSourceColumn = | ||
DataFrameDropColumns(Seq(UnresolvedAttribute("field_with_array")), generate) | ||
val expectedPlan = Project(seq(UnresolvedStar(None)), dropSourceColumn) | ||
comparePlans(expectedPlan, logPlan, checkAnalysis = false) | ||
} | ||
|
||
test("test flatten with alias list") { | ||
val context = new CatalystPlanContext | ||
val logPlan = | ||
planTransformer.visit( | ||
plan(pplParser, "source=relation | flatten field_with_array as (col1, col2)"), | ||
context) | ||
|
||
val relation = UnresolvedRelation(Seq("relation")) | ||
val flattenGenerator = new FlattenGenerator(UnresolvedAttribute("field_with_array")) | ||
val outerGenerator = GeneratorOuter(flattenGenerator) | ||
val generate = Generate( | ||
outerGenerator, | ||
seq(), | ||
true, | ||
None, | ||
Seq(UnresolvedAttribute("col1"), UnresolvedAttribute("col2")), | ||
relation) | ||
val dropSourceColumn = | ||
DataFrameDropColumns(Seq(UnresolvedAttribute("field_with_array")), generate) | ||
val expectedPlan = Project(seq(UnresolvedStar(None)), dropSourceColumn) | ||
comparePlans(expectedPlan, logPlan, checkAnalysis = false) | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[as aliasSequence]