Skip to content

Commit

Permalink
Make column resolution closer to MySQL (#14426)
Browse files Browse the repository at this point in the history
Signed-off-by: Andres Taylor <[email protected]>
  • Loading branch information
systay committed Nov 6, 2023
1 parent de8eb80 commit a4f350c
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 71 deletions.
5 changes: 4 additions & 1 deletion go/vt/vtgate/planbuilder/operators/aggregation_pushing.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,10 @@ func addColumnsFromLHSInJoinPredicates(ctx *plancontext.PlanningContext, rootAgg
for _, pred := range join.JoinPredicates {
for _, bve := range pred.LHSExprs {
expr := bve.Expr
wexpr := rootAggr.QP.GetSimplifiedExpr(expr)
wexpr, err := rootAggr.QP.GetSimplifiedExpr(ctx, expr)
if err != nil {
return err
}
idx, found := canReuseColumn(ctx, lhs.pushed.Columns, expr, extractExpr)
if !found {
idx = len(lhs.pushed.Columns)
Expand Down
6 changes: 5 additions & 1 deletion go/vt/vtgate/planbuilder/operators/distinct.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) error {
return err
}
for idx, col := range columns {
e := d.QP.GetSimplifiedExpr(col.Expr)
e, err := d.QP.GetSimplifiedExpr(ctx, col.Expr)
if err != nil {
// ambiguous columns are not a problem for DISTINCT
e = col.Expr
}
var wsCol *int
typ, coll, _ := ctx.SemTable.TypeForExpr(e)

Expand Down
102 changes: 71 additions & 31 deletions go/vt/vtgate/planbuilder/operators/queryprojection.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,10 @@ func createQPFromSelect(ctx *plancontext.PlanningContext, sel *sqlparser.Select)
if !qp.HasAggr && sel.Having != nil {
qp.HasAggr = containsAggr(sel.Having.Expr)
}
qp.calculateDistinct(ctx)

if err := qp.calculateDistinct(ctx); err != nil {
return nil, err
}

return qp, nil
}
Expand Down Expand Up @@ -368,7 +371,10 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy
canPushSorting := true
es := &expressionSet{}
for _, order := range orderBy {
simpleExpr := qp.GetSimplifiedExpr(order.Expr)
simpleExpr, err := qp.GetSimplifiedExpr(ctx, order.Expr)
if err != nil {
return err
}
if sqlparser.IsNull(simpleExpr) {
// ORDER BY null can safely be ignored
continue
Expand All @@ -386,9 +392,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy
return nil
}

func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) error {
if qp.Distinct && !qp.HasAggr {
if qp.useGroupingOverDistinct(ctx) {
distinct, err := qp.useGroupingOverDistinct(ctx)
if err != nil {
return err
}
if distinct {
// if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct.
qp.Distinct = false
} else {
Expand All @@ -403,7 +413,7 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
}

if !qp.Distinct || len(qp.groupByExprs) == 0 {
return
return nil
}

for _, gb := range qp.groupByExprs {
Expand All @@ -415,24 +425,28 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
return getExpr
})
if !found {
return
return nil
}
}

// since we are returning all grouping expressions, we know the results are guaranteed to be unique
qp.Distinct = false
return nil
}

func (qp *QueryProjection) addGroupBy(ctx *plancontext.PlanningContext, groupBy sqlparser.GroupBy) error {
es := &expressionSet{}
for _, group := range groupBy {
selectExprIdx, aliasExpr := qp.FindSelectExprIndexForExpr(ctx, group)
simpleExpr := qp.GetSimplifiedExpr(group)
err := checkForInvalidGroupingExpressions(simpleExpr)
simpleExpr, err := qp.GetSimplifiedExpr(ctx, group)
if err != nil {
return err
}

if err = checkForInvalidGroupingExpressions(simpleExpr); err != nil {
return err
}

if !es.add(ctx, simpleExpr) {
continue
}
Expand Down Expand Up @@ -475,32 +489,61 @@ func (qp *QueryProjection) isExprInGroupByExprs(ctx *plancontext.PlanningContext
}

// GetSimplifiedExpr takes an expression used in ORDER BY or GROUP BY, and returns an expression that is simpler to evaluate
func (qp *QueryProjection) GetSimplifiedExpr(e sqlparser.Expr) sqlparser.Expr {
func (qp *QueryProjection) GetSimplifiedExpr(ctx *plancontext.PlanningContext, e sqlparser.Expr) (found sqlparser.Expr, err error) {
if qp == nil {
return e
return e, nil
}
// If the ORDER BY is against a column alias, we need to remember the expression
// behind the alias. The weightstring(.) calls needs to be done against that expression and not the alias.
// Eg - select music.foo as bar, weightstring(music.foo) from music order by bar

colExpr, isColName := e.(*sqlparser.ColName)
if !(isColName && colExpr.Qualifier.IsEmpty()) {
// we are only interested in unqualified column names. if it's not a column name and not
return e
in, isColName := e.(*sqlparser.ColName)
if !(isColName && in.Qualifier.IsEmpty()) {
// we are only interested in unqualified column names. if it's not a column name and not unqualified, we're done
return e, nil
}

check := func(e sqlparser.Expr) error {
if found != nil && !ctx.SemTable.EqualsExprWithDeps(found, e) {
return &semantics.AmbiguousColumnError{Column: sqlparser.String(in)}
}
found = e
return nil
}

for _, selectExpr := range qp.SelectExprs {
aliasedExpr, isAliasedExpr := selectExpr.Col.(*sqlparser.AliasedExpr)
if !isAliasedExpr {
ae, ok := selectExpr.Col.(*sqlparser.AliasedExpr)
if !ok {
continue
}
aliased := !aliasedExpr.As.IsEmpty()
if aliased && colExpr.Name.Equal(aliasedExpr.As) {
return aliasedExpr.Expr
aliased := !ae.As.IsEmpty()
if aliased {
if in.Name.Equal(ae.As) {
err = check(ae.Expr)
if err != nil {
return nil, err
}
}
} else {
seCol, ok := ae.Expr.(*sqlparser.ColName)
if !ok {
continue
}
if seCol.Name.Equal(in.Name) {
// If the column name matches, we have a match, even if the table name is not listed
err = check(ae.Expr)
if err != nil {
return nil, err
}
}
}
}

return e
if found == nil {
found = e
}

return found, nil
}

// toString should only be used for tests
Expand Down Expand Up @@ -866,18 +909,21 @@ func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.Plannin
return false
}

func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool {
func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) (bool, error) {
if !qp.orderByOverlapWithSelectExpr(ctx) {
return false
return false, nil
}
var gbs []GroupBy
for idx, selExpr := range qp.SelectExprs {
ae, err := selExpr.GetAliasedExpr()
if err != nil {
// not an alias Expr, cannot continue forward.
return false
return false, nil
}
sExpr, err := qp.GetSimplifiedExpr(ctx, ae.Expr)
if err != nil {
return false, err
}
sExpr := qp.GetSimplifiedExpr(ae.Expr)
// check if the grouping already exists on that column.
found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool {
return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr)
Expand All @@ -892,7 +938,7 @@ func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningCont
gbs = append(gbs, groupBy)
}
qp.groupByExprs = append(qp.groupByExprs, gbs...)
return true
return true, nil
}

func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
Expand All @@ -909,12 +955,6 @@ func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
}, expr)
}

func SortAggregations(a []Aggr) {
sort.Slice(a, func(i, j int) bool {
return CompareRefInt(a[i].Index, a[j].Index)
})
}

func SortGrouping(a []GroupBy) {
sort.Slice(a, func(i, j int) bool {
return CompareRefInt(a[i].InnerIndex, a[j].InnerIndex)
Expand Down
44 changes: 6 additions & 38 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -3300,39 +3300,7 @@
{
"comment": "scatter aggregate symtab lookup error",
"query": "select id, b as id, count(*) from user order by id",
"plan": {
"QueryType": "SELECT",
"Original": "select id, b as id, count(*) from user order by id",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "(1|3) ASC",
"ResultColumns": 3,
"Inputs": [
{
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "any_value(0) AS id, any_value(1) AS id, sum_count_star(2) AS count(*), any_value(3)",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select id, b as id, count(*), weight_string(b) from `user` where 1 != 1",
"Query": "select id, b as id, count(*), weight_string(b) from `user`",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"user.user"
]
}
"plan": "Column 'id' in field list is ambiguous"
},
{
"comment": "aggr and non-aggr without group by (with query does not give useful result out)",
Expand Down Expand Up @@ -3388,9 +3356,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select `user`.id, weight_string(id) from `user` where 1 != 1 group by id, weight_string(id)",
"FieldQuery": "select `user`.id, weight_string(`user`.id) from `user` where 1 != 1 group by id, weight_string(`user`.id)",
"OrderBy": "(0|1) ASC",
"Query": "select `user`.id, weight_string(id) from `user` group by id, weight_string(id) order by id asc",
"Query": "select `user`.id, weight_string(`user`.id) from `user` group by id, weight_string(`user`.id) order by id asc",
"Table": "`user`"
},
{
Expand Down Expand Up @@ -4775,7 +4743,7 @@
"OperatorType": "Distinct",
"Collations": [
"(0:2)",
"(1:2)"
"(1:3)"
],
"ResultColumns": 2,
"Inputs": [
Expand All @@ -4786,8 +4754,8 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select a, b as a, weight_string(b) from `user` where 1 != 1",
"Query": "select distinct a, b as a, weight_string(b) from `user`",
"FieldQuery": "select a, b as a, weight_string(a), weight_string(b) from `user` where 1 != 1",
"Query": "select distinct a, b as a, weight_string(a), weight_string(b) from `user`",
"Table": "`user`"
}
]
Expand Down
43 changes: 43 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4765,5 +4765,48 @@
"user.customer"
]
}
},
{
"comment": "column with qualifier is correctly used",
"query": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ",
"plan": {
"QueryType": "SELECT",
"Original": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "L:0,R:0",
"TableName": "`user`_user_extra",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select u.foo, weight_string(u.foo) from `user` as u where 1 != 1",
"OrderBy": "(0|1) ASC",
"Query": "select u.foo, weight_string(u.foo) from `user` as u order by foo asc",
"Table": "`user`"
},
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select ue.foo as apa from user_extra as ue where 1 != 1",
"Query": "select ue.foo as apa from user_extra as ue",
"Table": "user_extra"
}
]
},
"TablesUsed": [
"user.user",
"user.user_extra"
]
}
}
]

0 comments on commit a4f350c

Please sign in to comment.