From a4f350c7ce9d6426b68d9d385a234f5462dd5fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Taylor?= Date: Thu, 2 Nov 2023 13:03:11 +0100 Subject: [PATCH] Make column resolution closer to MySQL (#14426) Signed-off-by: Andres Taylor --- .../operators/aggregation_pushing.go | 5 +- .../vtgate/planbuilder/operators/distinct.go | 6 +- .../planbuilder/operators/queryprojection.go | 102 ++++++++++++------ .../planbuilder/testdata/aggr_cases.json | 44 ++------ .../planbuilder/testdata/select_cases.json | 43 ++++++++ 5 files changed, 129 insertions(+), 71 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go b/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go index c7fc8c790a7..724ed50a9a8 100644 --- a/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go +++ b/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go @@ -429,7 +429,10 @@ func addColumnsFromLHSInJoinPredicates(ctx *plancontext.PlanningContext, rootAgg for _, pred := range join.JoinPredicates { for _, bve := range pred.LHSExprs { expr := bve.Expr - wexpr := rootAggr.QP.GetSimplifiedExpr(expr) + wexpr, err := rootAggr.QP.GetSimplifiedExpr(ctx, expr) + if err != nil { + return err + } idx, found := canReuseColumn(ctx, lhs.pushed.Columns, expr, extractExpr) if !found { idx = len(lhs.pushed.Columns) diff --git a/go/vt/vtgate/planbuilder/operators/distinct.go b/go/vt/vtgate/planbuilder/operators/distinct.go index f7f4b350fc7..158d327c19c 100644 --- a/go/vt/vtgate/planbuilder/operators/distinct.go +++ b/go/vt/vtgate/planbuilder/operators/distinct.go @@ -52,7 +52,11 @@ func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) error { return err } for idx, col := range columns { - e := d.QP.GetSimplifiedExpr(col.Expr) + e, err := d.QP.GetSimplifiedExpr(ctx, col.Expr) + if err != nil { + // ambiguous columns are not a problem for DISTINCT + e = col.Expr + } var wsCol *int typ, coll, _ := ctx.SemTable.TypeForExpr(e) diff --git a/go/vt/vtgate/planbuilder/operators/queryprojection.go b/go/vt/vtgate/planbuilder/operators/queryprojection.go index f15d3642312..39bfe899223 100644 --- a/go/vt/vtgate/planbuilder/operators/queryprojection.go +++ b/go/vt/vtgate/planbuilder/operators/queryprojection.go @@ -220,7 +220,10 @@ func createQPFromSelect(ctx *plancontext.PlanningContext, sel *sqlparser.Select) if !qp.HasAggr && sel.Having != nil { qp.HasAggr = containsAggr(sel.Having.Expr) } - qp.calculateDistinct(ctx) + + if err := qp.calculateDistinct(ctx); err != nil { + return nil, err + } return qp, nil } @@ -368,7 +371,10 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy canPushSorting := true es := &expressionSet{} for _, order := range orderBy { - simpleExpr := qp.GetSimplifiedExpr(order.Expr) + simpleExpr, err := qp.GetSimplifiedExpr(ctx, order.Expr) + if err != nil { + return err + } if sqlparser.IsNull(simpleExpr) { // ORDER BY null can safely be ignored continue @@ -386,9 +392,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy return nil } -func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { +func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) error { if qp.Distinct && !qp.HasAggr { - if qp.useGroupingOverDistinct(ctx) { + distinct, err := qp.useGroupingOverDistinct(ctx) + if err != nil { + return err + } + if distinct { // if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct. qp.Distinct = false } else { @@ -403,7 +413,7 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { } if !qp.Distinct || len(qp.groupByExprs) == 0 { - return + return nil } for _, gb := range qp.groupByExprs { @@ -415,24 +425,28 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { return getExpr }) if !found { - return + return nil } } // since we are returning all grouping expressions, we know the results are guaranteed to be unique qp.Distinct = false + return nil } func (qp *QueryProjection) addGroupBy(ctx *plancontext.PlanningContext, groupBy sqlparser.GroupBy) error { es := &expressionSet{} for _, group := range groupBy { selectExprIdx, aliasExpr := qp.FindSelectExprIndexForExpr(ctx, group) - simpleExpr := qp.GetSimplifiedExpr(group) - err := checkForInvalidGroupingExpressions(simpleExpr) + simpleExpr, err := qp.GetSimplifiedExpr(ctx, group) if err != nil { return err } + if err = checkForInvalidGroupingExpressions(simpleExpr); err != nil { + return err + } + if !es.add(ctx, simpleExpr) { continue } @@ -475,32 +489,61 @@ func (qp *QueryProjection) isExprInGroupByExprs(ctx *plancontext.PlanningContext } // GetSimplifiedExpr takes an expression used in ORDER BY or GROUP BY, and returns an expression that is simpler to evaluate -func (qp *QueryProjection) GetSimplifiedExpr(e sqlparser.Expr) sqlparser.Expr { +func (qp *QueryProjection) GetSimplifiedExpr(ctx *plancontext.PlanningContext, e sqlparser.Expr) (found sqlparser.Expr, err error) { if qp == nil { - return e + return e, nil } // If the ORDER BY is against a column alias, we need to remember the expression // behind the alias. The weightstring(.) calls needs to be done against that expression and not the alias. // Eg - select music.foo as bar, weightstring(music.foo) from music order by bar - colExpr, isColName := e.(*sqlparser.ColName) - if !(isColName && colExpr.Qualifier.IsEmpty()) { - // we are only interested in unqualified column names. if it's not a column name and not - return e + in, isColName := e.(*sqlparser.ColName) + if !(isColName && in.Qualifier.IsEmpty()) { + // we are only interested in unqualified column names. if it's not a column name and not unqualified, we're done + return e, nil + } + + check := func(e sqlparser.Expr) error { + if found != nil && !ctx.SemTable.EqualsExprWithDeps(found, e) { + return &semantics.AmbiguousColumnError{Column: sqlparser.String(in)} + } + found = e + return nil } for _, selectExpr := range qp.SelectExprs { - aliasedExpr, isAliasedExpr := selectExpr.Col.(*sqlparser.AliasedExpr) - if !isAliasedExpr { + ae, ok := selectExpr.Col.(*sqlparser.AliasedExpr) + if !ok { continue } - aliased := !aliasedExpr.As.IsEmpty() - if aliased && colExpr.Name.Equal(aliasedExpr.As) { - return aliasedExpr.Expr + aliased := !ae.As.IsEmpty() + if aliased { + if in.Name.Equal(ae.As) { + err = check(ae.Expr) + if err != nil { + return nil, err + } + } + } else { + seCol, ok := ae.Expr.(*sqlparser.ColName) + if !ok { + continue + } + if seCol.Name.Equal(in.Name) { + // If the column name matches, we have a match, even if the table name is not listed + err = check(ae.Expr) + if err != nil { + return nil, err + } + } } } - return e + if found == nil { + found = e + } + + return found, nil } // toString should only be used for tests @@ -866,18 +909,21 @@ func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.Plannin return false } -func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool { +func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) (bool, error) { if !qp.orderByOverlapWithSelectExpr(ctx) { - return false + return false, nil } var gbs []GroupBy for idx, selExpr := range qp.SelectExprs { ae, err := selExpr.GetAliasedExpr() if err != nil { // not an alias Expr, cannot continue forward. - return false + return false, nil + } + sExpr, err := qp.GetSimplifiedExpr(ctx, ae.Expr) + if err != nil { + return false, err } - sExpr := qp.GetSimplifiedExpr(ae.Expr) // check if the grouping already exists on that column. found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool { return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr) @@ -892,7 +938,7 @@ func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningCont gbs = append(gbs, groupBy) } qp.groupByExprs = append(qp.groupByExprs, gbs...) - return true + return true, nil } func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error { @@ -909,12 +955,6 @@ func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error { }, expr) } -func SortAggregations(a []Aggr) { - sort.Slice(a, func(i, j int) bool { - return CompareRefInt(a[i].Index, a[j].Index) - }) -} - func SortGrouping(a []GroupBy) { sort.Slice(a, func(i, j int) bool { return CompareRefInt(a[i].InnerIndex, a[j].InnerIndex) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 6491fe8c493..0d1230456b7 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -3300,39 +3300,7 @@ { "comment": "scatter aggregate symtab lookup error", "query": "select id, b as id, count(*) from user order by id", - "plan": { - "QueryType": "SELECT", - "Original": "select id, b as id, count(*) from user order by id", - "Instructions": { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "(1|3) ASC", - "ResultColumns": 3, - "Inputs": [ - { - "OperatorType": "Aggregate", - "Variant": "Scalar", - "Aggregates": "any_value(0) AS id, any_value(1) AS id, sum_count_star(2) AS count(*), any_value(3)", - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select id, b as id, count(*), weight_string(b) from `user` where 1 != 1", - "Query": "select id, b as id, count(*), weight_string(b) from `user`", - "Table": "`user`" - } - ] - } - ] - }, - "TablesUsed": [ - "user.user" - ] - } + "plan": "Column 'id' in field list is ambiguous" }, { "comment": "aggr and non-aggr without group by (with query does not give useful result out)", @@ -3388,9 +3356,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select `user`.id, weight_string(id) from `user` where 1 != 1 group by id, weight_string(id)", + "FieldQuery": "select `user`.id, weight_string(`user`.id) from `user` where 1 != 1 group by id, weight_string(`user`.id)", "OrderBy": "(0|1) ASC", - "Query": "select `user`.id, weight_string(id) from `user` group by id, weight_string(id) order by id asc", + "Query": "select `user`.id, weight_string(`user`.id) from `user` group by id, weight_string(`user`.id) order by id asc", "Table": "`user`" }, { @@ -4775,7 +4743,7 @@ "OperatorType": "Distinct", "Collations": [ "(0:2)", - "(1:2)" + "(1:3)" ], "ResultColumns": 2, "Inputs": [ @@ -4786,8 +4754,8 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select a, b as a, weight_string(b) from `user` where 1 != 1", - "Query": "select distinct a, b as a, weight_string(b) from `user`", + "FieldQuery": "select a, b as a, weight_string(a), weight_string(b) from `user` where 1 != 1", + "Query": "select distinct a, b as a, weight_string(a), weight_string(b) from `user`", "Table": "`user`" } ] diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index 80d9cdf6d23..376278ca68f 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -4765,5 +4765,48 @@ "user.customer" ] } + }, + { + "comment": "column with qualifier is correctly used", + "query": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ", + "plan": { + "QueryType": "SELECT", + "Original": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0,R:0", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select u.foo, weight_string(u.foo) from `user` as u where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select u.foo, weight_string(u.foo) from `user` as u order by foo asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.foo as apa from user_extra as ue where 1 != 1", + "Query": "select ue.foo as apa from user_extra as ue", + "Table": "user_extra" + } + ] + }, + "TablesUsed": [ + "user.user", + "user.user_extra" + ] + } } ]