From bb9c4e716c4ec86c7611bd3685f014ac204a33a4 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Thu, 2 Nov 2023 08:24:40 +0100 Subject: [PATCH 1/2] feat: make column resolution closer to mysql Signed-off-by: Andres Taylor --- .../operators/aggregation_pushing.go | 5 +- .../vtgate/planbuilder/operators/distinct.go | 5 +- .../planbuilder/operators/queryprojection.go | 102 ++++++++++++------ .../planbuilder/testdata/aggr_cases.json | 67 +----------- .../planbuilder/testdata/select_cases.json | 43 ++++++++ 5 files changed, 126 insertions(+), 96 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go b/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go index deb1a8df8c5..85c50427364 100644 --- a/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go +++ b/go/vt/vtgate/planbuilder/operators/aggregation_pushing.go @@ -423,7 +423,10 @@ func addColumnsFromLHSInJoinPredicates(ctx *plancontext.PlanningContext, rootAgg for _, pred := range join.JoinPredicates { for _, bve := range pred.LHSExprs { expr := bve.Expr - wexpr := rootAggr.QP.GetSimplifiedExpr(expr) + wexpr, err := rootAggr.QP.GetSimplifiedExpr(ctx, expr) + if err != nil { + return err + } idx, found := canReuseColumn(ctx, lhs.pushed.Columns, expr, extractExpr) if !found { idx = len(lhs.pushed.Columns) diff --git a/go/vt/vtgate/planbuilder/operators/distinct.go b/go/vt/vtgate/planbuilder/operators/distinct.go index d9121586185..d6f2767896a 100644 --- a/go/vt/vtgate/planbuilder/operators/distinct.go +++ b/go/vt/vtgate/planbuilder/operators/distinct.go @@ -49,7 +49,10 @@ type ( func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) { columns := d.GetColumns(ctx) for idx, col := range columns { - e := d.QP.GetSimplifiedExpr(col.Expr) + e, err := d.QP.GetSimplifiedExpr(ctx, col.Expr) + if err != nil { + panic(err) + } var wsCol *int typ, _ := ctx.SemTable.TypeForExpr(e) diff --git a/go/vt/vtgate/planbuilder/operators/queryprojection.go b/go/vt/vtgate/planbuilder/operators/queryprojection.go index 29b2ec08562..b7040807300 100644 --- a/go/vt/vtgate/planbuilder/operators/queryprojection.go +++ b/go/vt/vtgate/planbuilder/operators/queryprojection.go @@ -219,7 +219,10 @@ func createQPFromSelect(ctx *plancontext.PlanningContext, sel *sqlparser.Select) if !qp.HasAggr && sel.Having != nil { qp.HasAggr = containsAggr(sel.Having.Expr) } - qp.calculateDistinct(ctx) + + if err := qp.calculateDistinct(ctx); err != nil { + return nil, err + } return qp, nil } @@ -367,7 +370,10 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy canPushSorting := true es := &expressionSet{} for _, order := range orderBy { - simpleExpr := qp.GetSimplifiedExpr(order.Expr) + simpleExpr, err := qp.GetSimplifiedExpr(ctx, order.Expr) + if err != nil { + return err + } if sqlparser.IsNull(simpleExpr) { // ORDER BY null can safely be ignored continue @@ -385,9 +391,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy return nil } -func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { +func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) error { if qp.Distinct && !qp.HasAggr { - if qp.useGroupingOverDistinct(ctx) { + distinct, err := qp.useGroupingOverDistinct(ctx) + if err != nil { + return err + } + if distinct { // if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct. qp.Distinct = false } else { @@ -402,7 +412,7 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { } if !qp.Distinct || len(qp.groupByExprs) == 0 { - return + return nil } for _, gb := range qp.groupByExprs { @@ -414,24 +424,28 @@ func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { return getExpr }) if !found { - return + return nil } } // since we are returning all grouping expressions, we know the results are guaranteed to be unique qp.Distinct = false + return nil } func (qp *QueryProjection) addGroupBy(ctx *plancontext.PlanningContext, groupBy sqlparser.GroupBy) error { es := &expressionSet{} for _, group := range groupBy { selectExprIdx, aliasExpr := qp.FindSelectExprIndexForExpr(ctx, group) - simpleExpr := qp.GetSimplifiedExpr(group) - err := checkForInvalidGroupingExpressions(simpleExpr) + simpleExpr, err := qp.GetSimplifiedExpr(ctx, group) if err != nil { return err } + if err = checkForInvalidGroupingExpressions(simpleExpr); err != nil { + return err + } + if !es.add(ctx, simpleExpr) { continue } @@ -474,32 +488,61 @@ func (qp *QueryProjection) isExprInGroupByExprs(ctx *plancontext.PlanningContext } // GetSimplifiedExpr takes an expression used in ORDER BY or GROUP BY, and returns an expression that is simpler to evaluate -func (qp *QueryProjection) GetSimplifiedExpr(e sqlparser.Expr) sqlparser.Expr { +func (qp *QueryProjection) GetSimplifiedExpr(ctx *plancontext.PlanningContext, e sqlparser.Expr) (found sqlparser.Expr, err error) { if qp == nil { - return e + return e, nil } // If the ORDER BY is against a column alias, we need to remember the expression // behind the alias. The weightstring(.) calls needs to be done against that expression and not the alias. // Eg - select music.foo as bar, weightstring(music.foo) from music order by bar - colExpr, isColName := e.(*sqlparser.ColName) - if !(isColName && colExpr.Qualifier.IsEmpty()) { - // we are only interested in unqualified column names. if it's not a column name and not - return e + in, isColName := e.(*sqlparser.ColName) + if !(isColName && in.Qualifier.IsEmpty()) { + // we are only interested in unqualified column names. if it's not a column name and not unqualified, we're done + return e, nil + } + + check := func(e sqlparser.Expr) error { + if found != nil && !ctx.SemTable.EqualsExprWithDeps(found, e) { + return &semantics.AmbiguousColumnError{Column: sqlparser.String(in)} + } + found = e + return nil } for _, selectExpr := range qp.SelectExprs { - aliasedExpr, isAliasedExpr := selectExpr.Col.(*sqlparser.AliasedExpr) - if !isAliasedExpr { + ae, ok := selectExpr.Col.(*sqlparser.AliasedExpr) + if !ok { continue } - aliased := !aliasedExpr.As.IsEmpty() - if aliased && colExpr.Name.Equal(aliasedExpr.As) { - return aliasedExpr.Expr + aliased := !ae.As.IsEmpty() + if aliased { + if in.Name.Equal(ae.As) { + err = check(ae.Expr) + if err != nil { + return nil, err + } + } + } else { + seCol, ok := ae.Expr.(*sqlparser.ColName) + if !ok { + continue + } + if seCol.Name.Equal(in.Name) { + // If the column name matches, we have a match, even if the table name is not listed + err = check(ae.Expr) + if err != nil { + return nil, err + } + } } } - return e + if found == nil { + found = e + } + + return found, nil } // toString should only be used for tests @@ -865,18 +908,21 @@ func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.Plannin return false } -func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool { +func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) (bool, error) { if !qp.orderByOverlapWithSelectExpr(ctx) { - return false + return false, nil } var gbs []GroupBy for idx, selExpr := range qp.SelectExprs { ae, err := selExpr.GetAliasedExpr() if err != nil { // not an alias Expr, cannot continue forward. - return false + return false, nil + } + sExpr, err := qp.GetSimplifiedExpr(ctx, ae.Expr) + if err != nil { + return false, err } - sExpr := qp.GetSimplifiedExpr(ae.Expr) // check if the grouping already exists on that column. found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool { return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr) @@ -891,7 +937,7 @@ func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningCont gbs = append(gbs, groupBy) } qp.groupByExprs = append(qp.groupByExprs, gbs...) - return true + return true, nil } func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error { @@ -908,12 +954,6 @@ func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error { }, expr) } -func SortAggregations(a []Aggr) { - sort.Slice(a, func(i, j int) bool { - return CompareRefInt(a[i].Index, a[j].Index) - }) -} - func SortGrouping(a []GroupBy) { sort.Slice(a, func(i, j int) bool { return CompareRefInt(a[i].InnerIndex, a[j].InnerIndex) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 24ffbb0eee0..59920b5da98 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -3300,39 +3300,7 @@ { "comment": "scatter aggregate symtab lookup error", "query": "select id, b as id, count(*) from user order by id", - "plan": { - "QueryType": "SELECT", - "Original": "select id, b as id, count(*) from user order by id", - "Instructions": { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "(1|3) ASC", - "ResultColumns": 3, - "Inputs": [ - { - "OperatorType": "Aggregate", - "Variant": "Scalar", - "Aggregates": "any_value(0) AS id, any_value(1) AS id, sum_count_star(2) AS count(*), any_value(3)", - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select id, b as id, count(*), weight_string(b) from `user` where 1 != 1", - "Query": "select id, b as id, count(*), weight_string(b) from `user`", - "Table": "`user`" - } - ] - } - ] - }, - "TablesUsed": [ - "user.user" - ] - } + "plan": "Column 'id' in field list is ambiguous" }, { "comment": "aggr and non-aggr without group by (with query does not give useful result out)", @@ -3388,9 +3356,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select `user`.id, weight_string(id) from `user` where 1 != 1 group by id, weight_string(id)", + "FieldQuery": "select `user`.id, weight_string(`user`.id) from `user` where 1 != 1 group by id, weight_string(`user`.id)", "OrderBy": "(0|1) ASC", - "Query": "select `user`.id, weight_string(id) from `user` group by id, weight_string(id) order by id asc", + "Query": "select `user`.id, weight_string(`user`.id) from `user` group by id, weight_string(`user`.id) order by id asc", "Table": "`user`" }, { @@ -4768,34 +4736,7 @@ { "comment": "scatter aggregate with ambiguous aliases", "query": "select distinct a, b as a from user", - "plan": { - "QueryType": "SELECT", - "Original": "select distinct a, b as a from user", - "Instructions": { - "OperatorType": "Distinct", - "Collations": [ - "(0:2)", - "(1:2)" - ], - "ResultColumns": 2, - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select a, b as a, weight_string(b) from `user` where 1 != 1", - "Query": "select distinct a, b as a, weight_string(b) from `user`", - "Table": "`user`" - } - ] - }, - "TablesUsed": [ - "user.user" - ] - } + "plan": "Column 'a' in field list is ambiguous" }, { "comment": "scatter aggregate with complex select list (can't build order by)", diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index 03e509e21d9..f26cfc4f065 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -4787,5 +4787,48 @@ "user.samecolvin" ] } + }, + { + "comment": "column with qualifier is correctly used", + "query": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ", + "plan": { + "QueryType": "SELECT", + "Original": "select u.foo, ue.foo as apa from user u, user_extra ue order by foo ", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0,R:0", + "TableName": "`user`_user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select u.foo, weight_string(u.foo) from `user` as u where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select u.foo, weight_string(u.foo) from `user` as u order by foo asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.foo as apa from user_extra as ue where 1 != 1", + "Query": "select ue.foo as apa from user_extra as ue", + "Table": "user_extra" + } + ] + }, + "TablesUsed": [ + "user.user", + "user.user_extra" + ] + } } ] From 8ed8ee939cd79a0c80b631111950cb4112c5fe39 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Thu, 2 Nov 2023 10:44:16 +0100 Subject: [PATCH 2/2] fix: update DISTINCT behaviour Signed-off-by: Andres Taylor --- .../vtgate/planbuilder/operators/distinct.go | 3 +- .../planbuilder/testdata/aggr_cases.json | 29 ++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/distinct.go b/go/vt/vtgate/planbuilder/operators/distinct.go index d6f2767896a..88503514615 100644 --- a/go/vt/vtgate/planbuilder/operators/distinct.go +++ b/go/vt/vtgate/planbuilder/operators/distinct.go @@ -51,7 +51,8 @@ func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) { for idx, col := range columns { e, err := d.QP.GetSimplifiedExpr(ctx, col.Expr) if err != nil { - panic(err) + // ambiguous columns are not a problem for DISTINCT + e = col.Expr } var wsCol *int typ, _ := ctx.SemTable.TypeForExpr(e) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 59920b5da98..e47b17cd2ae 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -4736,7 +4736,34 @@ { "comment": "scatter aggregate with ambiguous aliases", "query": "select distinct a, b as a from user", - "plan": "Column 'a' in field list is ambiguous" + "plan": { + "QueryType": "SELECT", + "Original": "select distinct a, b as a from user", + "Instructions": { + "OperatorType": "Distinct", + "Collations": [ + "(0:2)", + "(1:3)" + ], + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b as a, weight_string(a), weight_string(b) from `user` where 1 != 1", + "Query": "select distinct a, b as a, weight_string(a), weight_string(b) from `user`", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } }, { "comment": "scatter aggregate with complex select list (can't build order by)",