From d60f6ed3d1a3099c4fe80d92604af5230cdbc27d Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 18:41:19 +0530 Subject: [PATCH] use aggregation engine over distinct engine when overlapping order by (#14359) Signed-off-by: Harshit Gangal --- .../planbuilder/operators/queryprojection.go | 48 ++++++++- .../planbuilder/testdata/oltp_cases.json | 33 +++--- .../testdata/postprocess_cases.json | 100 ++++++++++++++++++ 3 files changed, 159 insertions(+), 22 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/queryprojection.go b/go/vt/vtgate/planbuilder/operators/queryprojection.go index 50bbf3e1720..f15d3642312 100644 --- a/go/vt/vtgate/planbuilder/operators/queryprojection.go +++ b/go/vt/vtgate/planbuilder/operators/queryprojection.go @@ -388,8 +388,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) { if qp.Distinct && !qp.HasAggr { - // grouping and distinct both lead to unique results, so we don't need - qp.groupByExprs = nil + if qp.useGroupingOverDistinct(ctx) { + // if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct. 
+ qp.Distinct = false + } else { + // grouping and distinct both lead to unique results, so we don't need + qp.groupByExprs = nil + } } if qp.HasAggr && len(qp.groupByExprs) == 0 { @@ -851,6 +856,45 @@ func (qp *QueryProjection) GetColumnCount() int { return len(qp.SelectExprs) - qp.AddedColumn } +func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.PlanningContext) bool { + for _, expr := range qp.OrderExprs { + idx, _ := qp.FindSelectExprIndexForExpr(ctx, expr.SimplifiedExpr) + if idx != nil { + return true + } + } + return false +} + +func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool { + if !qp.orderByOverlapWithSelectExpr(ctx) { + return false + } + var gbs []GroupBy + for idx, selExpr := range qp.SelectExprs { + ae, err := selExpr.GetAliasedExpr() + if err != nil { + // not an alias Expr, cannot continue forward. + return false + } + sExpr := qp.GetSimplifiedExpr(ae.Expr) + // check if the grouping already exists on that column. + found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool { + return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr) + }) + if found != -1 { + continue + } + groupBy := NewGroupBy(ae.Expr, sExpr, ae) + selectExprIdx := idx + groupBy.InnerIndex = &selectExprIdx + + gbs = append(gbs, groupBy) + } + qp.groupByExprs = append(qp.groupByExprs, gbs...) 
+ return true +} + func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error { return sqlparser.Walk(func(node sqlparser.SQLNode) (bool, error) { if _, isAggregate := node.(sqlparser.AggrFunc); isAggregate { diff --git a/go/vt/vtgate/planbuilder/testdata/oltp_cases.json b/go/vt/vtgate/planbuilder/testdata/oltp_cases.json index 810f58b4ea9..b366f1a8f04 100644 --- a/go/vt/vtgate/planbuilder/testdata/oltp_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/oltp_cases.json @@ -106,28 +106,21 @@ "QueryType": "SELECT", "Original": "SELECT DISTINCT c FROM sbtest30 WHERE id BETWEEN 1 AND 10 ORDER BY c", "Instructions": { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "0 ASC COLLATE latin1_swedish_ci", + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "0 COLLATE latin1_swedish_ci", "Inputs": [ { - "OperatorType": "Distinct", - "Collations": [ - "0: latin1_swedish_ci" - ], - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "main", - "Sharded": true - }, - "FieldQuery": "select c from sbtest30 where 1 != 1", - "Query": "select distinct c from sbtest30 where id between 1 and 10", - "Table": "sbtest30" - } - ] + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "main", + "Sharded": true + }, + "FieldQuery": "select c from sbtest30 where 1 != 1 group by c", + "OrderBy": "0 ASC COLLATE latin1_swedish_ci", + "Query": "select c from sbtest30 where id between 1 and 10 group by c order by c asc", + "Table": "sbtest30" } ] }, diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json index 3560b0f323f..e55311cf98f 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json @@ -2081,5 +2081,105 @@ "user.user" ] } + }, + { + "comment": "distinct with order by using aggregation engine", + "query": "select distinct col from user where id 
between :vtg1 and :vtg2 order by col asc", + "plan": { + "QueryType": "SELECT", + "Original": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "0", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col from `user` where 1 != 1 group by col", + "OrderBy": "0 ASC", + "Query": "select col from `user` where id between :vtg1 and :vtg2 group by col order by col asc", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "distinct with order by having additional non-order by columns in the selection using aggregation engine", + "query": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc", + "plan": { + "QueryType": "SELECT", + "Original": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "1, (0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1 group by col, foo, weight_string(foo)", + "OrderBy": "1 ASC, (0|2) ASC", + "Query": "select foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2 group by col, foo, weight_string(foo) order by col asc, foo asc", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "distinct with order by having no overlap with the selection columns - using distinct engine", + "query": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc", + "plan": { + "QueryType": "SELECT", + "Original": "select distinct foo from user where id between :vtg1 and :vtg2 order by col 
asc", + "Instructions": { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "1 ASC", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Distinct", + "Collations": [ + "(0:2)", + "1" + ], + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1", + "Query": "select distinct foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2", + "Table": "`user`" + } + ] + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } } ]