From f9535b233b782a194ac0b3103595e841d052bca1 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Tue, 14 Nov 2023 08:18:30 +0100 Subject: [PATCH] [release-18.0] planbuilder bugfix: expose columns through derived tables (#14501) (#14504) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andrés Taylor --- .../informationschema_test.go | 48 +++++++++++-------- .../planbuilder/operators/aggregator.go | 8 +++- go/vt/vtgate/planbuilder/operators/horizon.go | 7 ++- .../planbuilder/operators/route_planning.go | 6 +-- .../planbuilder/testdata/aggr_cases.json | 48 +++++++++++++++++++ .../testdata/info_schema57_cases.json | 42 ++++++++++++++++ .../testdata/info_schema80_cases.json | 42 ++++++++++++++++ .../planbuilder/testdata/select_cases.json | 45 +++++++++++++++++ .../testdata/unsupported_cases.json | 4 +- 9 files changed, 222 insertions(+), 28 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/informationschema/informationschema_test.go b/go/test/endtoend/vtgate/queries/informationschema/informationschema_test.go index 337ec3d2ff9..7a2d265ed57 100644 --- a/go/test/endtoend/vtgate/queries/informationschema/informationschema_test.go +++ b/go/test/endtoend/vtgate/queries/informationschema/informationschema_test.go @@ -21,14 +21,12 @@ import ( "fmt" "testing" - "vitess.io/vitess/go/test/endtoend/utils" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/test/endtoend/cluster" + "vitess.io/vitess/go/test/endtoend/utils" ) func start(t *testing.T) (utils.MySQLCompare, func()) { @@ -205,23 +203,31 @@ func TestMultipleSchemaPredicates(t *testing.T) { require.Contains(t, err.Error(), "specifying two different database in the query is not supported") } -func TestInfrSchemaAndUnionAll(t *testing.T) { - clusterInstance.VtGateExtraArgs = append(clusterInstance.VtGateExtraArgs, "--planner-version=gen4") - require.NoError(t, - clusterInstance.RestartVtgate()) - - vtConnParams := clusterInstance.GetVTParams(keyspaceName) - vtConnParams.DbName = keyspaceName - conn, err := mysql.Connect(context.Background(), &vtConnParams) - require.NoError(t, err) +func TestJoinWithSingleShardQueryOnRHS(t *testing.T) { + // This test checks that we can run queries like this, where the RHS is a single shard query + mcmp, closer := start(t) + defer closer() - for _, workload := range []string{"oltp", "olap"} { - t.Run(workload, func(t *testing.T) { - utils.Exec(t, conn, fmt.Sprintf("set workload = %s", workload)) - utils.Exec(t, conn, "start transaction") - utils.Exec(t, conn, `select connection_id()`) - utils.Exec(t, conn, `(select 'corder' from t1 limit 1) union all (select 'customer' from t7_xxhash limit 1)`) - utils.Exec(t, conn, "rollback") - }) - } + query := `SELECT + c.column_name as column_name, + c.data_type as data_type, + c.table_name as table_name, + c.table_schema as table_schema +FROM + information_schema.columns c + JOIN ( + SELECT + table_name + FROM + information_schema.tables + WHERE + table_schema != 'information_schema' + LIMIT + 1 + ) AS tables ON tables.table_name = c.table_name +ORDER BY + c.table_name` + + res := utils.Exec(t, mcmp.VtConn, query) + require.NotEmpty(t, res.Rows) } diff --git a/go/vt/vtgate/planbuilder/operators/aggregator.go b/go/vt/vtgate/planbuilder/operators/aggregator.go index b572e05fb45..09c7e6159b8 100644 --- a/go/vt/vtgate/planbuilder/operators/aggregator.go +++ b/go/vt/vtgate/planbuilder/operators/aggregator.go @@ -124,7 +124,13 @@ func (a *Aggregator) isDerived() bool { return a.DT != nil } -func (a *Aggregator) FindCol(ctx *plancontext.PlanningContext, in sqlparser.Expr, _ bool) (int, error) { +func (a *Aggregator) FindCol(ctx *plancontext.PlanningContext, in sqlparser.Expr, underRoute bool) (int, error) { + if underRoute && a.isDerived() { + // We don't want to use columns on this operator if it's a derived table under a route. + // In this case, we need to add a Projection on top of this operator to make the column available + return -1, nil + } + expr, err := a.DT.RewriteExpression(ctx, in) if err != nil { return 0, err diff --git a/go/vt/vtgate/planbuilder/operators/horizon.go b/go/vt/vtgate/planbuilder/operators/horizon.go index 2f9d574d99d..1b614e0c051 100644 --- a/go/vt/vtgate/planbuilder/operators/horizon.go +++ b/go/vt/vtgate/planbuilder/operators/horizon.go @@ -151,7 +151,12 @@ func canReuseColumn[T any]( return } -func (h *Horizon) FindCol(ctx *plancontext.PlanningContext, expr sqlparser.Expr, _ bool) (int, error) { +func (h *Horizon) FindCol(ctx *plancontext.PlanningContext, expr sqlparser.Expr, underRoute bool) (int, error) { + if underRoute && h.IsDerived() { + // We don't want to use columns on this operator if it's a derived table under a route. + // In this case, we need to add a Projection on top of this operator to make the column available + return -1, nil + } for idx, se := range sqlparser.GetFirstSelect(h.Query).SelectExprs { ae, ok := se.(*sqlparser.AliasedExpr) if !ok { diff --git a/go/vt/vtgate/planbuilder/operators/route_planning.go b/go/vt/vtgate/planbuilder/operators/route_planning.go index 720f2f56480..9a4940173e4 100644 --- a/go/vt/vtgate/planbuilder/operators/route_planning.go +++ b/go/vt/vtgate/planbuilder/operators/route_planning.go @@ -375,11 +375,11 @@ func mergeOrJoin(ctx *plancontext.PlanningContext, lhs, rhs ops.Operator, joinPr if len(joinPredicates) > 0 && requiresSwitchingSides(ctx, rhs) { if !inner { - return nil, nil, vterrors.VT12001("LEFT JOIN with derived tables") + return nil, nil, vterrors.VT12001("LEFT JOIN with LIMIT on the outer side") } if requiresSwitchingSides(ctx, lhs) { - return nil, nil, vterrors.VT12001("JOIN between derived tables") + return nil, nil, vterrors.VT12001("JOIN between derived tables with LIMIT") } join := NewApplyJoin(Clone(rhs), Clone(lhs), nil, !inner) @@ -387,7 +387,7 @@ func mergeOrJoin(ctx *plancontext.PlanningContext, lhs, rhs ops.Operator, joinPr if err != nil { return nil, nil, err } - return newOp, rewrite.NewTree("logical join to applyJoin, switching side because derived table", newOp), nil + return newOp, rewrite.NewTree("logical join to applyJoin, switching side because LIMIT", newOp), nil } join := NewApplyJoin(Clone(lhs), Clone(rhs), nil, !inner) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 0d1230456b7..31229df276e 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -5956,5 +5956,53 @@ "user.user" ] } + }, + { + "comment": "GROUP BY inside derived table on the RHS should not be a problem", + "query": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM user WHERE id = 143 GROUP BY 1) AS tables ON tables.table_name = c.table_name", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM user WHERE id = 143 GROUP BY 1) AS tables ON tables.table_name = c.table_name", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "R:0", + "JoinVars": { + "tables_table_name": 0 + }, + "TableName": "`user`_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select table_name from (select table_name from `user` where 1 != 1 group by table_name) as `tables` where 1 != 1", + "Query": "select table_name from (select table_name from `user` where id = 143 group by table_name) as `tables`", + "Table": "`user`", + "Values": [ + "INT64(143)" + ], + "Vindex": "user_index" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select c.column_name from `user` as c where 1 != 1", + "Query": "select c.column_name from `user` as c where c.table_name = :tables_table_name", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/info_schema57_cases.json b/go/vt/vtgate/planbuilder/testdata/info_schema57_cases.json index aec230c8b3a..7380b203cdd 100644 --- a/go/vt/vtgate/planbuilder/testdata/info_schema57_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/info_schema57_cases.json @@ -1139,5 +1139,47 @@ "Table": "information_schema.apa" } } + }, + { + "comment": "LIMIT 1 inside derived table on the RHS should not be a problem", + "query": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "R:0", + "JoinVars": { + "tables_table_name": 0 + }, + "TableName": "information_schema.`tables`_information_schema.`columns`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "DBA", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select table_name from (select table_name from information_schema.`tables` where 1 != 1) as `tables` where 1 != 1", + "Query": "select table_name from (select table_name from information_schema.`tables` where table_schema != 'information_schema' limit 1) as `tables`", + "Table": "information_schema.`tables`" + }, + { + "OperatorType": "Route", + "Variant": "DBA", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select c.column_name from information_schema.`columns` as c where 1 != 1", + "Query": "select c.column_name from information_schema.`columns` as c where c.table_name = :c_table_name /* VARCHAR */", + "SysTableTableName": "[c_table_name::tables_table_name]", + "Table": "information_schema.`columns`" + } + ] + } + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/info_schema80_cases.json b/go/vt/vtgate/planbuilder/testdata/info_schema80_cases.json index 13a503a4eb8..7322b1a4ad2 100644 --- a/go/vt/vtgate/planbuilder/testdata/info_schema80_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/info_schema80_cases.json @@ -1261,5 +1261,47 @@ "Table": "information_schema.apa" } } + }, + { + "comment": "LIMIT 1 inside derived table on the RHS should not be a problem", + "query": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "R:0", + "JoinVars": { + "tables_table_name": 0 + }, + "TableName": "information_schema.`tables`_information_schema.`columns`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "DBA", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select table_name from (select table_name from information_schema.`tables` where 1 != 1) as `tables` where 1 != 1", + "Query": "select table_name from (select table_name from information_schema.`tables` where table_schema != 'information_schema' limit 1) as `tables`", + "Table": "information_schema.`tables`" + }, + { + "OperatorType": "Route", + "Variant": "DBA", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select c.column_name from information_schema.`columns` as c where 1 != 1", + "Query": "select c.column_name from information_schema.`columns` as c where c.table_name = :c_table_name /* VARCHAR */", + "SysTableTableName": "[c_table_name::tables_table_name]", + "Table": "information_schema.`columns`" + } + ] + } + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index 376278ca68f..34ceadfa181 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -4808,5 +4808,50 @@ "user.user_extra" ] } + }, + { + "comment": "Derived tables going to a single shard still need to expand derived table columns", + "query": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM unsharded LIMIT 1) AS tables ON tables.table_name = c.table_name", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM unsharded LIMIT 1) AS tables ON tables.table_name = c.table_name", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "R:0", + "JoinVars": { + "tables_table_name": 0 + }, + "TableName": "unsharded_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Unsharded", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select table_name from (select table_name from unsharded where 1 != 1) as `tables` where 1 != 1", + "Query": "select table_name from (select table_name from unsharded limit 1) as `tables`", + "Table": "unsharded" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select c.column_name from `user` as c where 1 != 1", + "Query": "select c.column_name from `user` as c where c.table_name = :tables_table_name", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "main.unsharded", + "user.user" + ] + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json index 109cec4d241..e3d2f0be11b 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json @@ -482,12 +482,12 @@ { "comment": "cant switch sides for outer joins", "query": "select id from user left join (select user_id from user_extra limit 10) ue on user.id = ue.user_id", - "plan": "VT12001: unsupported: LEFT JOIN with derived tables" + "plan": "VT12001: unsupported: LEFT JOIN with LIMIT on the outer side" }, { "comment": "limit on both sides means that we can't evaluate this at all", "query": "select id from (select id from user limit 10) u join (select user_id from user_extra limit 10) ue on u.id = ue.user_id", - "plan": "VT12001: unsupported: JOIN between derived tables" + "plan": "VT12001: unsupported: JOIN between derived tables with LIMIT" }, { "comment": "multi-shard union",