Skip to content

Commit

Permalink
[release-18.0] planbuilder bugfix: expose columns through derived tab…
Browse files Browse the repository at this point in the history
…les (#14501) (#14504)

Co-authored-by: Andrés Taylor <[email protected]>
  • Loading branch information
vitess-bot[bot] and systay authored Nov 14, 2023
1 parent e128bb9 commit f9535b2
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@ import (
"fmt"
"testing"

"vitess.io/vitess/go/test/endtoend/utils"

"github.com/stretchr/testify/assert"

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/test/endtoend/utils"
)

func start(t *testing.T) (utils.MySQLCompare, func()) {
Expand Down Expand Up @@ -205,23 +203,31 @@ func TestMultipleSchemaPredicates(t *testing.T) {
require.Contains(t, err.Error(), "specifying two different database in the query is not supported")
}

func TestInfrSchemaAndUnionAll(t *testing.T) {
clusterInstance.VtGateExtraArgs = append(clusterInstance.VtGateExtraArgs, "--planner-version=gen4")
require.NoError(t,
clusterInstance.RestartVtgate())

vtConnParams := clusterInstance.GetVTParams(keyspaceName)
vtConnParams.DbName = keyspaceName
conn, err := mysql.Connect(context.Background(), &vtConnParams)
require.NoError(t, err)
func TestJoinWithSingleShardQueryOnRHS(t *testing.T) {
// This test checks that we can run queries like this, where the RHS is a single shard query
mcmp, closer := start(t)
defer closer()

for _, workload := range []string{"oltp", "olap"} {
t.Run(workload, func(t *testing.T) {
utils.Exec(t, conn, fmt.Sprintf("set workload = %s", workload))
utils.Exec(t, conn, "start transaction")
utils.Exec(t, conn, `select connection_id()`)
utils.Exec(t, conn, `(select 'corder' from t1 limit 1) union all (select 'customer' from t7_xxhash limit 1)`)
utils.Exec(t, conn, "rollback")
})
}
query := `SELECT
c.column_name as column_name,
c.data_type as data_type,
c.table_name as table_name,
c.table_schema as table_schema
FROM
information_schema.columns c
JOIN (
SELECT
table_name
FROM
information_schema.tables
WHERE
table_schema != 'information_schema'
LIMIT
1
) AS tables ON tables.table_name = c.table_name
ORDER BY
c.table_name`

res := utils.Exec(t, mcmp.VtConn, query)
require.NotEmpty(t, res.Rows)
}
8 changes: 7 additions & 1 deletion go/vt/vtgate/planbuilder/operators/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,13 @@ func (a *Aggregator) isDerived() bool {
return a.DT != nil
}

func (a *Aggregator) FindCol(ctx *plancontext.PlanningContext, in sqlparser.Expr, _ bool) (int, error) {
func (a *Aggregator) FindCol(ctx *plancontext.PlanningContext, in sqlparser.Expr, underRoute bool) (int, error) {
if underRoute && a.isDerived() {
// We don't want to use columns on this operator if it's a derived table under a route.
// In this case, we need to add a Projection on top of this operator to make the column available
return -1, nil
}

expr, err := a.DT.RewriteExpression(ctx, in)
if err != nil {
return 0, err
Expand Down
7 changes: 6 additions & 1 deletion go/vt/vtgate/planbuilder/operators/horizon.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,12 @@ func canReuseColumn[T any](
return
}

func (h *Horizon) FindCol(ctx *plancontext.PlanningContext, expr sqlparser.Expr, _ bool) (int, error) {
func (h *Horizon) FindCol(ctx *plancontext.PlanningContext, expr sqlparser.Expr, underRoute bool) (int, error) {
if underRoute && h.IsDerived() {
// We don't want to use columns on this operator if it's a derived table under a route.
// In this case, we need to add a Projection on top of this operator to make the column available
return -1, nil
}
for idx, se := range sqlparser.GetFirstSelect(h.Query).SelectExprs {
ae, ok := se.(*sqlparser.AliasedExpr)
if !ok {
Expand Down
6 changes: 3 additions & 3 deletions go/vt/vtgate/planbuilder/operators/route_planning.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,19 +375,19 @@ func mergeOrJoin(ctx *plancontext.PlanningContext, lhs, rhs ops.Operator, joinPr

if len(joinPredicates) > 0 && requiresSwitchingSides(ctx, rhs) {
if !inner {
return nil, nil, vterrors.VT12001("LEFT JOIN with derived tables")
return nil, nil, vterrors.VT12001("LEFT JOIN with LIMIT on the outer side")
}

if requiresSwitchingSides(ctx, lhs) {
return nil, nil, vterrors.VT12001("JOIN between derived tables")
return nil, nil, vterrors.VT12001("JOIN between derived tables with LIMIT")
}

join := NewApplyJoin(Clone(rhs), Clone(lhs), nil, !inner)
newOp, err := pushJoinPredicates(ctx, joinPredicates, join)
if err != nil {
return nil, nil, err
}
return newOp, rewrite.NewTree("logical join to applyJoin, switching side because derived table", newOp), nil
return newOp, rewrite.NewTree("logical join to applyJoin, switching side because LIMIT", newOp), nil
}

join := NewApplyJoin(Clone(lhs), Clone(rhs), nil, !inner)
Expand Down
48 changes: 48 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -5956,5 +5956,53 @@
"user.user"
]
}
},
{
"comment": "GROUP BY inside derived table on the RHS should not be a problem",
"query": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM user WHERE id = 143 GROUP BY 1) AS tables ON tables.table_name = c.table_name",
"plan": {
"QueryType": "SELECT",
"Original": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM user WHERE id = 143 GROUP BY 1) AS tables ON tables.table_name = c.table_name",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "R:0",
"JoinVars": {
"tables_table_name": 0
},
"TableName": "`user`_`user`",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "EqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select table_name from (select table_name from `user` where 1 != 1 group by table_name) as `tables` where 1 != 1",
"Query": "select table_name from (select table_name from `user` where id = 143 group by table_name) as `tables`",
"Table": "`user`",
"Values": [
"INT64(143)"
],
"Vindex": "user_index"
},
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select c.column_name from `user` as c where 1 != 1",
"Query": "select c.column_name from `user` as c where c.table_name = :tables_table_name",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]
42 changes: 42 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/info_schema57_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -1139,5 +1139,47 @@
"Table": "information_schema.apa"
}
}
},
{
"comment": "LIMIT 1 inside derived table on the RHS should not be a problem",
"query": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name",
"plan": {
"QueryType": "SELECT",
"Original": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "R:0",
"JoinVars": {
"tables_table_name": 0
},
"TableName": "information_schema.`tables`_information_schema.`columns`",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "DBA",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select table_name from (select table_name from information_schema.`tables` where 1 != 1) as `tables` where 1 != 1",
"Query": "select table_name from (select table_name from information_schema.`tables` where table_schema != 'information_schema' limit 1) as `tables`",
"Table": "information_schema.`tables`"
},
{
"OperatorType": "Route",
"Variant": "DBA",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select c.column_name from information_schema.`columns` as c where 1 != 1",
"Query": "select c.column_name from information_schema.`columns` as c where c.table_name = :c_table_name /* VARCHAR */",
"SysTableTableName": "[c_table_name::tables_table_name]",
"Table": "information_schema.`columns`"
}
]
}
}
}
]
42 changes: 42 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/info_schema80_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -1261,5 +1261,47 @@
"Table": "information_schema.apa"
}
}
},
{
"comment": "LIMIT 1 inside derived table on the RHS should not be a problem",
"query": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name",
"plan": {
"QueryType": "SELECT",
"Original": "SELECT c.column_name FROM information_schema.columns c JOIN ( SELECT table_name FROM information_schema.tables WHERE table_schema != 'information_schema' LIMIT 1 ) AS tables ON tables.table_name = c.table_name",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "R:0",
"JoinVars": {
"tables_table_name": 0
},
"TableName": "information_schema.`tables`_information_schema.`columns`",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "DBA",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select table_name from (select table_name from information_schema.`tables` where 1 != 1) as `tables` where 1 != 1",
"Query": "select table_name from (select table_name from information_schema.`tables` where table_schema != 'information_schema' limit 1) as `tables`",
"Table": "information_schema.`tables`"
},
{
"OperatorType": "Route",
"Variant": "DBA",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select c.column_name from information_schema.`columns` as c where 1 != 1",
"Query": "select c.column_name from information_schema.`columns` as c where c.table_name = :c_table_name /* VARCHAR */",
"SysTableTableName": "[c_table_name::tables_table_name]",
"Table": "information_schema.`columns`"
}
]
}
}
}
]
45 changes: 45 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4808,5 +4808,50 @@
"user.user_extra"
]
}
},
{
"comment": "Derived tables going to a single shard still need to expand derived table columns",
"query": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM unsharded LIMIT 1) AS tables ON tables.table_name = c.table_name",
"plan": {
"QueryType": "SELECT",
"Original": "SELECT c.column_name FROM user c JOIN (SELECT table_name FROM unsharded LIMIT 1) AS tables ON tables.table_name = c.table_name",
"Instructions": {
"OperatorType": "Join",
"Variant": "Join",
"JoinColumnIndexes": "R:0",
"JoinVars": {
"tables_table_name": 0
},
"TableName": "unsharded_`user`",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Unsharded",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select table_name from (select table_name from unsharded where 1 != 1) as `tables` where 1 != 1",
"Query": "select table_name from (select table_name from unsharded limit 1) as `tables`",
"Table": "unsharded"
},
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select c.column_name from `user` as c where 1 != 1",
"Query": "select c.column_name from `user` as c where c.table_name = :tables_table_name",
"Table": "`user`"
}
]
},
"TablesUsed": [
"main.unsharded",
"user.user"
]
}
}
]
4 changes: 2 additions & 2 deletions go/vt/vtgate/planbuilder/testdata/unsupported_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,12 @@
{
"comment": "cant switch sides for outer joins",
"query": "select id from user left join (select user_id from user_extra limit 10) ue on user.id = ue.user_id",
"plan": "VT12001: unsupported: LEFT JOIN with derived tables"
"plan": "VT12001: unsupported: LEFT JOIN with LIMIT on the outer side"
},
{
"comment": "limit on both sides means that we can't evaluate this at all",
"query": "select id from (select id from user limit 10) u join (select user_id from user_extra limit 10) ue on u.id = ue.user_id",
"plan": "VT12001: unsupported: JOIN between derived tables"
"plan": "VT12001: unsupported: JOIN between derived tables with LIMIT"
},
{
"comment": "multi-shard union",
Expand Down

0 comments on commit f9535b2

Please sign in to comment.