From def38ba9055723beb993cfda2a2bd4c9416bb627 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:55:51 +0200 Subject: [PATCH] bugfix: Allow cross-keyspace joins (#16520) Signed-off-by: Andres Taylor --- .github/workflows/vitess_tester_vtgate.yml | 6 +- .../two_sharded_keyspaces/queries.test | 26 +++++++ .../two_sharded_keyspaces/vschema.json | 72 +++++++++++++++++++ .../planbuilder/operators/join_merging.go | 2 +- .../planbuilder/operators/sharded_routing.go | 8 ++- .../operators/subquery_planning.go | 2 +- .../planbuilder/testdata/from_cases.json | 49 +++++++++++++ .../testdata/unsupported_cases.json | 5 ++ test/templates/cluster_vitess_tester.tpl | 6 +- 9 files changed, 166 insertions(+), 10 deletions(-) create mode 100644 go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/queries.test create mode 100644 go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/vschema.json diff --git a/.github/workflows/vitess_tester_vtgate.yml b/.github/workflows/vitess_tester_vtgate.yml index 0c2965fee9c..36ebd0f100c 100644 --- a/.github/workflows/vitess_tester_vtgate.yml +++ b/.github/workflows/vitess_tester_vtgate.yml @@ -112,7 +112,7 @@ jobs: go install github.com/vitessio/go-junit-report@HEAD # install vitess tester - go install github.com/vitessio/vitess-tester@eb953122baba163ed8ccaa6642458ee984f5d7e4 + go install github.com/vitessio/vitess-tester@89dd933a9ea0e15f69ca58b9c8ea09a358762cca - name: Setup launchable dependencies if: steps.skip-workflow.outputs.is_draft == 'false' && steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' && github.base_ref == 'main' @@ -143,9 +143,9 @@ jobs: # We go over all the directories in the given path. # If there is a vschema file there, we use it, otherwise we let vitess-tester autogenerate it. if [ -f $dir/vschema.json ]; then - vitess-tester --sharded --xunit --test-dir $dir --vschema "$dir"vschema.json + vitess-tester --xunit --vschema "$dir"vschema.json $dir/*.test else - vitess-tester --sharded --xunit --test-dir $dir + vitess-tester --sharded --xunit $dir/*.test fi # Number the reports by changing their file names. mv report.xml report"$i".xml diff --git a/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/queries.test b/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/queries.test new file mode 100644 index 00000000000..f625333313a --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/queries.test @@ -0,0 +1,26 @@ +use customer; +create table if not exists customer( + customer_id bigint not null, + email varbinary(128), + primary key(customer_id) +) ENGINE=InnoDB; +insert into customer.customer(customer_id, email) values(1, '[alice@domain.com](mailto:alice@domain.com)'); +insert into customer.customer(customer_id, email) values(2, '[bob@domain.com](mailto:bob@domain.com)'); +insert into customer.customer(customer_id, email) values(3, '[charlie@domain.com](mailto:charlie@domain.com)'); +insert into customer.customer(customer_id, email) values(4, '[dan@domain.com](mailto:dan@domain.com)'); +insert into customer.customer(customer_id, email) values(5, '[eve@domain.com](mailto:eve@domain.com)'); +use corder; +create table if not exists corder( + order_id bigint not null, + customer_id bigint, + sku varbinary(128), + price bigint, + primary key(order_id) +) ENGINE=InnoDB; +insert into corder.corder(order_id, customer_id, sku, price) values(1, 1, 'SKU-1001', 100); +insert into corder.corder(order_id, customer_id, sku, price) values(2, 2, 'SKU-1002', 30); +insert into corder.corder(order_id, customer_id, sku, price) values(3, 3, 'SKU-1002', 30); +insert into corder.corder(order_id, customer_id, sku, price) values(4, 4, 'SKU-1002', 30); +insert into corder.corder(order_id, customer_id, sku, price) values(5, 5, 'SKU-1002', 30); + +select co.order_id, co.customer_id, co.price from corder.corder co left join customer.customer cu on co.customer_id=cu.customer_id where cu.customer_id=1; diff --git a/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/vschema.json b/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/vschema.json new file mode 100644 index 00000000000..5672042bace --- /dev/null +++ b/go/test/endtoend/vtgate/vitess_tester/two_sharded_keyspaces/vschema.json @@ -0,0 +1,72 @@ +{ + "keyspaces": { + "customer": { + "sharded": true, + "vindexes": { + "hash": { + "type": "hash", + "params": {}, + "owner": "" + } + }, + "tables": { + "customer": { + "type": "", + "column_vindexes": [ + { + "column": "customer_id", + "name": "hash", + "columns": [] + } + ], + "columns": [], + "pinned": "", + "column_list_authoritative": false, + "source": "" + } + }, + "require_explicit_routing": false, + "foreign_key_mode": 0, + "multi_tenant_spec": null + }, + "corder": { + "sharded": true, + "vindexes": { + "hash": { + "type": "hash", + "params": {}, + "owner": "" + } + }, + "tables": { + "corder": { + "type": "", + "column_vindexes": [ + { + "column": "customer_id", + "name": "hash", + "columns": [] + } + ], + "columns": [], + "pinned": "", + "column_list_authoritative": false, + "source": "" + } + }, + "require_explicit_routing": false, + "foreign_key_mode": 0, + "multi_tenant_spec": null + } + }, + "routing_rules": { + "rules": [] + }, + "shard_routing_rules": { + "rules": [] + }, + "keyspace_routing_rules": null, + "mirror_rules": { + "rules": [] + } +} \ No newline at end of file diff --git a/go/vt/vtgate/planbuilder/operators/join_merging.go b/go/vt/vtgate/planbuilder/operators/join_merging.go index 625ad34cd6c..38bc1e80832 100644 --- a/go/vt/vtgate/planbuilder/operators/join_merging.go +++ b/go/vt/vtgate/planbuilder/operators/join_merging.go @@ -64,7 +64,7 @@ func mergeJoinInputs(ctx *plancontext.PlanningContext, lhs, rhs Operator, joinPr // sharded routing is complex, so we handle it in a separate method case a == sharded && b == sharded: - return tryMergeJoinShardedRouting(ctx, lhsRoute, rhsRoute, m, joinPredicates) + return tryMergeShardedRouting(ctx, lhsRoute, rhsRoute, m, joinPredicates, false /*isSubquery*/) default: return nil diff --git a/go/vt/vtgate/planbuilder/operators/sharded_routing.go b/go/vt/vtgate/planbuilder/operators/sharded_routing.go index 1319b76f040..40532f729c3 100644 --- a/go/vt/vtgate/planbuilder/operators/sharded_routing.go +++ b/go/vt/vtgate/planbuilder/operators/sharded_routing.go @@ -634,11 +634,12 @@ func (tr *ShardedRouting) extraInfo() string { ) } -func tryMergeJoinShardedRouting( +func tryMergeShardedRouting( ctx *plancontext.PlanningContext, routeA, routeB *Route, m merger, joinPredicates []sqlparser.Expr, + isSubquery bool, ) *Route { sameKeyspace := routeA.Routing.Keyspace() == routeB.Routing.Keyspace() tblA := routeA.Routing.(*ShardedRouting) @@ -670,7 +671,10 @@ func tryMergeJoinShardedRouting( } if !sameKeyspace { - panic(vterrors.VT12001("cross-shard correlated subquery")) + if isSubquery { + panic(vterrors.VT12001("cross-shard correlated subquery")) + } + return nil } canMerge := canMergeOnFilters(ctx, routeA, routeB, joinPredicates) diff --git a/go/vt/vtgate/planbuilder/operators/subquery_planning.go b/go/vt/vtgate/planbuilder/operators/subquery_planning.go index cdc0b8b191a..1a9734159c0 100644 --- a/go/vt/vtgate/planbuilder/operators/subquery_planning.go +++ b/go/vt/vtgate/planbuilder/operators/subquery_planning.go @@ -722,7 +722,7 @@ func mergeSubqueryInputs(ctx *plancontext.PlanningContext, in, out Operator, joi // sharded routing is complex, so we handle it in a separate method case inner == sharded && outer == sharded: - return tryMergeJoinShardedRouting(ctx, inRoute, outRoute, m, joinPredicates) + return tryMergeShardedRouting(ctx, inRoute, outRoute, m, joinPredicates, true /*isSubquery*/) default: return nil diff --git a/go/vt/vtgate/planbuilder/testdata/from_cases.json b/go/vt/vtgate/planbuilder/testdata/from_cases.json index b9670910f10..222956bd430 100644 --- a/go/vt/vtgate/planbuilder/testdata/from_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/from_cases.json @@ -4522,6 +4522,55 @@ ] } }, + { + "comment": "Cross keyspace join", + "query": "select 1 from user join t1 on user.id = t1.id", + "plan": { + "QueryType": "SELECT", + "Original": "select 1 from user join t1 on user.id = t1.id", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0", + "JoinVars": { + "t1_id": 1 + }, + "TableName": "t1_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "zlookup_unique", + "Sharded": true + }, + "FieldQuery": "select 1, t1.id from t1 where 1 != 1", + "Query": "select 1, t1.id from t1", + "Table": "t1" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from `user` where 1 != 1", + "Query": "select 1 from `user` where `user`.id = :t1_id", + "Table": "`user`", + "Values": [ + ":t1_id" + ], + "Vindex": "user_index" + } + ] + }, + "TablesUsed": [ + "user.user", + "zlookup_unique.t1" + ] + } + }, { "comment": "Select everything from a derived table having a cross-shard join", "query": "select * from (select u.foo * ue.bar from user u join user_extra ue) as dt", diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json index 4af630ffc0f..046806573d9 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json @@ -294,6 +294,11 @@ "query": "select 1 from music union (select id from user union all select name from unsharded)", "plan": "VT12001: unsupported: nesting of UNIONs on the right-hand side" }, + { + "comment": "Cross keyspace query with subquery", + "query": "select 1 from user where id in (select id from t1)", + "plan": "VT12001: unsupported: cross-shard correlated subquery" + }, { "comment": "multi-shard union", "query": "select 1 from music union (select id from user union select name from unsharded)", diff --git a/test/templates/cluster_vitess_tester.tpl b/test/templates/cluster_vitess_tester.tpl index bd34c2de088..a2e2b14b765 100644 --- a/test/templates/cluster_vitess_tester.tpl +++ b/test/templates/cluster_vitess_tester.tpl @@ -110,7 +110,7 @@ jobs: go install github.com/vitessio/go-junit-report@HEAD # install vitess tester - go install github.com/vitessio/vitess-tester@eb953122baba163ed8ccaa6642458ee984f5d7e4 + go install github.com/vitessio/vitess-tester@89dd933a9ea0e15f69ca58b9c8ea09a358762cca - name: Setup launchable dependencies if: steps.skip-workflow.outputs.is_draft == 'false' && steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' && github.base_ref == 'main' @@ -141,9 +141,9 @@ jobs: # We go over all the directories in the given path. # If there is a vschema file there, we use it, otherwise we let vitess-tester autogenerate it. if [ -f $dir/vschema.json ]; then - vitess-tester --sharded --xunit --test-dir $dir --vschema "$dir"vschema.json + vitess-tester --xunit --vschema "$dir"vschema.json $dir/*.test else - vitess-tester --sharded --xunit --test-dir $dir + vitess-tester --sharded --xunit $dir/*.test fi # Number the reports by changing their file names. mv report.xml report"$i".xml