Skip to content

Commit

Permalink
VTGate/foreign keys stress test: add tests for 'standard' replica (#1…
Browse files Browse the repository at this point in the history
…4747)

Signed-off-by: Shlomi Noach <[email protected]>
  • Loading branch information
shlomi-noach authored Dec 21, 2023
1 parent 750d04c commit c91f57b
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 66 deletions.
6 changes: 4 additions & 2 deletions go/test/endtoend/onlineddl/revert/onlineddl_revert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,10 @@ func testRevertible(t *testing.T) {
fkOnlineDDLPossible := false
t.Run("check 'rename_table_preserve_foreign_key' variable", func(t *testing.T) {
// Online DDL is not possible on vanilla MySQL 8.0 for reasons described in https://vitess.io/blog/2021-06-15-online-ddl-why-no-fk/.
// However, Online DDL is made possible in via these changes: https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps1.
// However, Online DDL is made possible in via these changes:
// - https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// - https://github.com/planetscale/mysql-server/commit/c2f1344a6863518d749f2eb01a4c74ca08a5b889
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps3.
// Said changes introduce a new global/session boolean variable named 'rename_table_preserve_foreign_key'. It defaults 'false'/0 for backwards compatibility.
// When enabled, a `RENAME TABLE` to a FK parent "pins" the children's foreign keys to the table name rather than the table pointer. Which means after the RENAME,
// the children will point to the newly instated table rather than the original, renamed table.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2287,8 +2287,10 @@ func testForeignKeys(t *testing.T) {
fkOnlineDDLPossible := false
t.Run("check 'rename_table_preserve_foreign_key' variable", func(t *testing.T) {
// Online DDL is not possible on vanilla MySQL 8.0 for reasons described in https://vitess.io/blog/2021-06-15-online-ddl-why-no-fk/.
// However, Online DDL is made possible in via these changes: https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps1.
// However, Online DDL is made possible in via these changes:
// - https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// - https://github.com/planetscale/mysql-server/commit/c2f1344a6863518d749f2eb01a4c74ca08a5b889
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps3.
// Said changes introduce a new global/session boolean variable named 'rename_table_preserve_foreign_key'. It defaults 'false'/0 for backwards compatibility.
// When enabled, a `RENAME TABLE` to a FK parent "pins" the children's foreign keys to the table name rather than the table pointer. Which means after the RENAME,
// the children will point to the newly instated table rather than the original, renamed table.
Expand Down
171 changes: 120 additions & 51 deletions go/test/endtoend/vtgate/foreignkey/stress/fk_stress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ var (
clusterInstance *cluster.LocalProcessCluster
shards []cluster.Shard
primary *cluster.Vttablet
replica *cluster.Vttablet
replicaNoFK *cluster.Vttablet
replicaFK *cluster.Vttablet
vtParams mysql.ConnParams

onlineDDLStrategy = "vitess --unsafe-allow-foreign-keys --cut-over-threshold=15s"
Expand Down Expand Up @@ -350,7 +351,7 @@ func TestMain(m *testing.M) {
}

// We will use a replica to confirm that vtgate's cascading works correctly.
if err := clusterInstance.StartKeyspace(*keyspace, []string{"1"}, 1, false); err != nil {
if err := clusterInstance.StartKeyspace(*keyspace, []string{"1"}, 2, false); err != nil {
return 1, err
}

Expand Down Expand Up @@ -391,14 +392,23 @@ func tabletTestName(t *testing.T, tablet *cluster.Vttablet) string {
switch tablet {
case primary:
return "primary"
case replica:
return "replica"
case replicaNoFK:
return "replicaNoFK"
case replicaFK:
return "replicaFK"
default:
assert.FailNowf(t, "unknown tablet", "%v, type=%v", tablet.Alias, tablet.Type)
}
return ""
}

func validateReplicationIsHealthy(t *testing.T, tablet *cluster.Vttablet) (result bool) {
t.Run(tabletTestName(t, tablet), func(t *testing.T) {
result = cluster.ValidateReplicationIsHealthy(t, tablet)
})
return result
}

func getTabletPosition(t *testing.T, tablet *cluster.Vttablet) replication.Position {
rs := queryTablet(t, tablet, "select @@gtid_executed as gtid_executed", "")
row := rs.Named().Row()
Expand All @@ -410,17 +420,14 @@ func getTabletPosition(t *testing.T, tablet *cluster.Vttablet) replication.Posit
return pos
}

func waitForReplicaCatchup(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
primaryPos := getTabletPosition(t, primary)
func waitForReplicaCatchup(t *testing.T, ctx context.Context, replica *cluster.Vttablet, pos replication.Position) {
for {
replicaPos := getTabletPosition(t, replica)
if replicaPos.GTIDSet.Contains(primaryPos.GTIDSet) {
if replicaPos.GTIDSet.Contains(pos.GTIDSet) {
// success
return
}
if !cluster.ValidateReplicationIsHealthy(t, replica) {
if !validateReplicationIsHealthy(t, replica) {
assert.FailNow(t, "replication is broken; not waiting for catchup")
return
}
Expand All @@ -434,21 +441,41 @@ func waitForReplicaCatchup(t *testing.T) {
}
}

func waitForReplicationCatchup(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
primaryPos := getTabletPosition(t, primary)
var wg sync.WaitGroup
for _, replica := range []*cluster.Vttablet{replicaNoFK, replicaFK} {
replica := replica
wg.Add(1)
go func() {
waitForReplicaCatchup(t, ctx, replica, primaryPos)
wg.Done()
}()
}
wg.Wait()
}

func validateMetrics(t *testing.T, tcase *testCase) {
for _, workloadTable := range tableNames {
t.Run(workloadTable, func(t *testing.T) {
t.Run("fk errors", func(t *testing.T) {
testSelectTableFKErrors(t, workloadTable, tcase)
})
var primaryRows, replicaRows int64
var primaryRows, replicaNoFKRows, replicaFKRows int64
t.Run(tabletTestName(t, primary), func(t *testing.T) {
primaryRows = testSelectTableMetrics(t, primary, workloadTable, tcase)
})
t.Run(tabletTestName(t, replica), func(t *testing.T) {
replicaRows = testSelectTableMetrics(t, replica, workloadTable, tcase)
t.Run(tabletTestName(t, replicaNoFK), func(t *testing.T) {
replicaNoFKRows = testSelectTableMetrics(t, replicaNoFK, workloadTable, tcase)
})
t.Run(tabletTestName(t, replicaFK), func(t *testing.T) {
replicaFKRows = testSelectTableMetrics(t, replicaFK, workloadTable, tcase)
})
t.Run("compare primary and replica", func(t *testing.T) {
assert.Equal(t, primaryRows, replicaRows)
t.Run("compare primary and replicas", func(t *testing.T) {
assert.Equal(t, primaryRows, replicaNoFKRows)
assert.Equal(t, primaryRows, replicaFKRows)
})
})
}
Expand All @@ -457,12 +484,16 @@ func validateMetrics(t *testing.T, tcase *testCase) {
func TestInitialSetup(t *testing.T) {
shards = clusterInstance.Keyspaces[0].Shards
require.Equal(t, 1, len(shards))
require.Equal(t, 2, len(shards[0].Vttablets))
require.Equal(t, 3, len(shards[0].Vttablets)) // primary, no-fk replica, fk replica
primary = shards[0].Vttablets[0]
require.NotNil(t, primary)
replica = shards[0].Vttablets[1]
require.NotNil(t, replica)
require.NotEqual(t, primary.Alias, replica.Alias)
replicaNoFK = shards[0].Vttablets[1]
require.NotNil(t, replicaNoFK)
require.NotEqual(t, primary.Alias, replicaNoFK.Alias)
replicaFK = shards[0].Vttablets[2]
require.NotNil(t, replicaFK)
require.NotEqual(t, primary.Alias, replicaFK.Alias)
require.NotEqual(t, replicaNoFK.Alias, replicaFK.Alias)

reverseTableNames = slices.Clone(tableNames)
slices.Reverse(reverseTableNames)
Expand Down Expand Up @@ -497,7 +528,8 @@ func ExecuteFKTest(t *testing.T, tcase *testCase) {
workloadName = "workload"
}
testName := fmt.Sprintf("%s/del=%s/upd=%s", workloadName, referenceActionMap[tcase.onDeleteAction], referenceActionMap[tcase.onUpdateAction])
if tcase.onlineDDLTable != "" {
testOnlineDDL := (tcase.onlineDDLTable != "")
if testOnlineDDL {
testName = fmt.Sprintf("%s/ddl=%s", testName, tcase.onlineDDLTable)
}
if tcase.notes != "" {
Expand All @@ -524,7 +556,7 @@ func ExecuteFKTest(t *testing.T, tcase *testCase) {
baseSleepInterval := 15 * time.Millisecond
singleConnectionSleepIntervalNanoseconds := float64(baseSleepInterval.Nanoseconds()) * sleepModifier
sleepInterval := time.Duration(int64(singleConnectionSleepIntervalNanoseconds))
if tcase.onlineDDLTable != "" {
if testOnlineDDL {
sleepInterval = sleepInterval * 2
maxConcurrency = max(1, maxConcurrency/2)
}
Expand All @@ -543,7 +575,7 @@ func ExecuteFKTest(t *testing.T, tcase *testCase) {
}()
}

if tcase.onlineDDLTable != "" {
if testOnlineDDL {
t.Run("migrating", func(t *testing.T) {
// This only works on patched MySQL
hint := tcase.createTableHint
Expand Down Expand Up @@ -575,18 +607,21 @@ func ExecuteFKTest(t *testing.T, tcase *testCase) {
wg.Wait()
})
}
t.Run("wait for replica", func(t *testing.T) {
waitForReplicaCatchup(t)
t.Run("wait for replicas", func(t *testing.T) {
waitForReplicationCatchup(t)
})
validateTableDefinitions(t, testOnlineDDL)
t.Run("validate metrics", func(t *testing.T) {
validateMetrics(t, tcase)
})
t.Run("validate replication health", func(t *testing.T) {
cluster.ValidateReplicationIsHealthy(t, replica)
validateReplicationIsHealthy(t, replicaNoFK)
validateReplicationIsHealthy(t, replicaFK)
})
t.Run("validate fk", func(t *testing.T) {
testFKIntegrity(t, primary, tcase)
testFKIntegrity(t, replica, tcase)
testFKIntegrity(t, replicaNoFK, tcase)
testFKIntegrity(t, replicaFK, tcase)
})
})
}
Expand All @@ -595,20 +630,26 @@ func TestStressFK(t *testing.T) {
defer cluster.PanicHandler(t)

t.Run("validate replication health", func(t *testing.T) {
cluster.ValidateReplicationIsHealthy(t, replica)
validateReplicationIsHealthy(t, replicaNoFK)
validateReplicationIsHealthy(t, replicaFK)
})

runOnlineDDL := false
t.Run("check 'rename_table_preserve_foreign_key' variable", func(t *testing.T) {
// Online DDL is not possible on vanilla MySQL 8.0 for reasons described in https://vitess.io/blog/2021-06-15-online-ddl-why-no-fk/.
// However, Online DDL is made possible in via these changes: https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps1.
// Said changes introduce a new global/session boolean variable named 'rename_table_preserve_foreign_key'. It defaults 'false'/0 for backwards compatibility.
// When enabled, a `RENAME TABLE` to a FK parent "pins" the children's foreign keys to the table name rather than the table pointer. Which means after the RENAME,
// However, Online DDL is made possible in via these changes:
// - https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// - https://github.com/planetscale/mysql-server/commit/c2f1344a6863518d749f2eb01a4c74ca08a5b889
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps3.
// Said changes introduce a new behavior for `RENAME TABLE`. When at least two tables are being renamed in the statement,
// and when at least one table uses internal vitess naming, then a `RENAME TABLE` to a FK parent "pins" the children's
// foreign keys to the table name rather than the table pointer. Which means after the RENAME,
// the children will point to the newly instated table rather than the original, renamed table.
// (Note: this applies to a particular type of RENAME where we swap tables, see the above blog post).
// For FK children, the MySQL changes simply ignore any Vitess-internal table.
//
// The variable 'rename_table_preserve_foreign_key' serves as an indicator to the functionality's availability,
// and at this time changing its value does not change any behavior.
//
// In this stress test, we enable Online DDL if the variable 'rename_table_preserve_foreign_key' is present. The Online DDL mechanism will in turn
// query for this variable, and manipulate it, when starting the migration and when cutting over.
rs, err := primary.VttabletProcess.QueryTablet("show global variables like 'rename_table_preserve_foreign_key'", keyspaceName, false)
Expand Down Expand Up @@ -690,6 +731,47 @@ func TestStressFK(t *testing.T) {
}
}

func validateTableDefinitions(t *testing.T, afterOnlineDDL bool) {
t.Run("validate definitions", func(t *testing.T) {
for _, tableName := range []string{childTableName, child2TableName, grandchildTableName} {
t.Run(tableName, func(t *testing.T) {
childFKFollowedParentRenameMsg := "found traces of internal vitess table name, suggesting Online DDL on parent table caused this child table to follow the renames parent. 'rename_table_preserve_foreign_key' should have prevented this"
var primaryStmt string
t.Run(tabletTestName(t, primary), func(t *testing.T) {
primaryStmt = getCreateTableStatement(t, primary, tableName)
assert.NotEmpty(t, primaryStmt)
assert.Contains(t, primaryStmt, "CONSTRAINT")
assert.NotContainsf(t, primaryStmt, "_vrepl", childFKFollowedParentRenameMsg)
assert.NotContainsf(t, primaryStmt, "_vrp_", childFKFollowedParentRenameMsg)
})
t.Run(tabletTestName(t, replicaFK), func(t *testing.T) {
stmt := getCreateTableStatement(t, replicaFK, tableName)
assert.Contains(t, stmt, "CONSTRAINT")
assert.Equal(t, primaryStmt, stmt)
assert.NotContainsf(t, stmt, "_vrepl", childFKFollowedParentRenameMsg)
assert.NotContainsf(t, stmt, "_vrp_", childFKFollowedParentRenameMsg)
})
t.Run(tabletTestName(t, replicaNoFK), func(t *testing.T) {
stmt := getCreateTableStatement(t, replicaNoFK, tableName)
// replicaNoFK does not have foreign keys, for the purpose of testing VTGate's cascading
// of foreign key rules.
// However, if we run Online DDL, the table will be swapped at the end of the migration.
// We're not sure here exactly which table has been migrated. Was it this table's parent?
// Or this table itself? Or an unrelated table? In case of Online DDL we don't want to
// validate this replicas' schema, because it could be any one of several outcomes. And
// we don't even care how this replica's schema looks like after the migration. Ths
// schema was inconsistent with the Primary to begin with. We've already tested replicaFK
// for correctness of the schema.
if !afterOnlineDDL {
assert.NotContains(t, stmt, "CONSTRAINT")
assert.NotEqual(t, primaryStmt, stmt)
}
})
})
}
})
}

// createInitialSchema creates the tables from scratch, and drops the foreign key constraints on the replica.
func createInitialSchema(t *testing.T, tcase *testCase) {
ctx := context.Background()
Expand Down Expand Up @@ -735,8 +817,8 @@ func createInitialSchema(t *testing.T, tcase *testCase) {
require.Nil(t, err)
})
}
t.Run("wait for replica", func(t *testing.T) {
waitForReplicaCatchup(t)
t.Run("wait for replication", func(t *testing.T) {
waitForReplicationCatchup(t)
})
t.Run("validating tables: vttablet", func(t *testing.T) {
// Check if table is created. Checked on tablets.
Expand All @@ -763,25 +845,12 @@ func createInitialSchema(t *testing.T, tcase *testCase) {

t.Run("dropping foreign keys on replica", func(t *testing.T) {
for _, statement := range dropConstraintsStatements {
_ = queryTablet(t, replica, "set global super_read_only=0", "")
_ = queryTablet(t, replica, statement, "")
_ = queryTablet(t, replica, "set global super_read_only=1", "")
}
})
t.Run("validate definitions", func(t *testing.T) {
for _, tableName := range []string{childTableName, child2TableName, grandchildTableName} {
t.Run(tableName, func(t *testing.T) {
t.Run(tabletTestName(t, primary), func(t *testing.T) {
stmt := getCreateTableStatement(t, primary, tableName)
assert.Contains(t, stmt, "CONSTRAINT")
})
t.Run(tabletTestName(t, replica), func(t *testing.T) {
stmt := getCreateTableStatement(t, replica, tableName)
assert.NotContains(t, stmt, "CONSTRAINT")
})
})
_ = queryTablet(t, replicaNoFK, "set global super_read_only=0", "")
_ = queryTablet(t, replicaNoFK, statement, "")
_ = queryTablet(t, replicaNoFK, "set global super_read_only=1", "")
}
})
validateTableDefinitions(t, false)
}

// testOnlineDDLStatement runs an online DDL, ALTER statement
Expand Down
18 changes: 9 additions & 9 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -987,13 +987,11 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
if preserveFKSupported {
// This code is only applicable when MySQL supports the 'rename_table_preserve_foreign_key' variable. This variable
// does not exist in vanilla MySQL.
// See https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps1.
if _, err := renameConn.Conn.Exec(ctx, sqlEnablePreserveForeignKey, 1, false); err != nil {
return err
}
log.Infof("@@rename_table_preserve_foreign_key enabled")
defer renameConn.Conn.Exec(ctx, sqlDisablePreserveForeignKey, 1, false)
// See
// - https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// - https://github.com/planetscale/mysql-server/commit/c2f1344a6863518d749f2eb01a4c74ca08a5b889
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps3.
log.Infof("@@rename_table_preserve_foreign_key supported")
}

renameQuery := sqlparser.BuildParsedQuery(sqlSwapTables, onlineDDL.Table, sentryTableName, vreplTable, onlineDDL.Table, sentryTableName, vreplTable)
Expand Down Expand Up @@ -3585,8 +3583,10 @@ func (e *Executor) readVReplStream(ctx context.Context, uuid string, okIfMissing

// isPreserveForeignKeySupported checks if the underlying MySQL server supports 'rename_table_preserve_foreign_key'
// Online DDL is not possible on vanilla MySQL 8.0 for reasons described in https://vitess.io/blog/2021-06-15-online-ddl-why-no-fk/.
// However, Online DDL is made possible in via these changes: https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps1.
// However, Online DDL is made possible in via these changes:
// - https://github.com/planetscale/mysql-server/commit/bb777e3e86387571c044fb4a2beb4f8c60462ced
// - https://github.com/planetscale/mysql-server/commit/c2f1344a6863518d749f2eb01a4c74ca08a5b889
// as part of https://github.com/planetscale/mysql-server/releases/tag/8.0.34-ps3.
// Said changes introduce a new global/session boolean variable named 'rename_table_preserve_foreign_key'. It defaults 'false'/0 for backwards compatibility.
// When enabled, a `RENAME TABLE` to a FK parent "pins" the children's foreign keys to the table name rather than the table pointer. Which means after the RENAME,
// the children will point to the newly instated table rather than the original, renamed table.
Expand Down
2 changes: 0 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,6 @@ const (
sqlAnalyzeTable = "ANALYZE NO_WRITE_TO_BINLOG TABLE `%a`"
sqlShowCreateTable = "SHOW CREATE TABLE `%a`"
sqlShowVariablesLikePreserveForeignKey = "show global variables like 'rename_table_preserve_foreign_key'"
sqlEnablePreserveForeignKey = "set @@rename_table_preserve_foreign_key = 1"
sqlDisablePreserveForeignKey = "set @@rename_table_preserve_foreign_key = 0"
sqlShowVariablesLikeFastAnalyzeTable = "show global variables like 'fast_analyze_table'"
sqlEnableFastAnalyzeTable = "set @@fast_analyze_table = 1"
sqlDisableFastAnalyzeTable = "set @@fast_analyze_table = 0"
Expand Down

0 comments on commit c91f57b

Please sign in to comment.