Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-18.0] Improve Schema Engine's TablesWithSize80 query (#17066) #17088

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions go/mysql/flavor_mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,26 +310,45 @@ WHERE t.table_schema = database()
GROUP BY t.table_name, t.table_type, t.create_time, t.table_comment`

// TablesWithSize80 is a query to select table along with size for mysql 8.0
//
// Note the following:
// - We use a single query to fetch both partitioned and non-partitioned tables. This is because
// accessing `information_schema.innodb_tablespaces` is expensive on servers with many tablespaces,
// and every query that loads the table needs to perform full table scans on it. Doing a single
// table scan is more efficient than doing more than one.
// - We utilize `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to do early pruning before the JOIN.
// - `TABLES`.`TABLE_NAME` has `utf8mb4_0900_ai_ci` collation. `INNODB_TABLESPACES`.`NAME` has `utf8mb3_general_ci`.
// We normalize the collation to get better query performance (we force the casting at the time of our choosing)
// - InnoDB has different table names than MySQL does, in particular for partitioned tables. As far as InnoDB
// is concerned, each partition is its own table.
// - We use a `UNION ALL` approach to handle two distinct scenarios: tables that are partitioned and those that are not.
// Since we `LEFT JOIN` from `TABLES` to `INNODB_TABLESPACES`, we know we already do full table scan on `TABLES`. We therefore
// don't mind spending some extra computation time (as in `CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci`)
// to make things easier for the JOIN.
// - We utilize `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to tell if the table is partitioned or not. The column
// may be `NULL` or may have multiple attributes, one of which is "partitioned", which we are looking for.
// - In a partitioned table, InnoDB will return multiple rows for the same table name, one for each partition, which we successively SUM.
// We also `SUM` the sizes in the non-partitioned case. This is not because we need to, but because it makes the query
// symmetric and less prone to future edit errors.
const TablesWithSize80 = `SELECT t.table_name,
t.table_type,
UNIX_TIMESTAMP(t.create_time),
t.table_comment,
SUM(i.file_size),
SUM(i.allocated_size)
FROM information_schema.tables t
LEFT JOIN information_schema.innodb_tablespaces i
ON i.name LIKE CONCAT(t.table_schema, '/', t.table_name, IF(t.create_options <=> 'partitioned', '#p#%', '')) COLLATE utf8mb3_general_ci
LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i
ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci
WHERE
t.table_schema = database()
t.table_schema = database() AND IFNULL(t.create_options, '') NOT LIKE '%partitioned%'
GROUP BY
t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment
UNION ALL
SELECT t.table_name,
t.table_type,
UNIX_TIMESTAMP(t.create_time),
t.table_comment,
SUM(i.file_size),
SUM(i.allocated_size)
FROM information_schema.tables t
LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i
ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci)
WHERE
t.table_schema = database() AND t.create_options LIKE '%partitioned%'
GROUP BY
t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment
`
Expand Down
14 changes: 9 additions & 5 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er

migrationCutOverThreshold := getMigrationCutOverThreshold(onlineDDL)

waitForPos := func(s *VReplStream, pos replication.Position) error {
ctx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
waitForPos := func(s *VReplStream, pos replication.Position, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
// Wait for target to reach the up-to-date pos
if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, s.id, replication.EncodePosition(pos)); err != nil {
Expand Down Expand Up @@ -850,8 +850,12 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er
return err
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-sentry pos: %v", replication.EncodePosition(postSentryPos))
if err := waitForPos(s, postSentryPos); err != nil {
return err
// We have not yet locked anything, stopped anything, or done anything that otherwise
// impacts query serving so we wait for a multiple of the cutover threshold here, with
// that variable primarily serving to limit the max time we later spend waiting for
// a position again AFTER we've taken the locks and table access is blocked.
if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*3); err != nil {
return vterrors.Wrapf(err, "failed waiting for pos after sentry creation")
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached")
}
Expand Down Expand Up @@ -1005,7 +1009,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er
}

e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-lock pos: %v", replication.EncodePosition(postWritesPos))
if err := waitForPos(s, postWritesPos); err != nil {
if err := waitForPos(s, postWritesPos, migrationCutOverThreshold); err != nil {
e.updateMigrationStage(ctx, onlineDDL.UUID, "timeout while waiting for post-lock pos: %v", err)
return err
}
Expand Down
Loading