Skip to content

Commit

Permalink
[release-19.0] Improve Schema Engine's TablesWithSize80 query (#17066) (#17089)
Browse files Browse the repository at this point in the history

Signed-off-by: Shlomi Noach <[email protected]>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
Co-authored-by: Shlomi Noach <[email protected]>
  • Loading branch information
vitess-bot[bot] and shlomi-noach authored Oct 31, 2024
1 parent 5c08da6 commit 0e83212
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 14 deletions.
37 changes: 28 additions & 9 deletions go/mysql/flavor_mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,26 +337,45 @@ WHERE t.table_schema = database()
GROUP BY t.table_name, t.table_type, t.create_time, t.table_comment`

// TablesWithSize80 is a query that selects tables along with their sizes on MySQL 8.0.
//
// Note the following:
//   - We use a single query to fetch both partitioned and non-partitioned tables. This is because
//     accessing `information_schema.innodb_tablespaces` is expensive on servers with many tablespaces,
//     and every query that loads the table needs to perform full table scans on it. Doing a single
//     table scan is more efficient than doing more than one.
//   - We utilize the `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to do early pruning before the JOIN.
//   - `TABLES`.`TABLE_NAME` has `utf8mb4_0900_ai_ci` collation. `INNODB_TABLESPACES`.`NAME` has `utf8mb3_general_ci`.
//     We normalize the collation to get better query performance (we force the casting at the time of our choosing).
//   - InnoDB has different table names than MySQL does, in particular for partitioned tables. As far as InnoDB
//     is concerned, each partition is its own table.
//   - We use a `UNION ALL` approach to handle two distinct scenarios: tables that are partitioned and those that are not.
//     Since we `LEFT JOIN` from `TABLES` to `INNODB_TABLESPACES`, we know we already do a full table scan on `TABLES`. We therefore
//     don't mind spending some extra computation time (as in `CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci`)
//     to make things easier for the JOIN.
//   - We utilize the `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to tell whether the table is partitioned or not. The column
//     may be `NULL` or may have multiple attributes, one of which is "partitioned", which is the one we are looking for.
//   - In a partitioned table, InnoDB will return multiple rows for the same table name, one for each partition, which we `SUM` together.
//     We also `SUM` the sizes in the non-partitioned case. This is not because we need to, but because it makes the query
//     symmetric and less prone to future edit errors.
//
// Each branch of the `UNION ALL` must join `INNODB_TABLESPACES` exactly once under the alias `i` and
// carry exactly one `WHERE` predicate list; a second join using the same alias, or two predicates
// without a connecting operator, makes the statement invalid.
const TablesWithSize80 = `SELECT t.table_name,
t.table_type,
UNIX_TIMESTAMP(t.create_time),
t.table_comment,
SUM(i.file_size),
SUM(i.allocated_size)
FROM information_schema.tables t
LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i
ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci
WHERE
t.table_schema = database() AND IFNULL(t.create_options, '') NOT LIKE '%partitioned%'
GROUP BY
t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment
UNION ALL
SELECT t.table_name,
t.table_type,
UNIX_TIMESTAMP(t.create_time),
t.table_comment,
SUM(i.file_size),
SUM(i.allocated_size)
FROM information_schema.tables t
LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i
ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci)
WHERE
t.table_schema = database() AND t.create_options LIKE '%partitioned%'
GROUP BY
t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment
`
Expand Down
14 changes: 9 additions & 5 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,8 +895,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh

migrationCutOverThreshold := getMigrationCutOverThreshold(onlineDDL)

waitForPos := func(s *VReplStream, pos replication.Position) error {
ctx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
waitForPos := func(s *VReplStream, pos replication.Position, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
// Wait for target to reach the up-to-date pos
if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, s.id, replication.EncodePosition(pos)); err != nil {
Expand Down Expand Up @@ -954,8 +954,12 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
return err
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-sentry pos: %v", replication.EncodePosition(postSentryPos))
if err := waitForPos(s, postSentryPos); err != nil {
return err
// We have not yet locked anything, stopped anything, or done anything that otherwise
// impacts query serving so we wait for a multiple of the cutover threshold here, with
// that variable primarily serving to limit the max time we later spend waiting for
// a position again AFTER we've taken the locks and table access is blocked.
if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*3); err != nil {
return vterrors.Wrapf(err, "failed waiting for pos after sentry creation")
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached")
}
Expand Down Expand Up @@ -1129,7 +1133,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
}

e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-lock pos: %v", replication.EncodePosition(postWritesPos))
if err := waitForPos(s, postWritesPos); err != nil {
if err := waitForPos(s, postWritesPos, migrationCutOverThreshold); err != nil {
e.updateMigrationStage(ctx, onlineDDL.UUID, "timeout while waiting for post-lock pos: %v", err)
return err
}
Expand Down

0 comments on commit 0e83212

Please sign in to comment.