Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for avoidance of hosts taking backup in PRS & ERS #17300

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 92 additions & 102 deletions go/vt/proto/replicationdata/replicationdata.pb.go

Large diffs are not rendered by default.

34 changes: 0 additions & 34 deletions go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1,610 changes: 795 additions & 815 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Large diffs are not rendered by default.

68 changes: 0 additions & 68 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 14 additions & 3 deletions go/vt/vtctl/reparentutil/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,6 @@ func stopReplicationAndBuildStatusMaps(
logger.Infof("getting replication position from %v", alias)

stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY)
m.Lock()
res.tabletsBackupState[alias] = stopReplicationStatus.GetBackupRunning()
m.Unlock()
if err != nil {
sqlErr, isSQLErr := sqlerror.NewSQLErrorFromError(err).(*sqlerror.SQLError)
if isSQLErr && sqlErr != nil && sqlErr.Number() == sqlerror.ERNotReplica {
Expand All @@ -242,6 +239,20 @@ func stopReplicationAndBuildStatusMaps(
err = vterrors.Wrapf(err, "error when getting replication status for alias %v: %v", alias, err)
}
} else {
isTakingBackup := false

// To determine whether the tablet is taking a backup, prefer the After replication status, since it
// carries the most up-to-date information; fall back to the Before status if After is nil.
if stopReplicationStatus.After != nil {
isTakingBackup = stopReplicationStatus.After.BackupRunning
} else if stopReplicationStatus.Before != nil {
isTakingBackup = stopReplicationStatus.Before.BackupRunning
}

m.Lock()
res.tabletsBackupState[alias] = isTakingBackup
m.Unlock()

var sqlThreadRunning bool
// Check if the sql thread was running for the tablet
sqlThreadRunning, err = SQLThreadWasRunning(stopReplicationStatus)
Expand Down
4 changes: 1 addition & 3 deletions go/vt/vttablet/grpctmserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ func (s *server) MysqlHostMetrics(ctx context.Context, request *tabletmanagerdat
func (s *server) ReplicationStatus(ctx context.Context, request *tabletmanagerdatapb.ReplicationStatusRequest) (response *tabletmanagerdatapb.ReplicationStatusResponse, err error) {
defer s.tm.HandleRPCPanic(ctx, "ReplicationStatus", request, response, false /*verbose*/, &err)
ctx = callinfo.GRPCCallInfo(ctx)
response = &tabletmanagerdatapb.ReplicationStatusResponse{BackupRunning: s.tm.IsBackupRunning()}
deepthi marked this conversation as resolved.
Show resolved Hide resolved
response = &tabletmanagerdatapb.ReplicationStatusResponse{}
status, err := s.tm.ReplicationStatus(ctx)
if err == nil {
response.Status = status
Expand Down Expand Up @@ -638,8 +638,6 @@ func (s *server) StopReplicationAndGetStatus(ctx context.Context, request *table
response.Status = statusResponse.Status
}

response.BackupRunning = s.tm.IsBackupRunning()
deepthi marked this conversation as resolved.
Show resolved Hide resolved

return response, err
}

Expand Down
8 changes: 7 additions & 1 deletion go/vt/vttablet/tabletmanager/rpc_replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ func (tm *TabletManager) ReplicationStatus(ctx context.Context) (*replicationdat
if err != nil {
return nil, err
}
return replication.ReplicationStatusToProto(status), nil

protoStatus := replication.ReplicationStatusToProto(status)
protoStatus.BackupRunning = tm.IsBackupRunning()

return protoStatus, nil
}

// FullStatus returns the full status of MySQL including the replication information, semi-sync information, GTID information among others
Expand Down Expand Up @@ -893,6 +897,7 @@ func (tm *TabletManager) StopReplicationAndGetStatus(ctx context.Context, stopRe
return StopReplicationAndGetStatusResponse{}, vterrors.Wrap(err, "before status failed")
}
before := replication.ReplicationStatusToProto(rs)
before.BackupRunning = tm.IsBackupRunning()

if stopReplicationMode == replicationdatapb.StopReplicationMode_IOTHREADONLY {
if !rs.IOHealthy() {
Expand Down Expand Up @@ -939,6 +944,7 @@ func (tm *TabletManager) StopReplicationAndGetStatus(ctx context.Context, stopRe
}, vterrors.Wrap(err, "acquiring replication status failed")
}
after := replication.ReplicationStatusToProto(rsAfter)
after.BackupRunning = tm.IsBackupRunning()

rs.Position = rsAfter.Position
rs.RelayLogPosition = rsAfter.RelayLogPosition
Expand Down
1 change: 0 additions & 1 deletion proto/replicationdata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ message Configuration {
message StopReplicationStatus {
replicationdata.Status before = 1;
replicationdata.Status after = 2;
bool backup_running = 3;
ejortegau marked this conversation as resolved.
Show resolved Hide resolved
}

// StopReplicationMode is used to provide controls over how replication is stopped.
Expand Down
2 changes: 0 additions & 2 deletions proto/tabletmanagerdata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,6 @@ message ReplicationStatusRequest {

message ReplicationStatusResponse {
replicationdata.Status status = 1;
bool backup_running = 2;
deepthi marked this conversation as resolved.
Show resolved Hide resolved
}

message PrimaryStatusRequest {
Expand Down Expand Up @@ -549,7 +548,6 @@ message StopReplicationAndGetStatusResponse {

// Status represents the replication status call right before, and right after telling the replica to stop.
replicationdata.StopReplicationStatus status = 2;
bool backup_running = 3;
deepthi marked this conversation as resolved.
Show resolved Hide resolved
}

message PromoteReplicaRequest {
Expand Down
18 changes: 0 additions & 18 deletions web/vtadmin/src/proto/vtadmin.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading