-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix: Errant GTID detection on the replicas when they set replication source #16833
Merged
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
09e9244
feat: add code to run errant gtid detection for a replica
GuptaManan100 9c6ad9e
feat: use errant gtid detection logic in setreplicationsourcelocked
GuptaManan100 734bd86
feat: make changes for initialize replication too
GuptaManan100 b7928ec
Merge remote-tracking branch 'upstream/main' into errant-gtid-fix
GuptaManan100 66f0b6d
feat: fix encoding of primary position that we send down
GuptaManan100 38983b7
feat: remove sending primary position from PRS and ERS into SetReplic…
GuptaManan100 ff452cc
comment: add code comments
GuptaManan100 b2e5b20
test: add tests
GuptaManan100 5854c45
feat: fix context passing and test expectations
GuptaManan100 f8b5129
Merge remote-tracking branch 'upstream/main' into errant-gtid-fix
GuptaManan100 f280d6d
feat: address review comments
GuptaManan100 e1e560c
docs: add summary changes
GuptaManan100 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -467,6 +467,33 @@ func (set Mysql56GTIDSet) SIDBlock() []byte { | |
return buf.Bytes() | ||
} | ||
|
||
// ErrantGTIDsOnReplica gets the errant GTIDs on the replica by comparing against the primary position and UUID. | ||
func ErrantGTIDsOnReplica(replicaPosition Position, primaryPosition Position) (string, error) { | ||
replicaGTIDSet, replicaOk := replicaPosition.GTIDSet.(Mysql56GTIDSet) | ||
primaryGTIDSet, primaryOk := primaryPosition.GTIDSet.(Mysql56GTIDSet) | ||
|
||
// Currently we only support errant GTID detection for MySQL 56 flavour. | ||
if !replicaOk || !primaryOk { | ||
return "", nil | ||
} | ||
|
||
// Calculate the difference between the replica and primary GTID sets. | ||
diffSet := replicaGTIDSet.Difference(primaryGTIDSet) | ||
return diffSet.String(), nil | ||
} | ||
|
||
// RemoveUUID removes a specific UUID from the gtid set. | ||
func (set Mysql56GTIDSet) RemoveUUID(uuid SID) Mysql56GTIDSet { | ||
newSet := make(Mysql56GTIDSet) | ||
for sid, intervals := range set { | ||
if sid == uuid { | ||
continue | ||
} | ||
newSet[sid] = intervals | ||
} | ||
return newSet | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
// Difference will supply the difference between the receiver and supplied Mysql56GTIDSets, and supply the result | ||
// as a Mysql56GTIDSet. | ||
func (set Mysql56GTIDSet) Difference(other Mysql56GTIDSet) Mysql56GTIDSet { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -704,3 +704,88 @@ func BenchmarkMySQL56GTIDParsing(b *testing.B) { | |
} | ||
} | ||
} | ||
|
||
func TestErrantGTIDsOnReplica(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
replicaPosition string | ||
primaryPosition string | ||
errantGtidWanted string | ||
wantErr string | ||
}{ | ||
{ | ||
name: "Empty replica position", | ||
replicaPosition: "MySQL56/", | ||
primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8", | ||
errantGtidWanted: "", | ||
}, { | ||
name: "Empty primary position", | ||
replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8", | ||
primaryPosition: "MySQL56/", | ||
errantGtidWanted: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8", | ||
}, { | ||
name: "Empty primary position - with multiple errant gtids", | ||
replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1", | ||
primaryPosition: "MySQL56/", | ||
errantGtidWanted: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1", | ||
}, { | ||
name: "Single errant GTID", | ||
replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34", | ||
primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-50,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-30", | ||
errantGtidWanted: "8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34", | ||
}, { | ||
name: "Multiple errant GTID", | ||
replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-32,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:3-35", | ||
primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-50,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-30,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34", | ||
errantGtidWanted: "8bc65cca-3fe4-11ed-bbfb-091034d48b3e:31-32,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:3-33:35", | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
replPos, err := DecodePosition(tt.replicaPosition) | ||
require.NoError(t, err) | ||
primaryPos, err := DecodePosition(tt.primaryPosition) | ||
require.NoError(t, err) | ||
errantGTIDs, err := ErrantGTIDsOnReplica(replPos, primaryPos) | ||
if tt.wantErr != "" { | ||
require.ErrorContains(t, err, tt.wantErr) | ||
} else { | ||
require.NoError(t, err) | ||
require.EqualValues(t, tt.errantGtidWanted, errantGTIDs) | ||
} | ||
|
||
}) | ||
} | ||
} | ||
|
||
func TestMysql56GTIDSet_RemoveUUID(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
initialSet string | ||
uuid string | ||
wantSet string | ||
}{ | ||
{ | ||
name: "Remove unknown UUID", | ||
initialSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24", | ||
uuid: "8bc65c84-3fe4-11ed-a912-257f0fcde6c9", | ||
wantSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
}, | ||
{ | ||
name: "Remove a single UUID", | ||
initialSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24", | ||
uuid: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9", | ||
wantSet: "8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24", | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
gtidSet, err := ParseMysql56GTIDSet(tt.initialSet) | ||
require.NoError(t, err) | ||
sid, err := ParseSID(tt.uuid) | ||
require.NoError(t, err) | ||
gtidSet = gtidSet.RemoveUUID(sid) | ||
require.EqualValues(t, tt.wantSet, gtidSet.String()) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,18 +36,15 @@ import ( | |
"vitess.io/vitess/go/vt/logutil" | ||
"vitess.io/vitess/go/vt/mysqlctl" | ||
"vitess.io/vitess/go/vt/mysqlctl/backupstats" | ||
binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" | ||
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" | ||
topodatapb "vitess.io/vitess/go/vt/proto/topodata" | ||
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" | ||
"vitess.io/vitess/go/vt/proto/vttime" | ||
"vitess.io/vitess/go/vt/servenv" | ||
"vitess.io/vitess/go/vt/topo" | ||
"vitess.io/vitess/go/vt/topo/topoproto" | ||
"vitess.io/vitess/go/vt/vterrors" | ||
"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" | ||
"vitess.io/vitess/go/vt/vttablet/tmclient" | ||
|
||
binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" | ||
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" | ||
topodatapb "vitess.io/vitess/go/vt/proto/topodata" | ||
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" | ||
) | ||
|
||
// This file handles the initial backup restore upon startup. | ||
|
@@ -326,7 +323,7 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L | |
} else if keyspaceInfo.KeyspaceType == topodatapb.KeyspaceType_NORMAL { | ||
// Reconnect to primary only for "NORMAL" keyspaces | ||
params.Logger.Infof("Restore: starting replication at position %v", pos) | ||
if err := tm.startReplication(context.Background(), pos, originalType); err != nil { | ||
if err := tm.startReplication(ctx, pos, originalType); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
return err | ||
} | ||
} | ||
|
@@ -577,47 +574,30 @@ func (tm *TabletManager) disableReplication(ctx context.Context) error { | |
} | ||
|
||
func (tm *TabletManager) startReplication(ctx context.Context, pos replication.Position, tabletType topodatapb.TabletType) error { | ||
if err := tm.MysqlDaemon.StopReplication(ctx, nil); err != nil { | ||
// The first three steps of stopping replication, and setting the replication position, | ||
// we want to do even if the context expires, so we use a background context for these tasks. | ||
if err := tm.MysqlDaemon.StopReplication(context.Background(), nil); err != nil { | ||
return vterrors.Wrap(err, "failed to stop replication") | ||
} | ||
if err := tm.MysqlDaemon.ResetReplicationParameters(ctx); err != nil { | ||
if err := tm.MysqlDaemon.ResetReplicationParameters(context.Background()); err != nil { | ||
return vterrors.Wrap(err, "failed to reset replication") | ||
} | ||
|
||
// Set the position at which to resume from the primary. | ||
if err := tm.MysqlDaemon.SetReplicationPosition(ctx, pos); err != nil { | ||
if err := tm.MysqlDaemon.SetReplicationPosition(context.Background(), pos); err != nil { | ||
return vterrors.Wrap(err, "failed to set replication position") | ||
} | ||
|
||
primary, err := tm.initializeReplication(ctx, tabletType) | ||
primaryPosStr, err := tm.initializeReplication(ctx, tabletType) | ||
// If we ran into an error while initializing replication, then there is no point in waiting for catch-up. | ||
// Also, if there is no primary tablet in the shard, we don't need to proceed further. | ||
if err != nil || primary == nil { | ||
if err != nil || primaryPosStr == "" { | ||
return err | ||
} | ||
|
||
// wait for reliable replication_lag_seconds | ||
// we have pos where we want to resume from | ||
// if PrimaryPosition is the same, that means no writes | ||
// have happened to primary, so we are up-to-date | ||
// otherwise, wait for replica's Position to change from | ||
// the initial pos before proceeding | ||
tmc := tmclient.NewTabletManagerClient() | ||
defer tmc.Close() | ||
remoteCtx, remoteCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) | ||
defer remoteCancel() | ||
posStr, err := tmc.PrimaryPosition(remoteCtx, primary.Tablet) | ||
if err != nil { | ||
// It is possible that though PrimaryAlias is set, the primary tablet is unreachable | ||
// Log a warning and let tablet restore in that case | ||
// If we had instead considered this fatal, all tablets would crash-loop | ||
// until a primary appears, which would make it impossible to elect a primary. | ||
log.Warningf("Can't get primary replication position after restore: %v", err) | ||
return nil | ||
} | ||
primaryPos, err := replication.DecodePosition(posStr) | ||
primaryPos, err := replication.DecodePosition(primaryPosStr) | ||
if err != nil { | ||
return vterrors.Wrapf(err, "can't decode primary replication position: %q", posStr) | ||
return vterrors.Wrapf(err, "can't decode primary replication position: %q", primaryPos) | ||
} | ||
|
||
if !pos.Equal(primaryPos) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍