diff --git a/changelog/21.0/21.0.0/summary.md b/changelog/21.0/21.0.0/summary.md
index be8304fcc26..9562c127952 100644
--- a/changelog/21.0/21.0.0/summary.md
+++ b/changelog/21.0/21.0.0/summary.md
@@ -18,7 +18,6 @@
- **[Dynamic VReplication Configuration](#dynamic-vreplication-configuration)**
- **[Reference Table Materialization](#reference-table-materialization)**
- **[New VEXPLAIN Modes: TRACE and KEYS](#new-vexplain-modes)**
- - **[Errant GTID Detection on VTTablets](#errant-gtid-vttablet)**
- **[Automatically Replace MySQL auto_increment Clauses with Vitess Sequences](#auto-replace-mysql-autoinc-with-seq)**
- **[Experimental MySQL 8.4 support](#experimental-mysql-84)**
- **[Current Errant GTIDs Count Metric](#errant-gtid-metric)**
@@ -208,14 +207,6 @@ filter columns (potential candidates for indexes, primary keys, or sharding keys
These new `VEXPLAIN` modes enhance Vitess's query analysis capabilities, allowing for more informed decisions about sharding
strategies and query optimization.
-### Errant GTID Detection on VTTablets
-
-VTTablets now run an errant GTID detection logic before they join the replication stream. So, if a replica has an errant GTID, it will
-not start replicating from the primary. This protects us from running into situations which are very difficult to recover from.
-
-For users running with the vitess-operator on Kubernetes, this change means that replica tablets with errant GTIDs will have broken
-replication and will report as unready. Users will need to manually replace and clean up these errant replica tablets.
-
### Automatically Replace MySQL auto_increment Clauses with Vitess Sequences
In https://github.com/vitessio/vitess/pull/16860 we added support for replacing MySQL `auto_increment` clauses with [Vitess Sequences](https://vitess.io/docs/reference/features/vitess-sequences/), performing all of the setup and initialization
diff --git a/go/mysql/replication/mysql56_gtid_set.go b/go/mysql/replication/mysql56_gtid_set.go
index 48241215c10..b11318bfa4d 100644
--- a/go/mysql/replication/mysql56_gtid_set.go
+++ b/go/mysql/replication/mysql56_gtid_set.go
@@ -467,33 +467,6 @@ func (set Mysql56GTIDSet) SIDBlock() []byte {
return buf.Bytes()
}
-// ErrantGTIDsOnReplica gets the errant GTIDs on the replica by comparing against the primary position and UUID.
-func ErrantGTIDsOnReplica(replicaPosition Position, primaryPosition Position) (string, error) {
- replicaGTIDSet, replicaOk := replicaPosition.GTIDSet.(Mysql56GTIDSet)
- primaryGTIDSet, primaryOk := primaryPosition.GTIDSet.(Mysql56GTIDSet)
-
- // Currently we only support errant GTID detection for MySQL 56 flavour.
- if !replicaOk || !primaryOk {
- return "", nil
- }
-
- // Calculate the difference between the replica and primary GTID sets.
- diffSet := replicaGTIDSet.Difference(primaryGTIDSet)
- return diffSet.String(), nil
-}
-
-// RemoveUUID removes a specific UUID from the gtid set.
-func (set Mysql56GTIDSet) RemoveUUID(uuid SID) Mysql56GTIDSet {
- newSet := make(Mysql56GTIDSet)
- for sid, intervals := range set {
- if sid == uuid {
- continue
- }
- newSet[sid] = intervals
- }
- return newSet
-}
-
// Difference will supply the difference between the receiver and supplied Mysql56GTIDSets, and supply the result
// as a Mysql56GTIDSet.
func (set Mysql56GTIDSet) Difference(other Mysql56GTIDSet) Mysql56GTIDSet {
diff --git a/go/mysql/replication/mysql56_gtid_set_test.go b/go/mysql/replication/mysql56_gtid_set_test.go
index bcb2a68af72..e5accc46120 100644
--- a/go/mysql/replication/mysql56_gtid_set_test.go
+++ b/go/mysql/replication/mysql56_gtid_set_test.go
@@ -750,88 +750,3 @@ func TestGTIDCount(t *testing.T) {
})
}
}
-
-func TestErrantGTIDsOnReplica(t *testing.T) {
- tests := []struct {
- name string
- replicaPosition string
- primaryPosition string
- errantGtidWanted string
- wantErr string
- }{
- {
- name: "Empty replica position",
- replicaPosition: "MySQL56/",
- primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8",
- errantGtidWanted: "",
- }, {
- name: "Empty primary position",
- replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8",
- primaryPosition: "MySQL56/",
- errantGtidWanted: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8",
- }, {
- name: "Empty primary position - with multiple errant gtids",
- replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1",
- primaryPosition: "MySQL56/",
- errantGtidWanted: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1",
- }, {
- name: "Single errant GTID",
- replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34",
- primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-50,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-30",
- errantGtidWanted: "8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34",
- }, {
- name: "Multiple errant GTID",
- replicaPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-32,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:3-35",
- primaryPosition: "MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-50,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1-30,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:34",
- errantGtidWanted: "8bc65cca-3fe4-11ed-bbfb-091034d48b3e:31-32,8bc65cca-3fe4-11ed-bbfb-091034d48bd3:3-33:35",
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- replPos, err := DecodePosition(tt.replicaPosition)
- require.NoError(t, err)
- primaryPos, err := DecodePosition(tt.primaryPosition)
- require.NoError(t, err)
- errantGTIDs, err := ErrantGTIDsOnReplica(replPos, primaryPos)
- if tt.wantErr != "" {
- require.ErrorContains(t, err, tt.wantErr)
- } else {
- require.NoError(t, err)
- require.EqualValues(t, tt.errantGtidWanted, errantGTIDs)
- }
-
- })
- }
-}
-
-func TestMysql56GTIDSet_RemoveUUID(t *testing.T) {
- tests := []struct {
- name string
- initialSet string
- uuid string
- wantSet string
- }{
- {
- name: "Remove unknown UUID",
- initialSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24",
- uuid: "8bc65c84-3fe4-11ed-a912-257f0fcde6c9",
- wantSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24",
- },
- {
- name: "Remove a single UUID",
- initialSet: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24",
- uuid: "8bc65c84-3fe4-11ed-a912-257f0fcdd6c9",
- wantSet: "8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1:4-24",
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- gtidSet, err := ParseMysql56GTIDSet(tt.initialSet)
- require.NoError(t, err)
- sid, err := ParseSID(tt.uuid)
- require.NoError(t, err)
- gtidSet = gtidSet.RemoveUUID(sid)
- require.EqualValues(t, tt.wantSet, gtidSet.String())
- })
- }
-}
diff --git a/go/vt/mysqlctl/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon.go
index ef62f0ccb85..ce7ce456bdd 100644
--- a/go/vt/mysqlctl/fakemysqldaemon.go
+++ b/go/vt/mysqlctl/fakemysqldaemon.go
@@ -81,9 +81,6 @@ type FakeMysqlDaemon struct {
// and ReplicationStatus.
CurrentPrimaryPosition replication.Position
- // CurrentRelayLogPosition is returned by ReplicationStatus.
- CurrentRelayLogPosition replication.Position
-
// CurrentSourceFilePosition is used to determine the executed
// file based positioning of the replication source.
CurrentSourceFilePosition replication.Position
@@ -312,7 +309,6 @@ func (fmd *FakeMysqlDaemon) ReplicationStatus(ctx context.Context) (replication.
return replication.ReplicationStatus{
Position: fmd.CurrentPrimaryPosition,
FilePosition: fmd.CurrentSourceFilePosition,
- RelayLogPosition: fmd.CurrentRelayLogPosition,
RelayLogSourceBinlogEquivalentPosition: fmd.CurrentSourceFilePosition,
ReplicationLagSeconds: fmd.ReplicationLagSeconds,
// Implemented as AND to avoid changing all tests that were
diff --git a/go/vt/vttablet/tabletmanager/restore.go b/go/vt/vttablet/tabletmanager/restore.go
index 35587124108..a56eb4da37b 100644
--- a/go/vt/vttablet/tabletmanager/restore.go
+++ b/go/vt/vttablet/tabletmanager/restore.go
@@ -36,15 +36,18 @@ import (
"vitess.io/vitess/go/vt/logutil"
"vitess.io/vitess/go/vt/mysqlctl"
"vitess.io/vitess/go/vt/mysqlctl/backupstats"
- binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
- tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
- topodatapb "vitess.io/vitess/go/vt/proto/topodata"
- vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/proto/vttime"
"vitess.io/vitess/go/vt/servenv"
+ "vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
+ "vitess.io/vitess/go/vt/vttablet/tmclient"
+
+ binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
+ tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
+ topodatapb "vitess.io/vitess/go/vt/proto/topodata"
+ vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)
// This file handles the initial backup restore upon startup.
@@ -323,7 +326,7 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
} else if keyspaceInfo.KeyspaceType == topodatapb.KeyspaceType_NORMAL {
// Reconnect to primary only for "NORMAL" keyspaces
params.Logger.Infof("Restore: starting replication at position %v", pos)
- if err := tm.startReplication(ctx, pos, originalType); err != nil {
+ if err := tm.startReplication(context.Background(), pos, originalType); err != nil {
return err
}
}
@@ -574,30 +577,47 @@ func (tm *TabletManager) disableReplication(ctx context.Context) error {
}
func (tm *TabletManager) startReplication(ctx context.Context, pos replication.Position, tabletType topodatapb.TabletType) error {
- // The first three steps of stopping replication, and setting the replication position,
- // we want to do even if the context expires, so we use a background context for these tasks.
- if err := tm.MysqlDaemon.StopReplication(context.Background(), nil); err != nil {
+ if err := tm.MysqlDaemon.StopReplication(ctx, nil); err != nil {
return vterrors.Wrap(err, "failed to stop replication")
}
- if err := tm.MysqlDaemon.ResetReplicationParameters(context.Background()); err != nil {
+ if err := tm.MysqlDaemon.ResetReplicationParameters(ctx); err != nil {
return vterrors.Wrap(err, "failed to reset replication")
}
// Set the position at which to resume from the primary.
- if err := tm.MysqlDaemon.SetReplicationPosition(context.Background(), pos); err != nil {
+ if err := tm.MysqlDaemon.SetReplicationPosition(ctx, pos); err != nil {
return vterrors.Wrap(err, "failed to set replication position")
}
- primaryPosStr, err := tm.initializeReplication(ctx, tabletType)
+ primary, err := tm.initializeReplication(ctx, tabletType)
// If we ran into an error while initializing replication, then there is no point in waiting for catch-up.
// Also, if there is no primary tablet in the shard, we don't need to proceed further.
- if err != nil || primaryPosStr == "" {
+ if err != nil || primary == nil {
return err
}
- primaryPos, err := replication.DecodePosition(primaryPosStr)
+ // wait for reliable replication_lag_seconds
+ // we have pos where we want to resume from
+ // if PrimaryPosition is the same, that means no writes
+ // have happened to primary, so we are up-to-date
+ // otherwise, wait for replica's Position to change from
+ // the initial pos before proceeding
+ tmc := tmclient.NewTabletManagerClient()
+ defer tmc.Close()
+ remoteCtx, remoteCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
+ defer remoteCancel()
+ posStr, err := tmc.PrimaryPosition(remoteCtx, primary.Tablet)
+ if err != nil {
+ // It is possible that though PrimaryAlias is set, the primary tablet is unreachable
+ // Log a warning and let tablet restore in that case
+ // If we had instead considered this fatal, all tablets would crash-loop
+ // until a primary appears, which would make it impossible to elect a primary.
+ log.Warningf("Can't get primary replication position after restore: %v", err)
+ return nil
+ }
+ primaryPos, err := replication.DecodePosition(posStr)
if err != nil {
- return vterrors.Wrapf(err, "can't decode primary replication position: %q", primaryPos)
+ return vterrors.Wrapf(err, "can't decode primary replication position: %q", posStr)
}
if !pos.Equal(primaryPos) {
diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go
index 0eb866db2a8..a8232730b5c 100644
--- a/go/vt/vttablet/tabletmanager/rpc_replication.go
+++ b/go/vt/vttablet/tabletmanager/rpc_replication.go
@@ -726,7 +726,6 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
wasReplicating := false
shouldbeReplicating := false
status, err := tm.MysqlDaemon.ReplicationStatus(ctx)
- replicaPosition := status.RelayLogPosition
if err == mysql.ErrNotReplica {
// This is a special error that means we actually succeeded in reading
// the status, but the status is empty because replication is not
@@ -736,12 +735,6 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
// Since we continue in the case of this error, make sure 'status' is
// in a known, empty state.
status = replication.ReplicationStatus{}
- // The replica position we use for the errant GTID detection should be the executed
- // GTID set since this tablet is not running replication at all.
- replicaPosition, err = tm.MysqlDaemon.PrimaryPosition(ctx)
- if err != nil {
- return err
- }
} else if err != nil {
// Abort on any other non-nil error.
return err
@@ -773,35 +766,12 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA
if err != nil {
return err
}
-
host := parent.Tablet.MysqlHostname
port := parent.Tablet.MysqlPort
// If host is empty, then we shouldn't even attempt the reparent. That tablet has already shutdown.
if host == "" {
return vterrors.New(vtrpc.Code_FAILED_PRECONDITION, "Shard primary has empty mysql hostname")
}
- // Errant GTID detection.
- {
- // Find the executed GTID set of the tablet that we are reparenting to.
- // We will then compare our own position against it to verify that we don't
- // have an errant GTID. If we find any GTID that we have, but the primary doesn't,
- // we will not enter the replication graph and instead fail replication.
- primaryPositionStr, err := tm.tmc.PrimaryPosition(ctx, parent.Tablet)
- if err != nil {
- return err
- }
- primaryPosition, err := replication.DecodePosition(primaryPositionStr)
- if err != nil {
- return err
- }
- errantGtid, err := replication.ErrantGTIDsOnReplica(replicaPosition, primaryPosition)
- if err != nil {
- return err
- }
- if errantGtid != "" {
- return vterrors.New(vtrpc.Code_FAILED_PRECONDITION, fmt.Sprintf("Errant GTID detected - %s; Primary GTID - %s, Replica GTID - %s", errantGtid, primaryPosition, replicaPosition.String()))
- }
- }
if status.SourceHost != host || status.SourcePort != port || heartbeatInterval != 0 {
// This handles both changing the address and starting replication.
if err := tm.MysqlDaemon.SetReplicationSource(ctx, host, port, heartbeatInterval, wasReplicating, shouldbeReplicating); err != nil {
diff --git a/go/vt/vttablet/tabletmanager/tm_init.go b/go/vt/vttablet/tabletmanager/tm_init.go
index 7f540b51546..9656445cece 100644
--- a/go/vt/vttablet/tabletmanager/tm_init.go
+++ b/go/vt/vttablet/tabletmanager/tm_init.go
@@ -50,7 +50,6 @@ import (
"vitess.io/vitess/go/constants/sidecar"
"vitess.io/vitess/go/flagutil"
"vitess.io/vitess/go/mysql/collations"
- "vitess.io/vitess/go/mysql/replication"
"vitess.io/vitess/go/mysql/sqlerror"
"vitess.io/vitess/go/netutil"
"vitess.io/vitess/go/protoutil"
@@ -65,7 +64,6 @@ import (
"vitess.io/vitess/go/vt/mysqlctl"
querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
- "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/servenv"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
@@ -77,7 +75,6 @@ import (
"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
"vitess.io/vitess/go/vt/vttablet/tabletserver"
"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
- "vitess.io/vitess/go/vt/vttablet/tmclient"
)
const (
@@ -165,9 +162,6 @@ type TabletManager struct {
VDiffEngine *vdiff.Engine
Env *vtenv.Environment
- // tmc is used to run an RPC against other vttablets.
- tmc tmclient.TabletManagerClient
-
// tmState manages the TabletManager state.
tmState *tmState
@@ -369,7 +363,6 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, config *tabletenv.Tabl
log.Infof("TabletManager Start")
tm.DBConfigs.DBName = topoproto.TabletDbName(tablet)
tm.tabletAlias = tablet.Alias
- tm.tmc = tmclient.NewTabletManagerClient()
tm.tmState = newTMState(tm, tablet)
tm.actionSema = semaphore.NewWeighted(1)
tm._waitForGrantsComplete = make(chan struct{})
@@ -970,50 +963,50 @@ func (tm *TabletManager) hookExtraEnv() map[string]string {
// initializeReplication is used to initialize the replication when the tablet starts.
// It returns the current primary tablet for use externally
-func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType topodatapb.TabletType) (primaryPosStr string, err error) {
+func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType topodatapb.TabletType) (primary *topo.TabletInfo, err error) {
// If active reparents are disabled, we do not touch replication.
// There is nothing to do
if mysqlctl.DisableActiveReparents {
- return "", nil
+ return nil, nil
}
// If the desired tablet type is primary, then we shouldn't be setting our replication source.
// So there is nothing to do.
if tabletType == topodatapb.TabletType_PRIMARY {
- return "", nil
+ return nil, nil
}
// Read the shard to find the current primary, and its location.
tablet := tm.Tablet()
si, err := tm.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard)
if err != nil {
- return "", vterrors.Wrap(err, "cannot read shard")
+ return nil, vterrors.Wrap(err, "cannot read shard")
}
if si.PrimaryAlias == nil {
// There's no primary. This is fine, since there might be no primary currently
log.Warningf("cannot start replication during initialization: shard %v/%v has no primary.", tablet.Keyspace, tablet.Shard)
- return "", nil
+ return nil, nil
}
if topoproto.TabletAliasEqual(si.PrimaryAlias, tablet.Alias) {
// We used to be the primary before we got restarted,
// and no other primary has been elected in the meantime.
// There isn't anything to do here either.
log.Warningf("cannot start replication during initialization: primary in shard record still points to this tablet.")
- return "", nil
+ return nil, nil
}
currentPrimary, err := tm.TopoServer.GetTablet(ctx, si.PrimaryAlias)
if err != nil {
- return "", vterrors.Wrapf(err, "cannot read primary tablet %v", si.PrimaryAlias)
+ return nil, vterrors.Wrapf(err, "cannot read primary tablet %v", si.PrimaryAlias)
}
durabilityName, err := tm.TopoServer.GetKeyspaceDurability(ctx, tablet.Keyspace)
if err != nil {
- return "", vterrors.Wrapf(err, "cannot read keyspace durability policy %v", tablet.Keyspace)
+ return nil, vterrors.Wrapf(err, "cannot read keyspace durability policy %v", tablet.Keyspace)
}
log.Infof("Getting a new durability policy for %v", durabilityName)
durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
if err != nil {
- return "", vterrors.Wrapf(err, "cannot get durability policy %v", durabilityName)
+ return nil, vterrors.Wrapf(err, "cannot get durability policy %v", durabilityName)
}
// If using semi-sync, we need to enable it before connecting to primary.
// We should set the correct type, since it is used in replica semi-sync
@@ -1022,50 +1015,21 @@ func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType t
semiSyncAction, err := tm.convertBoolToSemiSyncAction(ctx, reparentutil.IsReplicaSemiSync(durability, currentPrimary.Tablet, tablet))
if err != nil {
- return "", err
+ return nil, err
}
if err := tm.fixSemiSync(ctx, tabletType, semiSyncAction); err != nil {
- return "", err
+ return nil, err
}
// Set primary and start replication.
if currentPrimary.Tablet.MysqlHostname == "" {
log.Warningf("primary tablet in the shard record does not have mysql hostname specified, possibly because that tablet has been shut down.")
- return "", nil
- }
-
- // Find our own executed GTID set and,
- // the executed GTID set of the tablet that we are reparenting to.
- // We will then compare our own position against it to verify that we don't
- // have an errant GTID. If we find any GTID that we have, but the primary doesn't,
- // we will not enter the replication graph and instead fail replication.
- replicaPos, err := tm.MysqlDaemon.PrimaryPosition(ctx)
- if err != nil {
- return "", err
- }
-
- primaryPosStr, err = tm.tmc.PrimaryPosition(ctx, currentPrimary.Tablet)
- if err != nil {
- return "", err
+ return nil, nil
}
-
- primaryPosition, err := replication.DecodePosition(primaryPosStr)
- if err != nil {
- return "", err
- }
-
- errantGTIDs, err := replication.ErrantGTIDsOnReplica(replicaPos, primaryPosition)
- if err != nil {
- return "", err
- }
- if errantGTIDs != "" {
- return "", vterrors.New(vtrpc.Code_FAILED_PRECONDITION, fmt.Sprintf("Errant GTID detected - %s", errantGTIDs))
- }
-
if err := tm.MysqlDaemon.SetReplicationSource(ctx, currentPrimary.Tablet.MysqlHostname, currentPrimary.Tablet.MysqlPort, 0, true, true); err != nil {
- return "", vterrors.Wrap(err, "MysqlDaemon.SetReplicationSource failed")
+ return nil, vterrors.Wrap(err, "MysqlDaemon.SetReplicationSource failed")
}
- return primaryPosStr, nil
+ return currentPrimary, nil
}
diff --git a/go/vt/wrangler/testlib/backup_test.go b/go/vt/wrangler/testlib/backup_test.go
index cb61c4bab99..873e4045490 100644
--- a/go/vt/wrangler/testlib/backup_test.go
+++ b/go/vt/wrangler/testlib/backup_test.go
@@ -37,6 +37,7 @@ import (
"vitess.io/vitess/go/vt/mysqlctl"
"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
"vitess.io/vitess/go/vt/mysqlctl/filebackupstorage"
+ "vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/memorytopo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtenv"
@@ -687,6 +688,9 @@ func TestRestoreUnreachablePrimary(t *testing.T) {
"FAKE RESET REPLICA ALL",
"FAKE RESET BINARY LOGS AND GTIDS",
"FAKE SET GLOBAL gtid_purged",
+ "STOP REPLICA",
+ "FAKE SET SOURCE",
+ "START REPLICA",
}
destTablet.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
"SHOW DATABASES": {},
@@ -710,16 +714,13 @@ func TestRestoreUnreachablePrimary(t *testing.T) {
// stop primary so that it is unreachable
primary.StopActionLoop(t)
- // Attempt to fix the test, but its still failing :man_shrugging.
- ctx, cancel = context.WithTimeout(ctx, 2*time.Second)
- defer cancel()
- // Restore will return an error while trying to contact the primary for its position, but otherwise will succeed.
- // The replication won't be running however, since we can't run errant GTID detection without the primary being online.
- err = destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, time.Time{} /* restoreFromBackupTs */, time.Time{} /* restoreToTimestamp */, "" /* restoreToPos */, []string{} /* ignoreBackupEngines */, mysqlShutdownTimeout)
- require.ErrorContains(t, err, "DeadlineExceeded")
+ // set a short timeout so that we don't have to wait 30 seconds
+ topo.RemoteOperationTimeout = 2 * time.Second
+ // Restore should still succeed
+ require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, time.Time{} /* restoreFromBackupTs */, time.Time{} /* restoreToTimestamp */, "" /* restoreToPos */, []string{} /* ignoreBackupEngines */, mysqlShutdownTimeout))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
- assert.False(t, destTablet.FakeMysqlDaemon.Replicating)
+ assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)
}
diff --git a/go/vt/wrangler/testlib/reparent_utils_test.go b/go/vt/wrangler/testlib/reparent_utils_test.go
index 6f66968ba80..83450aeb7d0 100644
--- a/go/vt/wrangler/testlib/reparent_utils_test.go
+++ b/go/vt/wrangler/testlib/reparent_utils_test.go
@@ -205,9 +205,6 @@ func TestSetReplicationSource(t *testing.T) {
return nil
})
require.NoError(t, err, "UpdateShardFields failed")
- pos, err := replication.DecodePosition("MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-8")
- require.NoError(t, err)
- primary.FakeMysqlDaemon.SetPrimaryPositionLocked(pos)
// primary action loop (to initialize host and port)
primary.StartActionLoop(t, wr)
@@ -249,36 +246,6 @@ func TestSetReplicationSource(t *testing.T) {
checkSemiSyncEnabled(t, false, true, replica)
})
- t.Run("Errant GTIDs on the replica", func(t *testing.T) {
- replica := NewFakeTablet(t, wr, "cell1", 4, topodatapb.TabletType_REPLICA, nil)
- // replica loop
- replica.FakeMysqlDaemon.Replicating = true
- replica.FakeMysqlDaemon.IOThreadRunning = true
- replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet))
- replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
- // These 3 statements come from tablet startup
- "STOP REPLICA",
- "FAKE SET SOURCE",
- "START REPLICA",
- }
- replica.StartActionLoop(t, wr)
- defer replica.StopActionLoop(t)
-
- // Set replica's GTID to have a write that the primary's GTID doesn't have
- pos, err = replication.DecodePosition("MySQL56/8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-7,8bc65cca-3fe4-11ed-bbfb-091034d48b3e:1")
- require.NoError(t, err)
- replica.FakeMysqlDaemon.CurrentRelayLogPosition = pos
-
- // run SetReplicationSource
- err = wr.SetReplicationSource(ctx, replica.Tablet)
- require.ErrorContains(t, err, "Errant GTID detected")
-
- // check what was run
- err = replica.FakeMysqlDaemon.CheckSuperQueryList()
- require.NoError(t, err, "CheckSuperQueryList failed")
- checkSemiSyncEnabled(t, false, true, replica)
- })
-
// test setting an empty hostname because of primary shutdown
t.Run("Primary tablet already shutdown", func(t *testing.T) {
replica := NewFakeTablet(t, wr, "cell1", 3, topodatapb.TabletType_REPLICA, nil)