From a5cc3960224739e5a905725cd86dc5cbefa65f70 Mon Sep 17 00:00:00 2001 From: Austen Lacy Date: Mon, 6 Nov 2023 11:24:22 -0500 Subject: [PATCH] Bug fix: Use target tablet from health stats cache when checking replication status (#14436) (#128) Signed-off-by: Austen Lacy Co-authored-by: Austen Lacy (cherry picked from commit f757ff28f0e5aaa35b79f436c5b994e346d6221c) (cherry picked from commit 7fabb2df0a87ce757583e6c12c76fe9511f54e3e) --- go/test/endtoend/cluster/cluster_process.go | 5 ++++ go/test/endtoend/tabletgateway/vtgate_test.go | 29 +++++++++++++++++++ go/vt/vtgate/executor.go | 4 +-- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/cluster/cluster_process.go b/go/test/endtoend/cluster/cluster_process.go index 075bec7c868..694081a9e01 100644 --- a/go/test/endtoend/cluster/cluster_process.go +++ b/go/test/endtoend/cluster/cluster_process.go @@ -982,6 +982,11 @@ func (cluster *LocalProcessCluster) VtctlclientGetTablet(tablet *Vttablet) (*top return &ti, nil } +func (cluster *LocalProcessCluster) VtctlclientChangeTabletType(tablet *Vttablet, tabletType topodatapb.TabletType) error { + _, err := cluster.VtctlclientProcess.ExecuteCommandWithOutput("ChangeTabletType", "--", tablet.Alias, tabletType.String()) + return err +} + // Teardown brings down the cluster by invoking teardown for individual processes func (cluster *LocalProcessCluster) Teardown() { PanicHandler(nil) diff --git a/go/test/endtoend/tabletgateway/vtgate_test.go b/go/test/endtoend/tabletgateway/vtgate_test.go index 9a26888647a..06a590cfb1f 100644 --- a/go/test/endtoend/tabletgateway/vtgate_test.go +++ b/go/test/endtoend/tabletgateway/vtgate_test.go @@ -35,6 +35,7 @@ import ( "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/utils" vtorcutils "vitess.io/vitess/go/test/endtoend/vtorc/utils" + "vitess.io/vitess/go/vt/proto/topodata" ) func TestVtgateHealthCheck(t *testing.T) { @@ -100,6 +101,34 @@ func TestVtgateReplicationStatusCheck(t *testing.T) { assert.True(t, rawLag.IsNull() || lagInt > 0, "replication lag should be NULL or greater than 0 but was: %s", rawLag.ToString()) } +func TestVtgateReplicationStatusCheckWithTabletTypeChange(t *testing.T) { + defer cluster.PanicHandler(t) + // Healthcheck interval on tablet is set to 1s, so sleep for 2s + time.Sleep(2 * time.Second) + verifyVtgateVariables(t, clusterInstance.VtgateProcess.VerifyURL) + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.NoError(t, err) + defer conn.Close() + + // Only returns rows for REPLICA and RDONLY tablets -- so should be 2 of them + qr := utils.Exec(t, conn, "show vitess_replication_status like '%'") + expectNumRows := 2 + numRows := len(qr.Rows) + assert.Equal(t, expectNumRows, numRows, fmt.Sprintf("wrong number of results from show vitess_replication_status. Expected %d, got %d", expectNumRows, numRows)) + + // change the RDONLY tablet to SPARE + rdOnlyTablet := clusterInstance.Keyspaces[0].Shards[0].Rdonly() + err = clusterInstance.VtctlclientChangeTabletType(rdOnlyTablet, topodata.TabletType_SPARE) + require.NoError(t, err) + + // Only returns rows for REPLICA and RDONLY tablets -- so should be 1 of them since we updated 1 to spare + qr = utils.Exec(t, conn, "show vitess_replication_status like '%'") + expectNumRows = 1 + numRows = len(qr.Rows) + assert.Equal(t, expectNumRows, numRows, fmt.Sprintf("wrong number of results from show vitess_replication_status. Expected %d, got %d", expectNumRows, numRows)) +} + func verifyVtgateVariables(t *testing.T, url string) { resp, err := http.Get(url) require.NoError(t, err) diff --git a/go/vt/vtgate/executor.go b/go/vt/vtgate/executor.go index 4de224802ca..aeddd61be27 100644 --- a/go/vt/vtgate/executor.go +++ b/go/vt/vtgate/executor.go @@ -852,14 +852,14 @@ func (e *Executor) showVitessReplicationStatus(ctx context.Context, filter *sqlp for _, s := range status { for _, ts := range s.TabletsStats { // We only want to show REPLICA and RDONLY tablets - if ts.Tablet.Type != topodatapb.TabletType_REPLICA && ts.Tablet.Type != topodatapb.TabletType_RDONLY { + if ts.Target.TabletType != topodatapb.TabletType_REPLICA && ts.Target.TabletType != topodatapb.TabletType_RDONLY { continue } // Allow people to filter by Keyspace and Shard using a LIKE clause if filter != nil { ksFilterRegex := sqlparser.LikeToRegexp(filter.Like) - keyspaceShardStr := fmt.Sprintf("%s/%s", ts.Tablet.Keyspace, ts.Tablet.Shard) + keyspaceShardStr := fmt.Sprintf("%s/%s", ts.Target.Keyspace, ts.Target.Shard) if !ksFilterRegex.MatchString(keyspaceShardStr) { continue }