From a31687e59147b098a1e4e80602be41d921ea2b53 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:03:16 -0800 Subject: [PATCH] increase vtctlclient backupShard command success rate (#14604) Signed-off-by: Jun Wang Signed-off-by: jwangace <121262788+jwangace@users.noreply.github.com> Co-authored-by: Jun Wang Co-authored-by: Andrew Mason --- go/vt/vtctl/grpcvtctldserver/server.go | 8 +- go/vt/vtctl/reparentutil/util.go | 12 +++ go/vt/vtctl/reparentutil/util_test.go | 102 +++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/go/vt/vtctl/grpcvtctldserver/server.go b/go/vt/vtctl/grpcvtctldserver/server.go index 25b711e1019..44762001ef0 100644 --- a/go/vt/vtctl/grpcvtctldserver/server.go +++ b/go/vt/vtctl/grpcvtctldserver/server.go @@ -431,8 +431,14 @@ func (s *VtctldServer) BackupShard(req *vtctldatapb.BackupShardRequest, stream v span.Annotate("incremental_from_pos", req.IncrementalFromPos) tablets, stats, err := reparentutil.ShardReplicationStatuses(ctx, s.ts, s.tmc, req.Keyspace, req.Shard) + + // Instead of return on err directly, only return err when no tablets for backup at all if err != nil { - return err + tablets = reparentutil.GetBackupCandidates(tablets, stats) + // Only return err when no usable tablet + if len(tablets) == 0 { + return err + } } var ( diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go index cfde8f34508..ac57e1230d1 100644 --- a/go/vt/vtctl/reparentutil/util.go +++ b/go/vt/vtctl/reparentutil/util.go @@ -343,3 +343,15 @@ func waitForCatchUp( } return nil } + +// GetBackupCandidates is used to get a list of healthy tablets for backup +func GetBackupCandidates(tablets []*topo.TabletInfo, stats []*replicationdatapb.Status) (res []*topo.TabletInfo) { + for i, stat := range stats { + // shardTablets[i] and stats[i] is 1:1 mapping + // Always include TabletType_PRIMARY. Healthy shardTablets[i] will be added to tablets + if tablets[i].Type == topodatapb.TabletType_PRIMARY || stat != nil { + res = append(res, tablets[i]) + } + } + return res +} diff --git a/go/vt/vtctl/reparentutil/util_test.go b/go/vt/vtctl/reparentutil/util_test.go index a9e6274d490..7b0d624590f 100644 --- a/go/vt/vtctl/reparentutil/util_test.go +++ b/go/vt/vtctl/reparentutil/util_test.go @@ -1218,3 +1218,105 @@ func Test_getTabletsWithPromotionRules(t *testing.T) { }) } } + +func TestGetBackupCandidates(t *testing.T) { + var ( + primaryTablet = &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 1, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + } + replicaTablet = &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 2, + }, + Type: topodatapb.TabletType_REPLICA, + }, + } + rdonlyTablet = &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 3, + }, + Type: topodatapb.TabletType_RDONLY, + }, + } + spareTablet = &topo.TabletInfo{ + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 4, + }, + Type: topodatapb.TabletType_SPARE, + }, + } + ) + tests := []struct { + name string + in []*topo.TabletInfo + expected []*topo.TabletInfo + status []*replicationdatapb.Status + }{ + { + name: "one primary tablet with status", + in: []*topo.TabletInfo{primaryTablet}, + expected: []*topo.TabletInfo{primaryTablet}, + status: []*replicationdatapb.Status{{}}, + }, + { + name: "one primary tablet with no status", + in: []*topo.TabletInfo{primaryTablet}, + expected: []*topo.TabletInfo{primaryTablet}, + status: []*replicationdatapb.Status{nil}, + }, + { + name: "4 tablets with no status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet}, + status: []*replicationdatapb.Status{nil, nil, nil, nil}, + }, + { + name: "4 tablets with full status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + status: []*replicationdatapb.Status{{}, {}, {}, {}}, + }, + { + name: "4 tablets with no primaryTablet status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + status: []*replicationdatapb.Status{nil, {}, {}, {}}, + }, + { + name: "4 tablets with no replicaTablet status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet, rdonlyTablet, spareTablet}, + status: []*replicationdatapb.Status{{}, nil, {}, {}}, + }, + { + name: "4 tablets with no rdonlyTablet status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet, replicaTablet, spareTablet}, + status: []*replicationdatapb.Status{{}, {}, nil, {}}, + }, + { + name: "4 tablets with no spareTablet status", + in: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet, spareTablet}, + expected: []*topo.TabletInfo{primaryTablet, replicaTablet, rdonlyTablet}, + status: []*replicationdatapb.Status{{}, {}, {}, nil}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + res := GetBackupCandidates(tt.in, tt.status) + require.EqualValues(t, tt.expected, res) + }) + } +}