From 1d136325f1ffd531769129ba20dc81f87f8394a6 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Mon, 20 Jan 2025 13:34:19 +0100 Subject: [PATCH] [release-21.0] Tablet picker: Handle the case where a primary tablet is not setup for a shard (#17573) (#17576) Signed-off-by: Rohit Nayak Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com> --- go/vt/discovery/tablet_picker.go | 12 ++++++++++-- go/vt/discovery/tablet_picker_test.go | 23 +++++++++++++++++++++++ go/vt/topo/tablet.go | 3 +++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/go/vt/discovery/tablet_picker.go b/go/vt/discovery/tablet_picker.go index c48905be948..cfe1ba2e964 100644 --- a/go/vt/discovery/tablet_picker.go +++ b/go/vt/discovery/tablet_picker.go @@ -387,8 +387,13 @@ func (tp *TabletPicker) GetMatchingTablets(ctx context.Context) []*topo.TabletIn log.Errorf("Error getting shard %s/%s: %v", tp.keyspace, tp.shard, err) return nil } - if _, ignore := tp.ignoreTablets[si.PrimaryAlias.String()]; !ignore { - aliases = append(aliases, si.PrimaryAlias) + + // It is possible that there is a cluster event (ERS/PRS, for example) due to which + // there is no primary elected for the shard at the moment. + if si.PrimaryAlias != nil { + if _, ignore := tp.ignoreTablets[si.PrimaryAlias.String()]; !ignore { + aliases = append(aliases, si.PrimaryAlias) + } } } else { actualCells := make([]string, 0) @@ -425,6 +430,9 @@ func (tp *TabletPicker) GetMatchingTablets(ctx context.Context) []*topo.TabletIn continue } for _, node := range sri.Nodes { + if node.TabletAlias == nil { + continue + } if _, ignore := tp.ignoreTablets[node.TabletAlias.String()]; !ignore { aliases = append(aliases, node.TabletAlias) } diff --git a/go/vt/discovery/tablet_picker_test.go b/go/vt/discovery/tablet_picker_test.go index 76a8828afec..27c4d8bf7b1 100644 --- a/go/vt/discovery/tablet_picker_test.go +++ b/go/vt/discovery/tablet_picker_test.go @@ -62,6 +62,29 @@ func TestPickPrimary(t *testing.T) { assert.True(t, proto.Equal(want, tablet), "Pick: %v, want %v", tablet, want) } +// TestPickNoPrimary confirms that if the picker was setup only for primary tablets but +// there is no primary setup for the shard we correctly return an error. +func TestPickNoPrimary(t *testing.T) { + defer utils.EnsureNoLeaks(t) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + te := newPickerTestEnv(t, ctx, []string{"cell", "otherCell"}) + want := addTablet(ctx, te, 100, topodatapb.TabletType_PRIMARY, "cell", true, true) + defer deleteTablet(t, te, want) + _, err := te.topoServ.UpdateShardFields(ctx, te.keyspace, te.shard, func(si *topo.ShardInfo) error { + si.PrimaryAlias = nil // force a missing primary + return nil + }) + require.NoError(t, err) + + tp, err := NewTabletPicker(ctx, te.topoServ, []string{"otherCell"}, "cell", te.keyspace, te.shard, "primary", TabletPickerOptions{}) + require.NoError(t, err) + + _, err = tp.PickForStreaming(ctx) + require.Errorf(t, err, "No healthy serving tablet") +} + func TestPickLocalPreferences(t *testing.T) { defer utils.EnsureNoLeaks(t) type tablet struct { diff --git a/go/vt/topo/tablet.go b/go/vt/topo/tablet.go index 671a0f43905..482f782e775 100644 --- a/go/vt/topo/tablet.go +++ b/go/vt/topo/tablet.go @@ -489,6 +489,9 @@ func (ts *Server) GetTabletMap(ctx context.Context, tabletAliases []*topodatapb. var sem = semaphore.NewWeighted(int64(concurrency)) for _, tabletAlias := range tabletAliases { + if tabletAlias == nil { + return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "nil tablet alias in list") + } wg.Add(1) go func(tabletAlias *topodatapb.TabletAlias) { defer wg.Done()