From ef9d7dba95d1ec4edfea66a2bf52e8b741afb680 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 2 May 2024 21:28:53 -0400 Subject: [PATCH] Add tests for GetTablets partial results (#15829) --- go/test/endtoend/clustertest/vtctld_test.go | 4 +- go/vt/topo/etcd2topo/server_test.go | 112 +++++++++++++++++++- go/vt/vtctl/grpcvtctldserver/server.go | 1 - go/vt/vtctl/grpcvtctldserver/server_test.go | 102 ++++++++++++++++-- 4 files changed, 205 insertions(+), 14 deletions(-) diff --git a/go/test/endtoend/clustertest/vtctld_test.go b/go/test/endtoend/clustertest/vtctld_test.go index bb1bcdf2237..c61f7820bb7 100644 --- a/go/test/endtoend/clustertest/vtctld_test.go +++ b/go/test/endtoend/clustertest/vtctld_test.go @@ -54,7 +54,7 @@ func TestVtctldProcess(t *testing.T) { url = fmt.Sprintf("http://%s:%d/api/topodata/", clusterInstance.Hostname, clusterInstance.VtctldHTTPPort) testTopoDataAPI(t, url) - testListAllTablets(t) + testGetTablets(t) testTabletStatus(t) testExecuteAsDba(t) testExecuteAsApp(t) @@ -82,7 +82,7 @@ func testTopoDataAPI(t *testing.T, url string) { assert.Contains(t, childrenGot, clusterInstance.Cell) } -func testListAllTablets(t *testing.T) { +func testGetTablets(t *testing.T) { // first w/o any filters, aside from cell result, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetTablets", "--cell", clusterInstance.Cell) require.NoError(t, err) diff --git a/go/vt/topo/etcd2topo/server_test.go b/go/vt/topo/etcd2topo/server_test.go index 732829ee78b..bbf9f8e9164 100644 --- a/go/vt/topo/etcd2topo/server_test.go +++ b/go/vt/topo/etcd2topo/server_test.go @@ -22,9 +22,12 @@ import ( "os" "os/exec" "path" + "strings" "testing" "time" + "github.com/stretchr/testify/require" + "vitess.io/vitess/go/testfiles" "vitess.io/vitess/go/vt/log" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -36,12 +39,14 @@ import ( ) // startEtcd starts an etcd subprocess, and waits for it to be ready. -func startEtcd(t *testing.T) string { +func startEtcd(t *testing.T, port int) (string, *exec.Cmd) { // Create a temporary directory. dataDir := t.TempDir() // Get our two ports to listen to. - port := testfiles.GoVtTopoEtcd2topoPort + if port == 0 { + port = testfiles.GoVtTopoEtcd2topoPort + } name := "vitess_unit_test" clientAddr := fmt.Sprintf("http://localhost:%v", port) peerAddr := fmt.Sprintf("http://localhost:%v", port+1) @@ -94,7 +99,7 @@ func startEtcd(t *testing.T) string { } }) - return clientAddr + return clientAddr, cmd } // startEtcdWithTLS starts an etcd subprocess with TLS setup, and waits for it to be ready. @@ -219,7 +224,7 @@ func TestEtcd2TLS(t *testing.T) { func TestEtcd2Topo(t *testing.T) { // Start a single etcd in the background. - clientAddr := startEtcd(t) + clientAddr, _ := startEtcd(t, 0) testIndex := 0 newServer := func() *topo.Server { @@ -257,6 +262,105 @@ func TestEtcd2Topo(t *testing.T) { ts.Close() } +// TestEtcd2TopoGetTabletsPartialResults confirms that GetTablets handles partial results +// correctly when etcd2 is used along with the normal vtctldclient <-> vtctld client/server +// path. +func TestEtcd2TopoGetTabletsPartialResults(t *testing.T) { + ctx := context.Background() + cells := []string{"cell1", "cell2"} + root := "/vitess" + // Start three etcd instances in the background. One will serve the global topo data + // while the other two will serve the cell topo data. + globalClientAddr, _ := startEtcd(t, 0) + cellClientAddrs := make([]string, len(cells)) + cellClientCmds := make([]*exec.Cmd, len(cells)) + cellTSs := make([]*topo.Server, len(cells)) + for i := 0; i < len(cells); i++ { + addr, cmd := startEtcd(t, testfiles.GoVtTopoEtcd2topoPort+(i+100*i)) + cellClientAddrs[i] = addr + cellClientCmds[i] = cmd + } + require.Equal(t, len(cells), len(cellTSs)) + + // Setup the global topo server. + globalTS, err := topo.OpenServer("etcd2", globalClientAddr, path.Join(root, topo.GlobalCell)) + require.NoError(t, err, "OpenServer() failed for global topo server: %v", err) + + // Setup the cell topo servers. + for i, cell := range cells { + cellTSs[i], err = topo.OpenServer("etcd2", cellClientAddrs[i], path.Join(root, topo.GlobalCell)) + require.NoError(t, err, "OpenServer() failed for cell %s topo server: %v", cell, err) + } + + // Create the CellInfo and Tablet records/keys. + for i, cell := range cells { + err = globalTS.CreateCellInfo(ctx, cell, &topodatapb.CellInfo{ + ServerAddress: cellClientAddrs[i], + Root: path.Join(root, cell), + }) + require.NoError(t, err, "CreateCellInfo() failed in global cell for cell %s: %v", cell, err) + ta := &topodatapb.TabletAlias{ + Cell: cell, + Uid: uint32(100 + i), + } + err = globalTS.CreateTablet(ctx, &topodatapb.Tablet{Alias: ta}) + require.NoError(t, err, "CreateTablet() failed in cell %s: %v", cell, err) + } + + // This returns stdout and stderr lines as a slice of strings along with the command error. + getTablets := func(strict bool) ([]string, []string, error) { + cmd := exec.Command("vtctldclient", "--server", "internal", "--topo-implementation", "etcd2", "--topo-global-server-address", globalClientAddr, "GetTablets", fmt.Sprintf("--strict=%t", strict)) + var stdout, stderr strings.Builder + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + // Trim any leading and trailing newlines so we don't have an empty string at + // either end of the slices which throws off the logical number of lines produced. + var stdoutLines, stderrLines []string + if stdout.Len() > 0 { // Otherwise we'll have a 1 element slice with an empty string + stdoutLines = strings.Split(strings.Trim(stdout.String(), "\n"), "\n") + } + if stderr.Len() > 0 { // Otherwise we'll have a 1 element slice with an empty string + stderrLines = strings.Split(strings.Trim(stderr.String(), "\n"), "\n") + } + return stdoutLines, stderrLines, err + } + + // Execute the vtctldclient command. + stdout, stderr, err := getTablets(false) + require.NoError(t, err, "Unexpected error: %v, output: %s", err, strings.Join(stdout, "\n")) + // We get each of the single tablets in each cell. + require.Len(t, stdout, len(cells)) + // And no error message. + require.Len(t, stderr, 0, "Unexpected error message: %s", strings.Join(stderr, "\n")) + + // Stop the last cell topo server. + cmd := cellClientCmds[len(cells)-1] + require.NotNil(t, cmd) + err = cmd.Process.Kill() + require.NoError(t, err) + _ = cmd.Wait() + + // Execute the vtctldclient command to get partial results. + stdout, stderr, err = getTablets(false) + require.NoError(t, err, "Unexpected error: %v, output: %s", err, strings.Join(stdout, "\n")) + // We get partial results, missing the tablet from the last cell. + require.Len(t, stdout, len(cells)-1, "Unexpected output: %s", strings.Join(stdout, "\n")) + // We get an error message for the cell that was unreachable. + require.Greater(t, len(stderr), 0, "Unexpected error message: %s", strings.Join(stderr, "\n")) + + // Execute the vtctldclient command with strict enabled. + _, stderr, err = getTablets(true) + require.Error(t, err) // We get an error + // We still get an error message printed to the console for the cell that was unreachable. + require.Greater(t, len(stderr), 0, "Unexpected error message: %s", strings.Join(stderr, "\n")) + + globalTS.Close() + for _, cellTS := range cellTSs { + cellTS.Close() + } +} + // testKeyspaceLock tests etcd-specific heartbeat (TTL). // Note TTL granularity is in seconds, even though the API uses time.Duration. // So we have to wait a long time in these tests. diff --git a/go/vt/vtctl/grpcvtctldserver/server.go b/go/vt/vtctl/grpcvtctldserver/server.go index 8e69f8ce032..9e44fffd8c6 100644 --- a/go/vt/vtctl/grpcvtctldserver/server.go +++ b/go/vt/vtctl/grpcvtctldserver/server.go @@ -2133,7 +2133,6 @@ func (s *VtctldServer) GetTablets(ctx context.Context, req *vtctldatapb.GetTable if req.Strict { return nil, err } - log.Warningf("GetTablets encountered non-fatal error %s; continuing because Strict=false", err) default: return nil, err diff --git a/go/vt/vtctl/grpcvtctldserver/server_test.go b/go/vt/vtctl/grpcvtctldserver/server_test.go index b3546b7a4a4..426f54c074b 100644 --- a/go/vt/vtctl/grpcvtctldserver/server_test.go +++ b/go/vt/vtctl/grpcvtctldserver/server_test.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "os" + "slices" "sort" "strings" "testing" @@ -7089,12 +7090,13 @@ func TestGetTablets(t *testing.T) { t.Parallel() tests := []struct { - name string - cells []string - tablets []*topodatapb.Tablet - req *vtctldatapb.GetTabletsRequest - expected []*topodatapb.Tablet - shouldErr bool + name string + cells []string + unreachableCells []string // Cells that will return a ctx timeout error when trying to get tablets + tablets []*topodatapb.Tablet + req *vtctldatapb.GetTabletsRequest + expected []*topodatapb.Tablet + shouldErr bool }{ { name: "no tablets", @@ -7443,6 +7445,72 @@ func TestGetTablets(t *testing.T) { }, shouldErr: true, }, + { + name: "multiple cells with one timing out and strict false", + cells: []string{"cell1", "cell2"}, + unreachableCells: []string{"cell2"}, + tablets: []*topodatapb.Tablet{ + { + Alias: &topodatapb.TabletAlias{ + Cell: "cell1", + Uid: 100, + }, + Keyspace: "ks1", + Shard: "-", + }, + { + Alias: &topodatapb.TabletAlias{ + Cell: "cell2", + Uid: 200, + }, + Keyspace: "ks1", + Shard: "-", + }, + }, + req: &vtctldatapb.GetTabletsRequest{ + Cells: []string{"cell1", "cell2"}, + Strict: false, + }, + shouldErr: false, + expected: []*topodatapb.Tablet{ + { + Alias: &topodatapb.TabletAlias{ + Cell: "cell1", + Uid: 100, + }, + Keyspace: "ks1", + Shard: "-", + }, + }, + }, + { + name: "multiple cells with one timing out and strict true", + cells: []string{"cell1", "cell2"}, + unreachableCells: []string{"cell2"}, + tablets: []*topodatapb.Tablet{ + { + Alias: &topodatapb.TabletAlias{ + Cell: "cell1", + Uid: 100, + }, + Keyspace: "ks1", + Shard: "-", + }, + { + Alias: &topodatapb.TabletAlias{ + Cell: "cell2", + Uid: 200, + }, + Keyspace: "ks1", + Shard: "-", + }, + }, + req: &vtctldatapb.GetTabletsRequest{ + Cells: []string{"cell1", "cell2"}, + Strict: true, + }, + shouldErr: true, + }, { name: "in nonstrict mode if all cells fail the request fails", cells: []string{"cell1"}, @@ -7676,7 +7744,27 @@ func TestGetTablets(t *testing.T) { testutil.AddTablets(ctx, t, ts, nil, tt.tablets...) - resp, err := vtctld.GetTablets(ctx, tt.req) + for _, cell := range tt.cells { + if slices.Contains(tt.unreachableCells, cell) { + err := ts.UpdateCellInfoFields(ctx, cell, func(ci *topodatapb.CellInfo) error { + ci.ServerAddress = memorytopo.UnreachableServerAddr + return nil + }) + require.NoError(t, err, "failed to update %s cell to point at unreachable address", cell) + } + } + + var ( + resp *vtctldatapb.GetTabletsResponse + err error + ) + if len(tt.unreachableCells) > 0 { + gtCtx, gtCancel := context.WithTimeout(context.Background(), 2*time.Second) + defer gtCancel() + resp, err = vtctld.GetTablets(gtCtx, tt.req) + } else { + resp, err = vtctld.GetTablets(ctx, tt.req) + } if tt.shouldErr { assert.Error(t, err) return