diff --git a/tests/e2e/cluster_downgrade_test.go b/tests/e2e/cluster_downgrade_test.go new file mode 100644 index 000000000000..b3a61fb9a430 --- /dev/null +++ b/tests/e2e/cluster_downgrade_test.go @@ -0,0 +1,345 @@ +// Copyright 2024 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "go.etcd.io/etcd/api/v3/version" + "go.etcd.io/etcd/client/pkg/v3/fileutil" + "go.etcd.io/etcd/client/pkg/v3/types" + clientv3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/server/v3/datadir" + "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/etcdserver/api/membership" + "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" + "go.etcd.io/etcd/tests/v3/framework/e2e" +) + +func TestDowngradeUpgradeClusterOf1(t *testing.T) { + testDowngradeUpgrade(t, 1, false) +} + +func TestDowngradeUpgradeClusterOf3(t *testing.T) { + testDowngradeUpgrade(t, 3, false) +} + +func TestDowngradeUpgradeClusterOf1WithSnapshot(t *testing.T) { + testDowngradeUpgrade(t, 1, true) +} + +func TestDowngradeUpgradeClusterOf3WithSnapshot(t *testing.T) { + testDowngradeUpgrade(t, 3, true) +} + +func testDowngradeUpgrade(t *testing.T, clusterSize int, triggerSnapshot bool) { + currentEtcdBinary := e2e.BinPath + lastReleaseBinary := e2e.BinPathLastRelease + if !fileutil.Exist(lastReleaseBinary) { + t.Skipf("%q does not exist", lastReleaseBinary) + } + + currentVersion, err := e2e.GetVersionFromBinary(currentEtcdBinary) + require.NoError(t, err) + // wipe any pre-release suffix like -alpha.0 we see commonly in builds + currentVersion.PreRelease = "" + + lastVersion, err := e2e.GetVersionFromBinary(lastReleaseBinary) + require.NoError(t, err) + + require.Equalf(t, lastVersion.Minor, currentVersion.Minor-1, "unexpected minor version difference") + currentVersionStr := currentVersion.String() + lastVersionStr := lastVersion.String() + + currentClusterVersion := semver.New(currentVersionStr) + currentClusterVersion.Patch = 0 + currentClusterVersionStr := currentClusterVersion.String() + + lastClusterVersion := semver.New(lastVersionStr) + lastClusterVersion.Patch = 0 + lastClusterVersionStr := lastClusterVersion.String() + + e2e.BeforeTest(t) + + t.Logf("Create cluster with version %s", currentVersionStr) + snapshotCount := 10 + epc := newCluster(t, clusterSize, snapshotCount) + for i := 0; i < len(epc.Procs); i++ { + validateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{ + Cluster: currentClusterVersionStr, + Server: version.Version, + }) + } + cc := epc.Etcdctl() + t.Logf("Cluster created") + if len(epc.Procs) > 1 { + t.Log("Waiting health interval to required to make membership changes") + time.Sleep(etcdserver.HealthInterval) + } + + t.Log("Adding member to test membership, but a learner avoid breaking quorum") + resp, err := cc.MemberAddAsLearner("fake1", []string{"http://127.0.0.1:1001"}) + require.NoError(t, err) + if triggerSnapshot { + t.Logf("Generating snapshot") + generateSnapshot(t, snapshotCount, cc) + verifySnapshot(t, epc) + } + t.Log("Removing learner to test membership") + _, err = cc.MemberRemove(resp.Member.ID) + require.NoError(t, err) + beforeMembers, beforeKV := getMembersAndKeys(t, cc) + + t.Logf("Starting downgrade process to %q", lastVersionStr) + for i := 0; i < len(epc.Procs); i++ { + t.Logf("Downgrading member %d by running %s binary", i, lastReleaseBinary) + stopEtcd(t, epc.Procs[i]) + startEtcd(t, epc.Procs[i], lastReleaseBinary) + } + + t.Log("All members downgraded, validating downgrade") + for i := 0; i < len(epc.Procs); i++ { + validateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{ + Cluster: lastClusterVersionStr, + Server: lastVersionStr, + }) + } + + t.Log("Downgrade complete") + afterMembers, afterKV := getMembersAndKeys(t, cc) + assert.Equal(t, beforeKV.Kvs, afterKV.Kvs) + assert.Equal(t, beforeMembers.Members, afterMembers.Members) + + if len(epc.Procs) > 1 { + t.Log("Waiting health interval to required to make membership changes") + time.Sleep(etcdserver.HealthInterval) + } + t.Log("Adding learner to test membership, but avoid breaking quorum") + resp, err = cc.MemberAddAsLearner("fake2", []string{"http://127.0.0.1:1002"}) + require.NoError(t, err) + if triggerSnapshot { + t.Logf("Generating snapshot") + generateSnapshot(t, snapshotCount, cc) + verifySnapshot(t, epc) + } + t.Log("Removing learner to test membership") + _, err = cc.MemberRemove(resp.Member.ID) + require.NoError(t, err) + beforeMembers, beforeKV = getMembersAndKeys(t, cc) + + t.Logf("Starting upgrade process to %q", currentVersionStr) + for i := 0; i < len(epc.Procs); i++ { + t.Logf("Upgrading member %d", i) + stopEtcd(t, epc.Procs[i]) + startEtcd(t, epc.Procs[i], currentEtcdBinary) + // NOTE: The leader has monitor to the cluster version, which will + // update cluster version. We don't need to check the transient + // version just in case that it might be flaky. + } + + t.Log("All members upgraded, validating upgrade") + for i := 0; i < len(epc.Procs); i++ { + validateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{ + Cluster: currentClusterVersionStr, + Server: version.Version, + }) + } + t.Log("Upgrade complete") + + afterMembers, afterKV = getMembersAndKeys(t, cc) + assert.Equal(t, beforeKV.Kvs, afterKV.Kvs) + assert.Equal(t, beforeMembers.Members, afterMembers.Members) +} + +func newCluster(t *testing.T, clusterSize int, snapshotCount int) *e2e.EtcdProcessCluster { + copiedCfg := e2e.NewConfigNoTLS() + copiedCfg.ClusterSize = clusterSize + copiedCfg.SnapshotCount = snapshotCount + copiedCfg.KeepDataDir = true + copiedCfg.BasePeerScheme = "unix" // to avoid port conflict + + epc, err := e2e.NewEtcdProcessCluster(t, copiedCfg) + if err != nil { + t.Fatalf("could not start etcd process cluster (%v)", err) + } + t.Cleanup(func() { + if errC := epc.Close(); errC != nil { + t.Fatalf("error closing etcd processes (%v)", errC) + } + }) + return epc +} + +func startEtcd(t *testing.T, ep e2e.EtcdProcess, execPath string) { + ep.Config().ExecPath = execPath + if execPath == e2e.BinPathLastRelease { + ep.Config().Args = addOrRemoveArg(ep.Config().Args, "--next-cluster-version-compatible", true) + } else { + ep.Config().Args = addOrRemoveArg(ep.Config().Args, "--next-cluster-version-compatible", false) + } + err := ep.Restart() + if err != nil { + t.Fatalf("could not start etcd process cluster (%v)", err) + } +} + +func addOrRemoveArg(args []string, arg string, isAdd bool) []string { + newArgs := []string{} + for _, s := range args { + if s != arg { + newArgs = append(newArgs, s) + } + } + if isAdd { + newArgs = append(newArgs, arg) + } + return newArgs +} + +func stopEtcd(t *testing.T, ep e2e.EtcdProcess) { + if err := ep.Stop(); err != nil { + t.Fatal(err) + } +} + +func validateVersion(t *testing.T, cfg *e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess, expect version.Versions) { + executeWithTimeout(t, 30*time.Second, func() { + for { + result, err := getMemberVersionByCurl(cfg, member) + if err != nil { + cfg.Logger.Warn("failed to get member version and retrying", zap.Error(err), zap.String("member", member.Config().Name)) + time.Sleep(time.Second) + continue + } + cfg.Logger.Info("Comparing versions", zap.String("member", member.Config().Name), zap.Any("got", result), zap.Any("want", expect)) + if err := compareMemberVersion(expect, result); err != nil { + cfg.Logger.Warn("Versions didn't match retrying", zap.Error(err), zap.String("member", member.Config().Name)) + time.Sleep(time.Second) + continue + } + cfg.Logger.Info("Versions match", zap.String("member", member.Config().Name)) + break + } + }) +} + +func leader(t *testing.T, epc *e2e.EtcdProcessCluster) e2e.EtcdProcess { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + for i := 0; i < len(epc.Procs); i++ { + endpoints := epc.Procs[i].EndpointsGRPC() + cli, err := clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: 3 * time.Second, + }) + if err != nil { + t.Fatal(err) + } + defer cli.Close() + resp, err := cli.Status(ctx, endpoints[0]) + if err != nil { + t.Fatal(err) + } + if resp.Header.GetMemberId() == resp.Leader { + return epc.Procs[i] + } + } + t.Fatal("Leader not found") + return nil +} + +func compareMemberVersion(expect version.Versions, target version.Versions) error { + if expect.Server != "" && expect.Server != target.Server { + return fmt.Errorf("expect etcdserver version %v, but got %v", expect.Server, target.Server) + } + + if expect.Cluster != "" && expect.Cluster != target.Cluster { + return fmt.Errorf("expect etcdcluster version %v, but got %v", expect.Cluster, target.Cluster) + } + + return nil +} + +func getMemberVersionByCurl(cfg *e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) (version.Versions, error) { + args := e2e.CURLPrefixArgsCluster(cfg, member, "GET", e2e.CURLReq{Endpoint: "/version"}) + lines, err := e2e.RunUtilCompletion(args, nil) + if err != nil { + return version.Versions{}, err + } + + data := strings.Join(lines, "\n") + result := version.Versions{} + if err := json.Unmarshal([]byte(data), &result); err != nil { + return version.Versions{}, fmt.Errorf("failed to unmarshal (%v): %w", data, err) + } + return result, nil +} + +func generateSnapshot(t *testing.T, snapshotCount int, cc *e2e.Etcdctl) { + t.Logf("Adding keys") + for i := 0; i < snapshotCount*3; i++ { + err := cc.Put(fmt.Sprintf("%d", i), "1") + assert.NoError(t, err) + } +} + +func verifySnapshot(t *testing.T, epc *e2e.EtcdProcessCluster) { + for i := range epc.Procs { + t.Logf("Verifying snapshot for member %d", i) + ss := snap.New(epc.Cfg.Logger, datadir.ToSnapDir(epc.Procs[i].Config().DataDirPath)) + _, err := ss.Load() + assert.NoError(t, err) + } + t.Logf("All members have a valid snapshot") +} + +func verifySnapshotMembers(t *testing.T, epc *e2e.EtcdProcessCluster, expectedMembers *clientv3.MemberListResponse) { + for i := range epc.Procs { + t.Logf("Verifying snapshot for member %d", i) + ss := snap.New(epc.Cfg.Logger, datadir.ToSnapDir(epc.Procs[i].Config().DataDirPath)) + snap, err := ss.Load() + require.NoError(t, err) + st := v2store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix) + err = st.Recovery(snap.Data) + assert.NoError(t, err) + for _, m := range expectedMembers.Members { + _, err := st.Get(membership.MemberStoreKey(types.ID(m.ID)), true, true) + assert.NoError(t, err) + } + t.Logf("Verifed snapshot for member %d", i) + } + t.Log("All members have a valid snapshot") +} + +func getMembersAndKeys(t *testing.T, cc *e2e.Etcdctl) (*clientv3.MemberListResponse, *clientv3.GetResponse) { + kvs, err := cc.GetWithPrefix("") + assert.NoError(t, err) + + members, err := cc.MemberList() + assert.NoError(t, err) + + return members, kvs +} diff --git a/tests/e2e/v3_curl_test.go b/tests/e2e/v3_curl_test.go index 581d56d89c77..032f6377c38b 100644 --- a/tests/e2e/v3_curl_test.go +++ b/tests/e2e/v3_curl_test.go @@ -18,6 +18,7 @@ import ( "encoding/base64" "encoding/json" "fmt" + "math/rand" "path" "strconv" "testing" @@ -244,7 +245,7 @@ func testV3CurlAuth(cx ctlCtx) { lineFunc = func(txt string) bool { return true } ) - cmdArgs = e2e.CURLPrefixArgsCluster(cx.epc, "POST", e2e.CURLReq{Endpoint: path.Join(p, "/auth/authenticate"), Value: string(authreq)}) + cmdArgs = e2e.CURLPrefixArgsCluster(cx.epc.Cfg, cx.epc.Procs[rand.Intn(cx.epc.Cfg.ClusterSize)], "POST", e2e.CURLReq{Endpoint: path.Join(p, "/auth/authenticate"), Value: string(authreq)}) proc, err := e2e.SpawnCmd(cmdArgs, cx.envMap) testutil.AssertNil(cx.t, err) defer proc.Close() @@ -283,7 +284,7 @@ func testV3CurlCampaign(cx ctlCtx) { if err != nil { cx.t.Fatal(err) } - cargs := e2e.CURLPrefixArgsCluster(cx.epc, "POST", e2e.CURLReq{ + cargs := e2e.CURLPrefixArgsCluster(cx.epc.Cfg, cx.epc.Procs[rand.Intn(cx.epc.Cfg.ClusterSize)], "POST", e2e.CURLReq{ Endpoint: path.Join(cx.apiPrefix, "/election/campaign"), Value: string(cdata), }) diff --git a/tests/framework/e2e/curl.go b/tests/framework/e2e/curl.go index 9df19620a07b..dcbaa3be4576 100644 --- a/tests/framework/e2e/curl.go +++ b/tests/framework/e2e/curl.go @@ -43,15 +43,19 @@ type CURLReq struct { // CURLPrefixArgsCluster builds the beginning of a curl command for a given key // addressed to a random URL in the given cluster. -func CURLPrefixArgsCluster(clus *EtcdProcessCluster, method string, req CURLReq) []string { - member := clus.Procs[rand.Intn(clus.Cfg.ClusterSize)] - clientURL := member.Config().Acurl - if req.MetricsURLScheme != "" { - clientURL = member.EndpointsMetrics()[0] - } - return CURLPrefixArgs(clientURL, clus.Cfg.ClientTLS, !clus.Cfg.NoCN, method, req) +func CURLPrefixArgsCluster(cfg *EtcdProcessClusterConfig, member EtcdProcess, method string, req CURLReq) []string { + return CURLPrefixArgs(member.Config().Acurl, cfg.ClientTLS, !cfg.NoCN, method, req) } +// func CURLPrefixArgsCluster(clus *EtcdProcessCluster, method string, req CURLReq) []string { +// member := clus.Procs[rand.Intn(clus.Cfg.ClusterSize)] +// clientURL := member.Config().Acurl +// if req.MetricsURLScheme != "" { +// clientURL = member.EndpointsMetrics()[0] +// } +// return CURLPrefixArgs(clientURL, clus.Cfg.ClientTLS, !clus.Cfg.NoCN, method, req) +// } + // CURLPrefixArgs builds the beginning of a curl command for a given key // addressed to a random URL in the given cluster. func CURLPrefixArgs(clientURL string, connType ClientConnType, CN bool, method string, req CURLReq) []string { @@ -111,13 +115,13 @@ func CURLPrefixArgs(clientURL string, connType ClientConnType, CN bool, method s } func CURLPost(clus *EtcdProcessCluster, req CURLReq) error { - return SpawnWithExpect(CURLPrefixArgsCluster(clus, "POST", req), req.Expected) + return SpawnWithExpect(CURLPrefixArgsCluster(clus.Cfg, clus.Procs[rand.Intn(clus.Cfg.ClusterSize)], "POST", req), req.Expected) } func CURLPut(clus *EtcdProcessCluster, req CURLReq) error { - return SpawnWithExpect(CURLPrefixArgsCluster(clus, "PUT", req), req.Expected) + return SpawnWithExpect(CURLPrefixArgsCluster(clus.Cfg, clus.Procs[rand.Intn(clus.Cfg.ClusterSize)], "PUT", req), req.Expected) } func CURLGet(clus *EtcdProcessCluster, req CURLReq) error { - return SpawnWithExpect(CURLPrefixArgsCluster(clus, "GET", req), req.Expected) + return SpawnWithExpect(CURLPrefixArgsCluster(clus.Cfg, clus.Procs[rand.Intn(clus.Cfg.ClusterSize)], "GET", req), req.Expected) } diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index c20844286567..bb464e4db8f4 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -27,6 +27,7 @@ import ( "testing" "time" + "github.com/coreos/go-semver/semver" "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/expect" "go.etcd.io/etcd/pkg/v3/proxy" @@ -405,3 +406,27 @@ func parseFailpointsBody(body io.Reader) (map[string]string, error) { } return failpoints, nil } + +func GetVersionFromBinary(binaryPath string) (*semver.Version, error) { + lines, err := RunUtilCompletion([]string{binaryPath, "--version"}, nil) + if err != nil { + return nil, fmt.Errorf("could not find binary version from %s, err: %w", binaryPath, err) + } + + for _, line := range lines { + if strings.HasPrefix(line, "etcd Version:") { + versionString := strings.TrimSpace(strings.SplitAfter(line, ":")[1]) + version, err := semver.NewVersion(versionString) + if err != nil { + return nil, err + } + return &semver.Version{ + Major: version.Major, + Minor: version.Minor, + Patch: version.Patch, + }, nil + } + } + + return nil, fmt.Errorf("could not find version in binary output of %s, lines outputted were %v", binaryPath, lines) +} diff --git a/tests/framework/e2e/etcdctl.go b/tests/framework/e2e/etcdctl.go index 7e7361b17cdf..0d21ae0f5b46 100644 --- a/tests/framework/e2e/etcdctl.go +++ b/tests/framework/e2e/etcdctl.go @@ -61,6 +61,12 @@ func (ctl *Etcdctl) Get(key string) (*clientv3.GetResponse, error) { return &resp, err } +func (ctl *Etcdctl) GetWithPrefix(key string) (*clientv3.GetResponse, error) { + var resp clientv3.GetResponse + err := ctl.spawnJsonCmd(&resp, "get", key, "--prefix") + return &resp, err +} + func (ctl *Etcdctl) Put(key, value string) error { if ctl.v2 { panic("Unsupported method for v2") @@ -149,6 +155,15 @@ func (ctl *Etcdctl) MemberAdd(name string, peerURLs []string) (*clientv3.MemberA return &resp, err } +func (ctl *Etcdctl) MemberAddAsLearner(name string, peerURLs []string) (*clientv3.MemberAddResponse, error) { + if ctl.v2 { + panic("Unsupported method for v2") + } + var resp clientv3.MemberAddResponse + err := ctl.spawnJsonCmd(&resp, "member", "add", name, "--learner", "--peer-urls", strings.Join(peerURLs, ",")) + return &resp, err +} + func (ctl *Etcdctl) MemberRemove(id uint64) (*clientv3.MemberRemoveResponse, error) { if ctl.v2 { panic("Unsupported method for v2") diff --git a/tests/go.mod b/tests/go.mod index 19f09c2c6b54..453ec8450aa2 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -15,6 +15,7 @@ replace ( ) require ( + github.com/coreos/go-semver v0.3.0 github.com/dustin/go-humanize v1.0.0 github.com/etcd-io/gofail v0.0.0-20190801230047-ad7f989257ca github.com/gogo/protobuf v1.3.2 @@ -54,7 +55,6 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd/v22 v22.3.2 // indirect github.com/creack/pty v1.1.11 // indirect github.com/davecgh/go-spew v1.1.1 // indirect