From ae6655f11e965c983e6b04a5603f4dc2aac9bd87 Mon Sep 17 00:00:00 2001 From: Thomas Gosteli Date: Fri, 1 Nov 2024 12:23:48 +0000 Subject: [PATCH] fix(defrag): handle no space left error Signed-off-by: Thomas Gosteli --- server/mvcc/backend/backend.go | 12 +++++--- tests/e2e/defrag_no_space_test.go | 49 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 tests/e2e/defrag_no_space_test.go diff --git a/server/mvcc/backend/backend.go b/server/mvcc/backend/backend.go index 7d77da12fd6..077146b82ee 100644 --- a/server/mvcc/backend/backend.go +++ b/server/mvcc/backend/backend.go @@ -477,10 +477,6 @@ func (b *backend) defrag() error { b.readTx.Lock() defer b.readTx.Unlock() - b.batchTx.unsafeCommit(true) - - b.batchTx.tx = nil - // Create a temporary file to ensure we start with a clean slate. // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. dir := filepath.Dir(b.db.Path()) @@ -488,11 +484,14 @@ func (b *backend) defrag() error { if err != nil { return err } + options := bolt.Options{} if boltOpenOptions != nil { options = *boltOpenOptions } options.OpenFile = func(_ string, _ int, _ os.FileMode) (file *os.File, err error) { + // gofail: var defragNoSpace string + // return nil, fmt.Errorf(defragNoSpace) return temp, nil } // Don't load tmp db into memory regardless of opening options @@ -515,6 +514,11 @@ func (b *backend) defrag() error { zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))), ) } + + // Commit/stop and then reset current transactions (including the readTx) + b.batchTx.unsafeCommit(true) + b.batchTx.tx = nil + // gofail: var defragBeforeCopy struct{} err = defragdb(b.db, tmpdb, defragLimit) if err != nil { diff --git a/tests/e2e/defrag_no_space_test.go b/tests/e2e/defrag_no_space_test.go new file mode 100644 index 00000000000..810136f156e --- /dev/null +++ b/tests/e2e/defrag_no_space_test.go @@ -0,0 +1,49 @@ +// Copyright 2024 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "go.etcd.io/etcd/tests/v3/framework/config" + "go.etcd.io/etcd/tests/v3/framework/e2e" +) + +func TestDefragNoSpace(t *testing.T) { + e2e.BeforeTest(t) + + clus, err := e2e.NewEtcdProcessCluster(context.TODO(), t, + e2e.WithClusterSize(1), + e2e.WithGoFailEnabled(true), + ) + require.NoError(t, err) + t.Cleanup(func() { clus.Stop() }) + + member := clus.Procs[0] + + require.NoError(t, member.Failpoints().SetupHTTP(context.Background(), "defragNoSpace", `return("no space")`)) + require.ErrorContains(t, member.Etcdctl().Defragment(context.Background(), config.DefragOption{Timeout: time.Minute}), "no space") + + // Make sure etcd continues to run even after the failed defrag attempt + require.NoError(t, member.Etcdctl().Put(context.Background(), "foo", "bar", config.PutOptions{})) + value, err := member.Etcdctl().Get(context.Background(), "foo", config.GetOptions{}) + require.NoError(t, err) + require.Len(t, value.Kvs, 1) + require.Equal(t, "bar", string(value.Kvs[0].Value)) +}