Adds XFS coverage and random failpoint selection to the robustness tests,
lets dmflakey pass extra options to mkfs, and introduces the
resizeFileError and lackOfDiskSpace failpoints.

Review changes folded into this revision (this preamble precedes the first
"diff --git" header, so patch tools skip it):
  * tests/dmflakey/dmflakey.go: create the image's parent directory with mode
    0755 instead of 0600 and derive it with filepath.Dir (the extra "path"
    import is no longer needed, later hunks shift by one line); split mkfsOpt
    with strings.Fields so repeated spaces do not become empty mkfs arguments.
  * tests/robustness/powerfailure_test.go: create the log directory with mode
    0755; use strings.ReplaceAll and a single fmt.Sprintf; randomInt stops the
    test via require.NoError before the result is dereferenced.
  * tx.go: explain the otherwise puzzling `_ = errors.New("")` statement.
The index lines are carried over unchanged, so the post-image blob ids of the
three edited files above are stale.

diff --git a/.github/workflows/robustness_test.yaml b/.github/workflows/robustness_test.yaml
index 9aca5249e..88ee42d91 100644
--- a/.github/workflows/robustness_test.yaml
+++ b/.github/workflows/robustness_test.yaml
@@ -12,6 +12,9 @@ jobs:
         with:
           go-version: ${{ steps.goversion.outputs.goversion }}
       - run: |
+          set -euo pipefail
+          sudo apt-get install -y dmsetup xfsprogs
+
           make gofail-enable
           # build bbolt with failpoint
           go install ./cmd/bbolt
diff --git a/db.go b/db.go
index 4175bdf3d..e8a2d1c29 100644
--- a/db.go
+++ b/db.go
@@ -1159,6 +1159,8 @@ func (db *DB) grow(sz int) error {
 	// https://github.com/boltdb/bolt/issues/284
 	if !db.NoGrowSync && !db.readOnly {
 		if runtime.GOOS != "windows" {
+			// gofail: var resizeFileError string
+			// return errors.New(resizeFileError)
 			if err := db.file.Truncate(int64(sz)); err != nil {
 				return fmt.Errorf("file resize error: %s", err)
 			}
diff --git a/tests/dmflakey/dmflakey.go b/tests/dmflakey/dmflakey.go
index d9bdf99a0..88c3c2d48 100644
--- a/tests/dmflakey/dmflakey.go
+++ b/tests/dmflakey/dmflakey.go
@@ -89,9 +89,9 @@ const (
 // The device-mapper device will be /dev/mapper/$flakeyDevice. And the filesystem
 // image will be created at $dataStorePath/$flakeyDevice.img. By default, the
 // device is available for 2 minutes and size is 10 GiB.
-func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType) (_ Flakey, retErr error) {
+func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType, mkfsOpt string) (_ Flakey, retErr error) {
 	imgPath := filepath.Join(dataStorePath, fmt.Sprintf("%s.img", flakeyDevice))
-	if err := createEmptyFSImage(imgPath, fsType); err != nil {
+	if err := createEmptyFSImage(imgPath, fsType, mkfsOpt); err != nil {
 		return nil, err
 	}
 	defer func() {
@@ -275,7 +275,7 @@ func (f *flakey) Teardown() error {
 
 // createEmptyFSImage creates empty filesystem on dataStorePath folder with
 // default size - 10 GiB.
-func createEmptyFSImage(imgPath string, fsType FSType) error {
+func createEmptyFSImage(imgPath string, fsType FSType, mkfsOpt string) error {
 	if err := validateFSType(fsType); err != nil {
 		return err
 	}
@@ -289,6 +289,10 @@ func createEmptyFSImage(imgPath string, fsType FSType) error {
 		return fmt.Errorf("failed to create image because %s already exists", imgPath)
 	}
 
+	if err := os.MkdirAll(filepath.Dir(imgPath), 0755); err != nil {
+		return fmt.Errorf("failed to ensure parent directory %s: %w", filepath.Dir(imgPath), err)
+	}
+
 	f, err := os.Create(imgPath)
 	if err != nil {
 		return fmt.Errorf("failed to create image %s: %w", imgPath, err)
@@ -303,10 +307,16 @@ func createEmptyFSImage(imgPath string, fsType FSType) error {
 			imgPath, defaultImgSize, err)
 	}
 
-	output, err := exec.Command(mkfs, imgPath).CombinedOutput()
+	args := []string{imgPath}
+	if mkfsOpt != "" {
+		splitArgs := strings.Fields(mkfsOpt)
+		args = append(splitArgs, imgPath)
+	}
+
+	output, err := exec.Command(mkfs, args...).CombinedOutput()
 	if err != nil {
-		return fmt.Errorf("failed to mkfs.%s on %s (out: %s): %w",
-			fsType, imgPath, string(output), err)
+		return fmt.Errorf("failed to mkfs on %s (%s %v) (out: %s): %w",
+			imgPath, mkfs, args, string(output), err)
 	}
 	return nil
 }
diff --git a/tests/dmflakey/dmflakey_test.go b/tests/dmflakey/dmflakey_test.go
index 41c66db8d..99e2de062 100644
--- a/tests/dmflakey/dmflakey_test.go
+++ b/tests/dmflakey/dmflakey_test.go
@@ -26,31 +26,35 @@ func TestMain(m *testing.M) {
 }
 
 func TestBasic(t *testing.T) {
-	tmpDir := t.TempDir()
+	for _, fsType := range []FSType{FSTypeEXT4, FSTypeXFS} {
+		t.Run(string(fsType), func(t *testing.T) {
+			tmpDir := t.TempDir()
 
-	flakey, err := InitFlakey("go-dmflakey", tmpDir, FSTypeEXT4)
-	require.NoError(t, err, "init flakey")
-	defer func() {
-		assert.NoError(t, flakey.Teardown())
-	}()
+			flakey, err := InitFlakey("go-dmflakey", tmpDir, fsType, "")
+			require.NoError(t, err, "init flakey")
+			defer func() {
+				assert.NoError(t, flakey.Teardown())
+			}()
 
-	target := filepath.Join(tmpDir, "root")
-	require.NoError(t, os.MkdirAll(target, 0600))
+			target := filepath.Join(tmpDir, "root")
+			require.NoError(t, os.MkdirAll(target, 0600))
 
-	require.NoError(t, mount(target, flakey.DevicePath(), ""))
-	defer func() {
-		assert.NoError(t, unmount(target))
-	}()
+			require.NoError(t, mount(target, flakey.DevicePath(), ""))
+			defer func() {
+				assert.NoError(t, unmount(target))
+			}()
 
-	file := filepath.Join(target, "test")
-	assert.NoError(t, writeFile(file, []byte("hello, world"), 0600, true))
+			file := filepath.Join(target, "test")
+			assert.NoError(t, writeFile(file, []byte("hello, world"), 0600, true))
 
-	assert.NoError(t, unmount(target))
+			assert.NoError(t, unmount(target))
 
-	assert.NoError(t, flakey.Teardown())
+			assert.NoError(t, flakey.Teardown())
+		})
+	}
 }
 
-func TestDropWrites(t *testing.T) {
+func TestDropWritesExt4(t *testing.T) {
 	flakey, root := initFlakey(t, FSTypeEXT4)
 
 	// commit=1000 is to delay commit triggered by writeback thread
@@ -82,7 +86,7 @@ func TestDropWrites(t *testing.T) {
 	assert.True(t, errors.Is(err, os.ErrNotExist))
 }
 
-func TestErrorWrites(t *testing.T) {
+func TestErrorWritesExt4(t *testing.T) {
 	flakey, root := initFlakey(t, FSTypeEXT4)
 
 	// commit=1000 is to delay commit triggered by writeback thread
@@ -114,7 +118,7 @@ func initFlakey(t *testing.T, fsType FSType) (_ Flakey, root string) {
 	target := filepath.Join(tmpDir, "root")
 	require.NoError(t, os.MkdirAll(target, 0600))
 
-	flakey, err := InitFlakey("go-dmflakey", tmpDir, FSTypeEXT4)
+	flakey, err := InitFlakey("go-dmflakey", tmpDir, fsType, "")
 	require.NoError(t, err, "init flakey")
 
 	t.Cleanup(func() {
diff --git a/tests/failpoint/db_failpoint_test.go b/tests/failpoint/db_failpoint_test.go
index d9201ef1f..7eedbfe3e 100644
--- a/tests/failpoint/db_failpoint_test.go
+++ b/tests/failpoint/db_failpoint_test.go
@@ -209,3 +209,63 @@ func TestIssue72(t *testing.T) {
 func idToBytes(id int) []byte {
 	return []byte(fmt.Sprintf("%010d", id))
 }
+
+func TestFailpoint_ResizeFileFail(t *testing.T) {
+	db := btesting.MustCreateDB(t)
+
+	err := gofail.Enable("resizeFileError", `return("resizeFile somehow failed")`)
+	require.NoError(t, err)
+
+	err = db.Fill([]byte("data"), 1, 10000,
+		func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
+		func(tx int, k int) []byte { return make([]byte, 100) },
+	)
+
+	require.Error(t, err)
+	require.ErrorContains(t, err, "resizeFile somehow failed")
+
+	// It should work after disabling the failpoint.
+	err = gofail.Disable("resizeFileError")
+	require.NoError(t, err)
+	db.MustClose()
+	db.MustReopen()
+
+	err = db.Fill([]byte("data"), 1, 10000,
+		func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
+		func(tx int, k int) []byte { return make([]byte, 100) },
+	)
+
+	require.NoError(t, err)
+}
+
+func TestFailpoint_LackOfDiskSpace(t *testing.T) {
+	db := btesting.MustCreateDB(t)
+
+	err := gofail.Enable("lackOfDiskSpace", `return("grow somehow failed")`)
+	require.NoError(t, err)
+
+	tx, err := db.Begin(true)
+	require.NoError(t, err)
+
+	err = tx.Commit()
+	require.Error(t, err)
+	require.ErrorContains(t, err, "grow somehow failed")
+
+	err = tx.Rollback()
+	require.Error(t, err)
+	require.ErrorIs(t, err, bolt.ErrTxClosed)
+
+	// It should work after disabling the failpoint.
+	err = gofail.Disable("lackOfDiskSpace")
+	require.NoError(t, err)
+
+	tx, err = db.Begin(true)
+	require.NoError(t, err)
+
+	err = tx.Commit()
+	require.NoError(t, err)
+
+	err = tx.Rollback()
+	require.Error(t, err)
+	require.ErrorIs(t, err, bolt.ErrTxClosed)
+}
diff --git a/tests/robustness/powerfailure_test.go b/tests/robustness/powerfailure_test.go
index a1d0bc598..35ed628b4 100644
--- a/tests/robustness/powerfailure_test.go
+++ b/tests/robustness/powerfailure_test.go
@@ -4,8 +4,11 @@ package robustness
 
 import (
 	"bytes"
+	"crypto/rand"
 	"fmt"
 	"io"
+	"math"
+	"math/big"
 	"net/http"
 	"net/url"
 	"os"
@@ -23,9 +26,121 @@ import (
 	"golang.org/x/sys/unix"
 )
 
-// TestRestartFromPowerFailure is to test data after unexpected power failure.
-func TestRestartFromPowerFailure(t *testing.T) {
-	flakey := initFlakeyDevice(t, t.Name(), dmflakey.FSTypeEXT4, "")
+var panicFailpoints = []string{
+	"beforeSyncDataPages",
+	"beforeSyncMetaPage",
+	"lackOfDiskSpace",
+	"mapError",
+	"resizeFileError",
+	"unmapError",
+}
+
+// TestRestartFromPowerFailureExt4 is to test data after unexpected power failure on ext4.
+func TestRestartFromPowerFailureExt4(t *testing.T) {
+	for _, tc := range []struct {
+		name         string
+		du           time.Duration
+		fsMountOpt   string
+		useFailpoint bool
+	}{
+		{
+			name:         "fp_ext4_commit5s",
+			du:           5 * time.Second,
+			fsMountOpt:   "commit=5",
+			useFailpoint: true,
+		},
+		{
+			name:         "fp_ext4_commit1s",
+			du:           10 * time.Second,
+			fsMountOpt:   "commit=1",
+			useFailpoint: true,
+		},
+		{
+			name:         "fp_ext4_commit1000s",
+			du:           10 * time.Second,
+			fsMountOpt:   "commit=1000",
+			useFailpoint: true,
+		},
+		{
+			name:       "kill_ext4_commit5s",
+			du:         5 * time.Second,
+			fsMountOpt: "commit=5",
+		},
+		{
+			name:       "kill_ext4_commit1s",
+			du:         10 * time.Second,
+			fsMountOpt: "commit=1",
+		},
+		{
+			name:       "kill_ext4_commit1000s",
+			du:         10 * time.Second,
+			fsMountOpt: "commit=1000",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			doPowerFailure(t, tc.du, dmflakey.FSTypeEXT4, "", tc.fsMountOpt, tc.useFailpoint)
+		})
+	}
+}
+
+func TestRestartFromPowerFailureXFS(t *testing.T) {
+	for _, tc := range []struct {
+		name         string
+		mkfsOpt      string
+		fsMountOpt   string
+		useFailpoint bool
+	}{
+		{
+			name:         "xfs_no_opts",
+			mkfsOpt:      "",
+			fsMountOpt:   "",
+			useFailpoint: true,
+		},
+		{
+			name:         "lazy-log",
+			mkfsOpt:      "-l lazy-count=1",
+			fsMountOpt:   "",
+			useFailpoint: true,
+		},
+		{
+			name:         "odd-allocsize",
+			mkfsOpt:      "",
+			fsMountOpt:   fmt.Sprintf("allocsize=%d", 4096*5),
+			useFailpoint: true,
+		},
+		{
+			name:         "nolargeio",
+			mkfsOpt:      "",
+			fsMountOpt:   "nolargeio",
+			useFailpoint: true,
+		},
+		{
+			name:         "odd-alignment",
+			mkfsOpt:      "-d sunit=1024,swidth=1024",
+			fsMountOpt:   "noalign",
+			useFailpoint: true,
+		},
+		{
+			name:    "openshift-sno-options",
+			mkfsOpt: "-m bigtime=1,finobt=1,rmapbt=0,reflink=1 -i sparse=1 -l lazy-count=1",
+			// openshift also supplies seclabel,relatime,prjquota on RHEL, but that's not supported on our CI
+			// prjquota is only unsupported on our ARM runners.
+			// You can find more information in either the man page with `man xfs` or `man mkfs.xfs`.
+			// Also refer to https://man7.org/linux/man-pages/man8/mkfs.xfs.8.html.
+			fsMountOpt:   "rw,attr2,inode64,logbufs=8,logbsize=32k",
+			useFailpoint: true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Logf("mkfs opts: %s", tc.mkfsOpt)
+			t.Logf("mount opts: %s", tc.fsMountOpt)
+			doPowerFailure(t, 5*time.Second, dmflakey.FSTypeXFS, tc.mkfsOpt, tc.fsMountOpt, tc.useFailpoint)
+		})
+	}
+}
+
+func doPowerFailure(t *testing.T, du time.Duration, fsType dmflakey.FSType, mkfsOpt string, fsMountOpt string, useFailpoint bool) {
+	flakey := initFlakeyDevice(t, strings.ReplaceAll(t.Name(), "/", "_"), fsType, mkfsOpt, fsMountOpt)
 	root := flakey.RootFS()
 
 	dbPath := filepath.Join(root, "boltdb")
@@ -38,6 +153,8 @@ func TestRestartFromPowerFailure(t *testing.T) {
 	}
 
 	logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name()))
+	require.NoError(t, os.MkdirAll(path.Dir(logPath), 0755))
+
 	logFd, err := os.Create(logPath)
 	require.NoError(t, err)
 	defer logFd.Close()
@@ -64,10 +181,18 @@ func TestRestartFromPowerFailure(t *testing.T) {
 		}
 	}()
 
-	time.Sleep(time.Duration(time.Now().UnixNano()%5+1) * time.Second)
+	time.Sleep(du)
 	t.Logf("simulate power failure")
 
-	activeFailpoint(t, fpURL, "beforeSyncMetaPage", "panic")
+	if useFailpoint {
+		fpURL = "http://" + fpURL
+		targetFp := panicFailpoints[randomInt(t, math.MaxInt32)%len(panicFailpoints)]
+		t.Logf("random pick failpoint: %s", targetFp)
+		activeFailpoint(t, fpURL, targetFp, "panic")
+	} else {
+		t.Log("kill bbolt")
+		assert.NoError(t, cmd.Process.Kill())
+	}
 
 	select {
 	case <-time.After(10 * time.Second):
@@ -89,10 +214,10 @@ func TestRestartFromPowerFailure(t *testing.T) {
 
 // activeFailpoint actives the failpoint by http.
 func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) {
-	u, err := url.Parse("http://" + path.Join(targetUrl, fpName))
+	u, err := url.JoinPath(targetUrl, fpName)
 	require.NoError(t, err, "parse url %s", targetUrl)
 
-	req, err := http.NewRequest("PUT", u.String(), bytes.NewBuffer([]byte(fpVal)))
+	req, err := http.NewRequest("PUT", u, bytes.NewBuffer([]byte(fpVal)))
 	require.NoError(t, err)
 
 	resp, err := http.DefaultClient.Do(req)
@@ -116,10 +241,10 @@ type FlakeyDevice interface {
 }
 
 // initFlakeyDevice returns FlakeyDevice instance with a given filesystem.
-func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mntOpt string) FlakeyDevice {
+func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mkfsOpt string, mntOpt string) FlakeyDevice {
 	imgDir := t.TempDir()
 
-	flakey, err := dmflakey.InitFlakey(name, imgDir, fsType)
+	flakey, err := dmflakey.InitFlakey(name, imgDir, fsType, mkfsOpt)
 	require.NoError(t, err, "init flakey %s", name)
 	t.Cleanup(func() {
 		assert.NoError(t, flakey.Teardown())
@@ -170,7 +295,7 @@ func (f *flakeyT) PowerFailure(mntOpt string) error {
 	}
 
 	if err := unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, mntOpt); err != nil {
-		return fmt.Errorf("failed to mount rootfs %s: %w", f.rootDir, err)
+		return fmt.Errorf("failed to mount rootfs %s (%s): %w", f.rootDir, mntOpt, err)
 	}
 	return nil
 }
@@ -192,3 +317,10 @@ func unmountAll(target string) error {
 	}
 	return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY)
 }
+
+// randomInt returns a random integer in [0, max) read from crypto/rand, failing the test on error.
+func randomInt(t *testing.T, max int) int {
+	n, err := rand.Int(rand.Reader, big.NewInt(int64(max)))
+	require.NoError(t, err)
+	return int(n.Int64())
+}
diff --git a/tx.go b/tx.go
index 7a873066c..766395de3 100644
--- a/tx.go
+++ b/tx.go
@@ -1,6 +1,7 @@
 package bbolt
 
 import (
+	"errors"
 	"fmt"
 	"io"
 	"os"
@@ -185,6 +186,11 @@ func (tx *Tx) Commit() error {
 
 	// If the high water mark has moved up then attempt to grow the database.
 	if tx.meta.pgid > opgid {
+		// Keeps the errors import used while the gofail block below is still a plain comment.
+		_ = errors.New("")
+		// gofail: var lackOfDiskSpace string
+		// tx.rollback()
+		// return errors.New(lackOfDiskSpace)
 		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
 			tx.rollback()
 			return err