-
Notifications
You must be signed in to change notification settings - Fork 651
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[1.3] backport go-dmflakey and robustness test (#812)
* tx.go: introduce failpoint before fdatasync It can be used for power failure cases. Signed-off-by: Wei Fu <[email protected]> (cherry picked from commit 445a07b) Signed-off-by: Wei Fu <[email protected]> * tests/*: introduce go-dmflakey This commit is to add go-binding for go-dmflakey. It's used to simulate powerfailure with common filesystems. Signed-off-by: Wei Fu <[email protected]> (cherry picked from commit b05bec2) Signed-off-by: Wei Fu <[email protected]> * tests/robustness: init with powerfailure case Add `Robustness Test` pipeline for robustness test cases. Signed-off-by: Wei Fu <[email protected]> (cherry picked from commit 300e72a) Signed-off-by: Wei Fu <[email protected]> --------- Signed-off-by: Wei Fu <[email protected]>
- Loading branch information
Showing
10 changed files
with
963 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
name: Robustness Test | ||
on: [push, pull_request] | ||
permissions: read-all | ||
jobs: | ||
test: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v4 | ||
- id: goversion | ||
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT" | ||
- uses: actions/setup-go@v4 | ||
with: | ||
go-version: ${{ steps.goversion.outputs.goversion }} | ||
- run: | | ||
make gofail-enable | ||
# build bbolt with failpoint | ||
go install ./cmd/bbolt | ||
sudo -E PATH=$PATH make test-robustness |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,322 @@ | ||
//go:build linux | ||
|
||
package dmflakey | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"strings" | ||
"time" | ||
|
||
"golang.org/x/sys/unix" | ||
) | ||
|
||
type featCfg struct { | ||
// SyncFS attempts to synchronize filesystem before inject failure. | ||
syncFS bool | ||
// interval is used to determine the up time for feature. | ||
// | ||
// For AllowWrites, it means that the device is available for `interval` seconds. | ||
// For Other features, the device exhibits unreliable behaviour for | ||
// `interval` seconds. | ||
interval time.Duration | ||
} | ||
|
||
// Default values. | ||
const ( | ||
// defaultImgSize is the default size for filesystem image. | ||
defaultImgSize int64 = 1024 * 1024 * 1024 * 10 // 10 GiB | ||
// defaultInterval is the default interval for the up time of feature. | ||
defaultInterval = 2 * time.Minute | ||
) | ||
|
||
// defaultFeatCfg is the default setting for flakey feature. | ||
var defaultFeatCfg = featCfg{interval: defaultInterval} | ||
|
||
// FeatOpt is used to configure failure feature. | ||
type FeatOpt func(*featCfg) | ||
|
||
// WithIntervalFeatOpt updates the up time for the feature. | ||
func WithIntervalFeatOpt(interval time.Duration) FeatOpt { | ||
return func(cfg *featCfg) { | ||
cfg.interval = interval | ||
} | ||
} | ||
|
||
// WithSyncFSFeatOpt is to determine if the caller wants to synchronize | ||
// filesystem before inject failure. | ||
func WithSyncFSFeatOpt(syncFS bool) FeatOpt { | ||
return func(cfg *featCfg) { | ||
cfg.syncFS = syncFS | ||
} | ||
} | ||
|
||
// Flakey is to inject failure into device. | ||
type Flakey interface { | ||
// DevicePath returns the flakey device path. | ||
DevicePath() string | ||
|
||
// Filesystem returns filesystem's type. | ||
Filesystem() FSType | ||
|
||
// AllowWrites allows write I/O. | ||
AllowWrites(opts ...FeatOpt) error | ||
|
||
// DropWrites drops all write I/O silently. | ||
DropWrites(opts ...FeatOpt) error | ||
|
||
// ErrorWrites drops all write I/O and returns error. | ||
ErrorWrites(opts ...FeatOpt) error | ||
|
||
// Teardown releases the flakey device. | ||
Teardown() error | ||
} | ||
|
||
// FSType represents the filesystem name. | ||
type FSType string | ||
|
||
// Supported filesystems. | ||
const ( | ||
FSTypeEXT4 FSType = "ext4" | ||
FSTypeXFS FSType = "xfs" | ||
) | ||
|
||
// InitFlakey creates an filesystem on a loopback device and returns Flakey on it. | ||
// | ||
// The device-mapper device will be /dev/mapper/$flakeyDevice. And the filesystem | ||
// image will be created at $dataStorePath/$flakeyDevice.img. By default, the | ||
// device is available for 2 minutes and size is 10 GiB. | ||
func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType) (_ Flakey, retErr error) { | ||
imgPath := filepath.Join(dataStorePath, fmt.Sprintf("%s.img", flakeyDevice)) | ||
if err := createEmptyFSImage(imgPath, fsType); err != nil { | ||
return nil, err | ||
} | ||
defer func() { | ||
if retErr != nil { | ||
os.RemoveAll(imgPath) | ||
} | ||
}() | ||
|
||
loopDevice, err := attachToLoopDevice(imgPath) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer func() { | ||
if retErr != nil { | ||
_ = detachLoopDevice(loopDevice) | ||
} | ||
}() | ||
|
||
imgSize, err := getBlkSize(loopDevice) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if err := newFlakeyDevice(flakeyDevice, loopDevice, defaultInterval); err != nil { | ||
return nil, err | ||
} | ||
|
||
return &flakey{ | ||
fsType: fsType, | ||
imgPath: imgPath, | ||
imgSize: imgSize, | ||
|
||
loopDevice: loopDevice, | ||
flakeyDevice: flakeyDevice, | ||
}, nil | ||
} | ||
|
||
type flakey struct { | ||
fsType FSType | ||
imgPath string | ||
imgSize int64 | ||
|
||
loopDevice string | ||
flakeyDevice string | ||
} | ||
|
||
// DevicePath returns the flakey device path. | ||
func (f *flakey) DevicePath() string { | ||
return fmt.Sprintf("/dev/mapper/%s", f.flakeyDevice) | ||
} | ||
|
||
// Filesystem returns filesystem's type. | ||
func (f *flakey) Filesystem() FSType { | ||
return f.fsType | ||
} | ||
|
||
// AllowWrites allows write I/O. | ||
func (f *flakey) AllowWrites(opts ...FeatOpt) error { | ||
var o = defaultFeatCfg | ||
for _, opt := range opts { | ||
opt(&o) | ||
} | ||
|
||
// NOTE: Table parameters | ||
// | ||
// 0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]] | ||
// | ||
// Mandatory parameters: | ||
// | ||
// <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. | ||
// <offset>: Starting sector within the device. | ||
// <up interval>: Number of seconds device is available. | ||
// <down interval>: Number of seconds device returns errors. | ||
// | ||
// Optional: | ||
// | ||
// If no feature parameters are present, during the periods of unreliability, all I/O returns errors. | ||
// | ||
// For AllowWrites, the device will handle data correctly in `interval` seconds. | ||
// | ||
// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html. | ||
table := fmt.Sprintf("0 %d flakey %s 0 %d 0", | ||
f.imgSize, f.loopDevice, int(o.interval.Seconds())) | ||
|
||
return reloadFlakeyDevice(f.flakeyDevice, o.syncFS, table) | ||
} | ||
|
||
// DropWrites drops all write I/O silently. | ||
func (f *flakey) DropWrites(opts ...FeatOpt) error { | ||
var o = defaultFeatCfg | ||
for _, opt := range opts { | ||
opt(&o) | ||
} | ||
|
||
// NOTE: Table parameters | ||
// | ||
// 0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]] | ||
// | ||
// Mandatory parameters: | ||
// | ||
// <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. | ||
// <offset>: Starting sector within the device. | ||
// <up interval>: Number of seconds device is available. | ||
// <down interval>: Number of seconds device returns errors. | ||
// | ||
// Optional: | ||
// | ||
// <num_features>: How many arguments (length of <feature_arguments>) | ||
// | ||
// For DropWrites, | ||
// | ||
// num_features: 1 (there is only one argument) | ||
// feature_arguments: drop_writes | ||
// | ||
// The Device will drop all the writes into disk in `interval` seconds. | ||
// Read I/O is handled correctly. | ||
// | ||
// For example, the application calls fsync, all the dirty pages will | ||
// be flushed into disk ideally. But during DropWrites, device will | ||
// ignore all the data and return successfully. It can be used to | ||
// simulate data-loss after power failure. | ||
// | ||
// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html. | ||
table := fmt.Sprintf("0 %d flakey %s 0 0 %d 1 drop_writes", | ||
f.imgSize, f.loopDevice, int(o.interval.Seconds())) | ||
|
||
return reloadFlakeyDevice(f.flakeyDevice, o.syncFS, table) | ||
} | ||
|
||
// ErrorWrites drops all write I/O and returns error. | ||
func (f *flakey) ErrorWrites(opts ...FeatOpt) error { | ||
var o = defaultFeatCfg | ||
for _, opt := range opts { | ||
opt(&o) | ||
} | ||
|
||
// NOTE: Table parameters | ||
// | ||
// 0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]] | ||
// | ||
// Mandatory parameters: | ||
// | ||
// <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. | ||
// <offset>: Starting sector within the device. | ||
// <up interval>: Number of seconds device is available. | ||
// <down interval>: Number of seconds device returns errors. | ||
// | ||
// Optional: | ||
// | ||
// <num_features>: How many arguments (length of <feature_arguments>) | ||
// | ||
// For ErrorWrites, | ||
// | ||
// num_features: 1 (there is only one argument) | ||
// feature_arguments: error_writes | ||
// | ||
// The Device will drop all the writes into disk in `interval` seconds | ||
// and return failure to caller. Read I/O is handled correctly. | ||
// | ||
// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html. | ||
table := fmt.Sprintf("0 %d flakey %s 0 0 %d 1 error_writes", | ||
f.imgSize, f.loopDevice, int(o.interval.Seconds())) | ||
|
||
return reloadFlakeyDevice(f.flakeyDevice, o.syncFS, table) | ||
} | ||
|
||
// Teardown releases the flakey device. | ||
func (f *flakey) Teardown() error { | ||
if err := deleteFlakeyDevice(f.flakeyDevice); err != nil { | ||
if !strings.Contains(err.Error(), "No such device or address") { | ||
return err | ||
} | ||
} | ||
if err := detachLoopDevice(f.loopDevice); err != nil { | ||
if !errors.Is(err, unix.ENXIO) { | ||
return err | ||
} | ||
} | ||
return os.RemoveAll(f.imgPath) | ||
} | ||
|
||
// createEmptyFSImage creates empty filesystem on dataStorePath folder with | ||
// default size - 10 GiB. | ||
func createEmptyFSImage(imgPath string, fsType FSType) error { | ||
if err := validateFSType(fsType); err != nil { | ||
return err | ||
} | ||
|
||
mkfs, err := exec.LookPath(fmt.Sprintf("mkfs.%s", fsType)) | ||
if err != nil { | ||
return fmt.Errorf("failed to ensure mkfs.%s: %w", fsType, err) | ||
} | ||
|
||
if _, err := os.Stat(imgPath); err == nil { | ||
return fmt.Errorf("failed to create image because %s already exists", imgPath) | ||
} | ||
|
||
f, err := os.Create(imgPath) | ||
if err != nil { | ||
return fmt.Errorf("failed to create image %s: %w", imgPath, err) | ||
} | ||
|
||
if err = func() error { | ||
defer f.Close() | ||
|
||
return f.Truncate(defaultImgSize) | ||
}(); err != nil { | ||
return fmt.Errorf("failed to truncate image %s with %v bytes: %w", | ||
imgPath, defaultImgSize, err) | ||
} | ||
|
||
output, err := exec.Command(mkfs, imgPath).CombinedOutput() | ||
if err != nil { | ||
return fmt.Errorf("failed to mkfs.%s on %s (out: %s): %w", | ||
fsType, imgPath, string(output), err) | ||
} | ||
return nil | ||
} | ||
|
||
// validateFSType validates the fs type input. | ||
func validateFSType(fsType FSType) error { | ||
switch fsType { | ||
case FSTypeEXT4, FSTypeXFS: | ||
return nil | ||
default: | ||
return fmt.Errorf("unsupported filesystem %s", fsType) | ||
} | ||
} |
Oops, something went wrong.