From 97bc7794af9171123ca0b3932ff8c8fb38eae575 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Thu, 7 Nov 2024 09:42:12 +0100 Subject: [PATCH] Next iteration Signed-off-by: Tom Wieczorek --- go.mod | 2 +- internal/os/unix/dirfd_linux.go | 52 +++- internal/os/unix/dirfd_linux_test.go | 47 +++ internal/os/unix/dirfd_unix.go | 317 ++++++++++++-------- internal/os/unix/dirfd_unix_test.go | 10 +- internal/slices/slices.go | 16 + inttest/reset/obstruct-data-dir.sh | 114 +++++++ inttest/reset/reset_test.go | 58 +++- pkg/cleanup/directories.go | 13 +- pkg/cleanup/directories_linux.go | 414 ++++++++++++++++---------- pkg/cleanup/directories_linux_test.go | 297 ++++++++++++++++++ pkg/cleanup/testdata/mountinfo | 35 +++ 12 files changed, 1065 insertions(+), 310 deletions(-) create mode 100644 internal/os/unix/dirfd_linux_test.go create mode 100644 inttest/reset/obstruct-data-dir.sh create mode 100644 pkg/cleanup/directories_linux_test.go create mode 100644 pkg/cleanup/testdata/mountinfo diff --git a/go.mod b/go.mod index 4e0ed2955019..0ede8cb08da7 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( go.etcd.io/etcd/etcdutl/v3 v3.5.16 go.uber.org/zap v1.27.0 golang.org/x/crypto v0.28.0 + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 golang.org/x/mod v0.21.0 golang.org/x/sync v0.8.0 golang.org/x/sys v0.26.0 @@ -257,7 +258,6 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/net v0.30.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect golang.org/x/term v0.25.0 // indirect diff --git a/internal/os/unix/dirfd_linux.go b/internal/os/unix/dirfd_linux.go index fe24e769c506..6bb137cd9503 100644 --- a/internal/os/unix/dirfd_linux.go +++ b/internal/os/unix/dirfd_linux.go @@ -31,12 +31,40 @@ import ( "golang.org/x/sys/unix" ) -// Opens the path with the given name by using the openat2 syscall. +// An open Linux-native handle to some path on the file system. +type LinuxPath interface { + Path + + // Stats this path using the fstatat(path, "", AT_EMPTY_PATH) syscall. + StatSelf() (*FileInfo, error) +} + +var _ LinuxPath = (*PathFD)(nil) + +// Stats this path using the fstatat(path, "", AT_EMPTY_PATH) syscall. +func (p *PathFD) StatSelf() (*FileInfo, error) { + return p.UnwrapDir().StatSelf() +} + +var _ LinuxPath = (*DirFD)(nil) + +// Stats this path using the fstatat(path, "", AT_EMPTY_PATH) syscall. +func (d *DirFD) StatSelf() (*FileInfo, error) { + return d.StatAt("", unix.AT_EMPTY_PATH) +} + +// Opens the path with the given name. +// The path is opened relative to the receiver, using the openat2 syscall. +// +// Note that, in contrast to [os.Open] and [os.OpenFile], the returned +// descriptor is not put into non-blocking mode automatically. Callers may +// decide if they want this by setting the [syscall.O_NONBLOCK] flag. +// // Available since Linux 5.6 (April 2020). // // https://www.man7.org/linux/man-pages/man2/openat2.2.html // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=fddb5d430ad9fa91b49b1d34d0202ffe2fa0e179 -func (d *DirFD) Open2(name string, how unix.OpenHow) (*DirFDEntry, error) { +func (d *DirFD) Open2(name string, how unix.OpenHow) (*PathFD, error) { var opened int if err := openAt2Support.guard(func() error { return syscallControl(d, func(fd uintptr) (err error) { @@ -45,13 +73,13 @@ func (d *DirFD) Open2(name string, how unix.OpenHow) (*DirFDEntry, error) { if err == nil { return nil } - return &os.PathError{Op: "openat2", Path: name, Err: err} + return pathErr("openat2", d, name, err) }) }); err != nil { return nil, err } - return (*DirFDEntry)(os.NewFile(uintptr(opened), name)), nil + return (*PathFD)(os.NewFile(uintptr(opened), name)), nil } // Opens the directory with the given name by using the openat2 syscall. @@ -60,7 +88,7 @@ func (d *DirFD) Open2(name string, how unix.OpenHow) (*DirFDEntry, error) { func (d *DirFD) OpenDir2(name string, how unix.OpenHow) (*DirFD, error) { how.Flags |= unix.O_DIRECTORY f, err := d.Open2(name, how) - return f.AsDir(), err + return f.UnwrapDir(), err } var openAt2Support = runtimeSupport{test: func() error { @@ -75,7 +103,9 @@ var openAt2Support = runtimeSupport{test: func() error { return nil }} -// Stats the path with the given name by using the statx syscall. +// Stats the path with the given name. +// The path is interpreted relative to the receiver, using the statx syscall. +// // Available since Linux 4.11 (May 2017). // // https://www.man7.org/linux/man-pages/man2/statx.2.html @@ -84,7 +114,7 @@ func (d *DirFD) StatX(name string, flags, mask int) (*FileInfoX, error) { const requiredMask = unix.STATX_MODE | unix.STATX_TYPE | unix.STATX_SIZE | unix.STATX_MTIME info := FileInfoX{Path: name} - err := d.RawStatX(name, flags, mask|requiredMask, &info.StatX) + err := d.NativeStatX(name, flags, mask|requiredMask, &info.StatX) if err != nil { return nil, err } @@ -99,11 +129,9 @@ func (d *DirFD) StatX(name string, flags, mask int) (*FileInfoX, error) { return &info, nil } -// Stats the path with the given name by using the statx syscall, returning the -// raw stats. Available since Linux 4.11 (May 2017). -// -// https://www.man7.org/linux/man-pages/man2/statx.2.html -func (d *DirFD) RawStatX(name string, flags, mask int, stat *StatX) error { +// Stats the path with the given name, just like [DirFD.StatX], but returning +// stats in the native format of the operating system. +func (d *DirFD) NativeStatX(name string, flags, mask int, stat *StatX) error { return statxSupport.guard(func() error { return syscallControl(d, func(fd uintptr) error { err := unix.Statx(int(fd), name, flags, mask, (*unix.Statx_t)(stat)) diff --git a/internal/os/unix/dirfd_linux_test.go b/internal/os/unix/dirfd_linux_test.go new file mode 100644 index 000000000000..4498a0d97f92 --- /dev/null +++ b/internal/os/unix/dirfd_linux_test.go @@ -0,0 +1,47 @@ +/* +Copyright 2024 k0s authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package unix_test + +import ( + "testing" + + osunix "github.com/k0sproject/k0s/internal/os/unix" + "golang.org/x/sys/unix" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPathFD_StatSelf(t *testing.T) { + dirPath := t.TempDir() + + p, err := osunix.OpenPath(dirPath, unix.O_PATH, 0) + require.NoError(t, err) + t.Cleanup(func() { assert.NoError(t, p.Close()) }) + + // An O_PATH descriptor cannot read anything. + _, err = p.UnwrapDir().Readdirnames(1) + assert.ErrorIs(t, err, unix.EBADF) + + // Verify that the fstatat syscall works for O_PATH file descriptors. + // It's not documented in the Linux man pages, just fstat is. + // See open(2). + stat, err := p.StatSelf() + if assert.NoError(t, err) { + assert.True(t, stat.IsDir()) + } +} diff --git a/internal/os/unix/dirfd_unix.go b/internal/os/unix/dirfd_unix.go index 05c5d3f06d79..efc0f947449e 100644 --- a/internal/os/unix/dirfd_unix.go +++ b/internal/os/unix/dirfd_unix.go @@ -31,6 +31,68 @@ import ( "golang.org/x/sys/unix" ) +// An open handle to some path on the file system. +type Path interface { + io.Closer + syscall.Conn + Name() string // Delegates to [os.File.Name]. + Stat() (os.FileInfo, error) // Delegates to [os.File.Stat]. +} + +// A file descriptor pointing to a path. +// It is unspecified if that descriptor is referring to a file or a directory. +type PathFD os.File + +// Opens a [PathFD] referring to the given path. +// +// This function can be used to open a path without knowing if it's a directory +// or a file, then use [PathFD.Stat] to figure out if [PathFD.UnwrapFile] or +// [PathFD.UnwrapDir] is appropriate. +// +// Note that, in contrast to [os.Open] and [os.OpenFile], the returned +// descriptor is not put into non-blocking mode automatically. Callers may +// decide if they want this by setting the [syscall.O_NONBLOCK] flag. +func OpenPath(path string, flags int, perm os.FileMode) (*PathFD, error) { + // Use the raw syscall instead of os.OpenFile here, as the latter tries to + // put the fds into non-blocking mode. + flags, mode, err := sysOpenFlags(flags, perm) + if err != nil { + return nil, &os.PathError{Op: "open", Path: path, Err: err} + } + + fd, err := syscall.Open(path, flags, mode) + if err != nil { + return nil, &os.PathError{Op: "open", Path: path, Err: err} + } + + return (*PathFD)(os.NewFile(uintptr(fd), path)), nil +} + +// The interface that [PathFD] is about to implement. +var _ Path = (*PathFD)(nil) + +// Delegates to [os.File.Close]. +func (p *PathFD) Close() error { return (*os.File)(p).Close() } + +// Delegates to [os.File.Name]. +func (p *PathFD) Name() string { return (*os.File)(p).Name() } + +// Delegates to [os.File.Stat]. +func (p *PathFD) Stat() (os.FileInfo, error) { return (*os.File)(p).Stat() } + +// Delegates to [os.File.SyscallConn]. +func (p *PathFD) SyscallConn() (syscall.RawConn, error) { return (*os.File)(p).SyscallConn() } + +// Converts this pointer to an [*os.File] without any additional checks. +// +// Note that both [os.File.ReadDir] and [os.File.Readdir] will NOT work if this +// pointer has been opened via a [DirFD] pointer. +// See [DirFD.Readdirnames] for details. +func (f *PathFD) UnwrapFile() *os.File { return (*os.File)(f) } + +// Converts this pointer to a [*DirFD] without any additional checks. +func (f *PathFD) UnwrapDir() *DirFD { return (*DirFD)(f) } + // A file descriptor pointing to a directory (a.k.a. dirfd). It uses the // syscalls that accept a dirfd, i.e. openat, fstatat ... // @@ -46,101 +108,62 @@ import ( // attack. type DirFD os.File -// The interfaces that [DirFD] is about to implement. -var _ interface { - io.Closer - syscall.Conn -} = (*DirFD)(nil) +// The interface that [DirFD] is about to implement. +var _ Path = (*DirFD)(nil) // Opens a [DirFD] referring to the given path. // // Note that this is not a chroot: The *at syscalls will only use dirfd to // resolve relative paths, and will happily follow symlinks and cross mount // points. -func OpenDir(path string) (*DirFD, error) { - f, err := os.OpenFile(path, syscall.O_DIRECTORY, 0) +func OpenDir(path string, flags int) (*DirFD, error) { + // Use the raw syscall instead of os.OpenFile here, as the latter tries to + // put the fds into non-blocking mode. + fd, err := syscall.Open(path, flags|syscall.O_DIRECTORY|syscall.O_CLOEXEC, 0) if err != nil { - return nil, err + return nil, &os.PathError{Op: "open", Path: path, Err: err} } - return (*DirFD)(f), nil -} - -// Close implements [io.Closer]. -func (d *DirFD) Close() error { - return (*os.File)(d).Close() + return (*DirFD)(os.NewFile(uintptr(fd), path)), nil } -// SyscallConn implements [syscall.Conn]. -func (d *DirFD) SyscallConn() (syscall.RawConn, error) { - return (*os.File)(d).SyscallConn() -} - -type DirFDEntry os.File - -// The interfaces that [DirFDEntry] is about to implement. -var _ interface { - io.Closer - syscall.Conn -} = (*DirFDEntry)(nil) +// Delegates to [os.File.Close]. +func (d *DirFD) Close() error { return (*os.File)(d).Close() } -// Close implements [io.Closer]. -func (e *DirFDEntry) Close() error { - return (*os.File)(e).Close() -} +// Delegates to [os.File.SyscallConn]. +func (d *DirFD) SyscallConn() (syscall.RawConn, error) { return (*os.File)(d).SyscallConn() } -// SyscallConn implements [syscall.Conn]. -func (e *DirFDEntry) SyscallConn() (syscall.RawConn, error) { - return (*os.File)(e).SyscallConn() -} +// Delegates to [os.File.Name]. +func (d *DirFD) Name() string { return (*os.File)(d).Name() } // Delegates to [io.File.Stat]. -func (e *DirFDEntry) Stat() (os.FileInfo, error) { - return (*os.File)(e).Stat() -} +func (d *DirFD) Stat() (os.FileInfo, error) { return (*os.File)(d).Stat() } -// Assumes that this entry is referring to a directory. -func (f *DirFDEntry) AsDir() *DirFD { - return (*DirFD)(f) -} +// Delegates to [io.File.Chown]. +func (d *DirFD) Chown(uid, gid int) error { return (*os.File)(d).Chown(uid, gid) } -// Returns the pointer to the underlying [os.File]. -// -// Note that both [os.File.ReadDir] and [os.File.Readdir] will NOT work. -// See [DirFD.Readdirnames] for details. -func (f *DirFDEntry) Unwrap() *os.File { - return (*os.File)(f) -} +// Delegates to [io.File.Chmod]. +func (d *DirFD) Chmod(mode os.FileMode) error { return (*os.File)(d).Chmod(mode) } -// Opens the path with the given name by using the openat syscall. +// Opens the path with the given name. +// The path is opened relative to the receiver, using the openat syscall. +// +// Note that, in contrast to [os.Open] and [os.OpenFile], the returned +// descriptor is not put into non-blocking mode automatically. Callers may +// decide if they want this by setting the [unix.O_NONBLOCK] flag. // // https://www.man7.org/linux/man-pages/man2/open.2.html -func (d *DirFD) Open(name string, flags int, mode os.FileMode) (*DirFDEntry, error) { +func (d *DirFD) Open(name string, flags int, mode os.FileMode) (*PathFD, error) { var opened int err := syscallControl(d, func(fd uintptr) error { - const mask = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky - if mode != (mode & mask) { - return &os.PathError{Op: "openat", Path: name, Err: errors.New("invalid mode bits")} - } - if mode != 0 && flags|os.O_CREATE == 0 { - return &os.PathError{Op: "openat", Path: name, Err: errors.New("mode may only be used when creating")} - } - - sysMode := uint32(mode & os.ModePerm) - if mode&os.ModeSetuid != 0 { - sysMode |= syscall.S_ISUID - } - if mode&os.ModeSetgid != 0 { - sysMode |= syscall.S_ISGID - } - if mode&os.ModeSticky != 0 { - sysMode |= syscall.S_ISVTX + flags, mode, err := sysOpenFlags(flags, mode) + if err != nil { + return pathErr("openat", d, name, err) } - var err error - opened, err = syscall.Openat(int(fd), name, flags|syscall.O_CLOEXEC, sysMode) + opened, err = unix.Openat(int(fd), name, flags, mode) if err != nil { - return &os.PathError{Op: "openat", Path: name, Err: err} + return pathErr("openat", d, name, err) } return nil @@ -149,56 +172,28 @@ func (d *DirFD) Open(name string, flags int, mode os.FileMode) (*DirFDEntry, err return nil, err } - return (*DirFDEntry)(os.NewFile(uintptr(opened), name)), nil -} - -// Remove the name and possibly the file it refers to using the unlinkat syscall. -// -// https://www.man7.org/linux/man-pages/man2/unlink.2.html -func (d *DirFD) Remove(name string) error { - return syscallControl(d, func(fd uintptr) error { - err := unix.Unlinkat(int(fd), name, 0) - if err != nil { - return &os.PathError{Op: "unlinkat", Path: name, Err: err} - } - - return nil - }) -} - -// Remove the directory with the given name using the unlinkat syscall. -// -// https://www.man7.org/linux/man-pages/man2/unlink.2.html -func (d *DirFD) RemoveDir(name string) error { - return syscallControl(d, func(fd uintptr) error { - err := unix.Unlinkat(int(fd), name, unix.AT_REMOVEDIR) - if err != nil { - return &os.PathError{Op: "unlinkat", Path: name, Err: err} - } - - return nil - }) + return (*PathFD)(os.NewFile(uintptr(opened), name)), nil } -// Opens the directory with the giveRn name. +// Opens the directory with the given name. +// The name is opened relative to the receiver, using the openat syscall. func (d *DirFD) OpenDir(name string, flags int) (*DirFD, error) { f, err := d.Open(name, flags|syscall.O_DIRECTORY, 0) - return f.AsDir(), err + return f.UnwrapDir(), err } -// Delegates to [io.File.Stat]. -func (d *DirFD) Stat() (os.FileInfo, error) { - return (*os.File)(d).Stat() -} +// Converts this pointer to a [*PathFD]. +func (d *DirFD) AsPath() *PathFD { return (*PathFD)(d) } -// Stats the path with the given name by using the fstatat syscall. +// Stats the path with the given name. +// The name is interpreted relative to the receiver, using the fstatat syscall. // // https://www.man7.org/linux/man-pages/man2/stat.2.html func (d *DirFD) StatAt(name string, flags int) (*FileInfo, error) { info := FileInfo{Path: name} if err := syscallControl(d, func(fd uintptr) error { if err := unix.Fstatat(int(fd), name, (*unix.Stat_t)(&info.Stat), flags); err != nil { - return &os.PathError{Op: "fstatat", Path: name, Err: err} + return pathErr("fstatat", d, name, err) } return nil @@ -209,23 +204,31 @@ func (d *DirFD) StatAt(name string, flags int) (*FileInfo, error) { return &info, nil } -// Returns information about a mounted filesystem by using the fstatfs syscall. +// Remove the name and possibly the file it refers to. +// The name is removed relative to the receiver, using the unlinkat syscall. // -// https://www.man7.org/linux/man-pages/man2/statfs.2.html -func (d *DirFD) StatFS() (st syscall.Statfs_t, _ error) { - err := syscallControl(d, func(fd uintptr) error { - if err := syscall.Fstatfs(int(fd), &st); err != nil { - return &os.PathError{ - Op: "fstatfs", - Path: (*os.File)(d).Name(), - Err: err, - } +// https://www.man7.org/linux/man-pages/man2/unlink.2.html +func (d *DirFD) Remove(name string) error { + return d.unlink(name, 0) +} + +// Remove the directory with the given name using the unlinkat syscall. +// The name is removed relative to the receiver, using the unlinkat syscall. +// +// https://www.man7.org/linux/man-pages/man2/unlink.2.html +func (d *DirFD) RemoveDir(name string) error { + return d.unlink(name, unix.AT_REMOVEDIR) +} + +func (d *DirFD) unlink(name string, flags int) error { + return syscallControl(d, func(fd uintptr) error { + err := unix.Unlinkat(int(fd), name, flags) + if err != nil { + return pathErr("unlinkat", d, name, err) } return nil }) - - return st, err } // Delegates to [os.File.Readdirnames]. @@ -241,7 +244,7 @@ func (d *DirFD) StatFS() (st syscall.Statfs_t, _ error) { // reimplement substantial OS-dependent parts of the standard library's internal // dir entry handling (which feels like the "nuclear option"). For this reason, // DirFD cannot simply implement [fs.FS], since the stat-like information should -// also be queriable in the [fs.DirEntry] interface. +// also be queryable in the [fs.DirEntry] interface. func (d *DirFD) Readdirnames(n int) ([]string, error) { return (*os.File)(d).Readdirnames(n) } @@ -254,10 +257,13 @@ func (d *DirFD) ReadEntryNames() iter.Seq2[string, error] { // Using n=1 is required in order to be able // to resume iteration after early breaks. names, err := d.Readdirnames(1) - eof := errors.Is(err, io.EOF) - if err != nil && !eof { - yield("", err) - return + var eof bool + if err != nil { + if !errors.Is(err, io.EOF) { + yield("", err) + return + } + eof = true } for _, name := range names { @@ -273,6 +279,19 @@ func (d *DirFD) ReadEntryNames() iter.Seq2[string, error] { } } +// Creates a new directory, just as [os.Mkdir] does. +// The directory is created relative to the receiver, using the mkdirat syscall. +// +// https://www.man7.org/linux/man-pages/man2/mkdir.2.html +func (d *DirFD) Mkdir(name string, mode os.FileMode) error { + return syscallControl(d, func(fd uintptr) error { + if err := unix.Mkdirat(int(fd), name, toSysMode(mode)); err != nil { + return pathErr("mkdirat", d, name, err) + } + return nil + }) +} + // Reads the named file and returns the contents, similar to [os.ReadFile]. func (d *DirFD) ReadFile(name string) ([]byte, error) { // Leverage fs.ReadFile for the heavy lifting. @@ -281,6 +300,17 @@ func (d *DirFD) ReadFile(name string) ([]byte, error) { return fs.ReadFile((*unsafeDIRFDFS)(d), name) } +// Writes data to the named file, just as [os.WriteFile] is doing it. +// The file is opened relative to d, using [DirFD.Open]. +func (d *DirFD) WriteFile(name string, data []byte, perm os.FileMode) error { + f, err := d.Open(name, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_TRUNC|syscall.O_NONBLOCK, perm) + if err != nil { + return err + } + _, err = f.UnwrapFile().Write(data) + return errors.Join(err, f.Close()) +} + type Stat unix.Stat_t func (s *Stat) ToFileMode() os.FileMode { return toFileMode(s.Mode) } @@ -344,11 +374,37 @@ type unsafeDIRFDFS DirFD // Open implements [fs.FS]. func (u *unsafeDIRFDFS) Open(name string) (fs.File, error) { - f, err := (*DirFD)(u).Open(name, 0, 0) - return f.Unwrap(), err + f, err := (*DirFD)(u).Open(name, syscall.O_NONBLOCK, 0) + return f.UnwrapFile(), err +} + +func sysOpenFlags(flags int, mode os.FileMode) (int, uint32, error) { + const mask = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky + if mode != (mode & mask) { + return 0, 0, errors.New("invalid mode bits") + } + if mode != 0 && flags|os.O_CREATE == 0 { + return 0, 0, errors.New("mode may only be used when creating") + } + + return flags | syscall.O_CLOEXEC, toSysMode(mode), nil +} + +func toSysMode(mode os.FileMode) uint32 { + sysMode := uint32(mode & os.ModePerm) + if mode&os.ModeSetuid != 0 { + sysMode |= syscall.S_ISUID + } + if mode&os.ModeSetgid != 0 { + sysMode |= syscall.S_ISGID + } + if mode&os.ModeSticky != 0 { + sysMode |= syscall.S_ISVTX + } + return sysMode } -func syscallControl(conn syscall.Conn, f func(fd uintptr) error) error { +func syscallControl[C syscall.Conn](conn C, f func(fd uintptr) error) error { rawConn, err := conn.SyscallConn() if err != nil { return err @@ -360,3 +416,10 @@ func syscallControl(conn syscall.Conn, f func(fd uintptr) error) error { } return err } + +func pathErr[P Path](op string, path P, name string, err error) *os.PathError { + if !filepath.IsAbs(name) { + name = filepath.Join(path.Name(), name) + } + return &os.PathError{Op: op, Path: name, Err: err} +} diff --git a/internal/os/unix/dirfd_unix_test.go b/internal/os/unix/dirfd_unix_test.go index 2ffcf329b99c..22834ad36663 100644 --- a/internal/os/unix/dirfd_unix_test.go +++ b/internal/os/unix/dirfd_unix_test.go @@ -1,3 +1,5 @@ +//go:build unix + /* Copyright 2024 k0s authors @@ -36,7 +38,7 @@ import ( func TestDirFD_NotExist(t *testing.T) { path := filepath.Join(t.TempDir(), "foo") - d, err := osunix.OpenDir(path) + d, err := osunix.OpenDir(path, 0) if err == nil { assert.NoError(t, d.Close()) } @@ -46,7 +48,7 @@ func TestDirFD_NotExist(t *testing.T) { func TestDirFD_Empty(t *testing.T) { path := t.TempDir() - d, err := osunix.OpenDir(path) + d, err := osunix.OpenDir(path, 0) require.NoError(t, err) t.Cleanup(func() { assert.NoError(t, d.Close()) }) @@ -76,7 +78,7 @@ func TestDirFD_Filled(t *testing.T) { require.NoError(t, os.Mkdir(filepath.Join(dirPath, "bar"), 0755)) require.NoError(t, os.WriteFile(filepath.Join(dirPath, "bar", "baz"), []byte("ipsum"), 0400)) - d, err := osunix.OpenDir(dirPath) + d, err := osunix.OpenDir(dirPath, 0) require.NoError(t, err) t.Cleanup(func() { assert.NoError(t, d.Close()) }) @@ -134,7 +136,7 @@ func TestDirFD_Entries(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dirPath, expectedNames[i]), nil, 0644)) } - d, err := osunix.OpenDir(dirPath) + d, err := osunix.OpenDir(dirPath, 0) require.NoError(t, err) close := sync.OnceFunc(func() { assert.NoError(t, d.Close()) }) t.Cleanup(close) diff --git a/internal/slices/slices.go b/internal/slices/slices.go index 96f2fa73c1be..b5f1ae833de2 100644 --- a/internal/slices/slices.go +++ b/internal/slices/slices.go @@ -19,8 +19,24 @@ package slices import ( "iter" "math/bits" + + "golang.org/x/exp/constraints" ) +// Yields the elements of s along with their 0-base element index. +// This is a way to make s behave as if it was a slice in range loops. +func Enumerate[I constraints.Integer, E any](s iter.Seq[E]) iter.Seq2[I, E] { + return func(yield func(I, E) bool) { + var i I + for e := range s { + if !yield(i, e) { + return + } + i++ + } + } +} + // Yields all sub-slices of the given slice. In other words, AllSubSlices // computes the mathematical power set of s by yielding all of the power set's // elements. diff --git a/inttest/reset/obstruct-data-dir.sh b/inttest/reset/obstruct-data-dir.sh new file mode 100644 index 000000000000..b2849faaf770 --- /dev/null +++ b/inttest/reset/obstruct-data-dir.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env sh +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2024 k0s authors +#shellcheck shell=ash + +set -eu + +# FIXME check for overmounts? https://github.com/util-linux/util-linux/commit/e2f6c9eaa2deb019ba2a9dad495881b20e797f91 + +make_dir() { mkdir -- "$1" && echo "$1"; } +make_file() { echo "$1" >"$1" && echo "$1"; } + +make_bind_mounts() { + local real="$1" + local target="$2" + + # Directory bind mount + make_dir "$real/real_dir" + make_file "$real/real_dir/real_dir_info.txt" + make_dir "$target/bind_dir" + mount --bind -- "$real/real_dir" "$target/bind_dir" + + # File bind mount + make_file "$real/real_file.txt" + make_file "$target/bind_file.txt" + mount --bind -- "$real/real_file.txt" "$target/bind_file.txt" + + # Recursive directory bind mount + make_dir "$real/real_recursive_dir" + make_file "$real/real_recursive_dir/real_recursive_dir.txt" + make_dir "$real/real_recursive_dir/bind_dir" + mount --bind -- "$real/real_dir" "$real/real_recursive_dir/bind_dir" + make_file "$real/real_recursive_dir/bind_file.txt" + mount --bind -- "$real/real_file.txt" "$real/real_recursive_dir/bind_file.txt" + make_dir "$target/rbind_dir" + mount --rbind -- "$real/real_recursive_dir" "$target/rbind_dir" +} + +create() { + local dataDir="$1" + local scratchDir="$2" + + local dir="$dataDir"/obstructed + make_dir "$dir" + + # Directories and files with restricted permissions + make_dir "$dir/restricted_dir" + make_file "$dir/restricted_dir/no_read_file.txt" + chmod 000 -- "$dir/restricted_dir/no_read_file.txt" # No permissions on the file + make_dir "$dir/restricted_dir/no_exec_dir" + chmod 000 -- "$dir/restricted_dir/no_exec_dir" # No permissions on the directory + make_dir "$dir/restricted_dir/no_exec_nonempty_dir" + make_file "$dir/restricted_dir/no_exec_nonempty_dir/.hidden_file" + chmod 000 -- "$dir/restricted_dir/no_exec_nonempty_dir" # No permissions on the directory + + # Symlinks pointing outside the directory tree + make_dir "$scratchDir/some_dir" + make_file "$scratchDir/some_dir/real_file.txt" + ln -s -- "$scratchDir/some_dir/real_file.txt" "$dir/symlink_to_file" # Symlink to a file + ln -s -- "$scratchDir/some_dir" "$dir/symlink_to_dir" # Symlink to a directory + + # Bind mounts pointing outside the directory tree + make_bind_mounts "$scratchDir" "$dir" + + # Bind mounts outside the directory tree pointing into it + # make_bind_mounts "$dir" "$scratchDir" +} + +#shellcheck disable=2319 +verify() { + local scratchDir="$2" + local errors=0 + + [ -f "$scratchDir/real_dir/real_file.txt" ] || { + echo Not a file "($?)": "$scratchDir/real_dir/real_file.txt" + errors=1 + } + [ -d "$scratchDir/real_dir" ] || { + echo Not a directory "($?)": "$scratchDir/real_dir" + errors=1 + } + + [ -f "$scratchDir/bound_dir/file_in_bound_dir.txt" ] || { + echo Not a file "($?)": "$scratchDir/bound_dir/file_in_bound_dir.txt" + errors=1 + } + [ -d "$scratchDir/bound_dir" ] || { + echo Not a directory "($?)": "$scratchDir/bound_dir" + errors=1 + } + [ -f "$scratchDir/bound_file.txt" ] || { + echo Not a file "($?)": "$scratchDir/bound_file.txt" + errors=1 + } + + return $errors +} + +case "$1" in +create) + shift + create "$@" + ;; + +verify) + shift + verify "$@" + ;; + +*) + echo Dunno about "$@" >&2 + exit 1 + ;; +esac diff --git a/inttest/reset/reset_test.go b/inttest/reset/reset_test.go index e34fcef8d989..cadad4521875 100644 --- a/inttest/reset/reset_test.go +++ b/inttest/reset/reset_test.go @@ -17,6 +17,12 @@ limitations under the License. package reset import ( + "bytes" + _ "embed" + "fmt" + "io" + "slices" + "strings" "testing" testifysuite "github.com/stretchr/testify/suite" @@ -28,11 +34,17 @@ type suite struct { common.BootlooseSuite } +//go:embed obstruct-data-dir.sh +var obstructScript []byte + func (s *suite) TestReset() { ctx := s.Context() workerNode := s.WorkerNode(0) - if ok := s.Run("k0s gets up", func() { + if !s.Run("k0s gets up", func() { + if true { + return + } s.Require().NoError(s.InitController(0, "--disable-components=konnectivity-server,metrics-server")) s.Require().NoError(s.RunWorkers()) @@ -44,11 +56,7 @@ func (s *suite) TestReset() { s.T().Log("waiting to see CNI pods ready") s.NoError(common.WaitForKubeRouterReady(ctx, kc), "CNI did not start") - }); !ok { - return - } - s.Run("k0s reset", func() { ssh, err := s.SSH(ctx, workerNode) s.Require().NoError(err) defer ssh.Disconnect() @@ -57,14 +65,52 @@ func (s *suite) TestReset() { s.NoError(ssh.Exec(ctx, "test -d /run/k0s", common.SSHStreams{}), "/run/k0s is not a directory") s.NoError(ssh.Exec(ctx, "pidof containerd-shim-runc-v2 >&2", common.SSHStreams{}), "Expected some running containerd shims") + }) { + return + } - s.NoError(s.StopWorker(workerNode), "Failed to stop k0s") + var scratchDir bytes.Buffer + var createdPaths bytes.Buffer + if !s.Run("prepare k0s reset", func() { + // s.NoError(s.StopWorker(workerNode), "Failed to stop k0s") + + ssh, err := s.SSH(ctx, workerNode) + s.Require().NoError(err) + defer ssh.Disconnect() + + err = ssh.Exec(ctx, "mktemp -t -d k0s_reset_inttest.XXXXXX", common.SSHStreams{Out: &scratchDir}) + s.Require().NoError(err) + scratchDir.Truncate(slices.Index(scratchDir.Bytes(), '\n')) + + streams, flushStreams := common.TestLogStreams(s.T(), "obstruct data dir") + streams.In = bytes.NewReader(obstructScript) + streams.Out = io.MultiWriter(&createdPaths, streams.Out) + err = ssh.Exec(ctx, fmt.Sprintf("sh -s -- create /var/lib/k0s %q", &scratchDir), streams) + flushStreams() + s.Require().NoError(err) + }) { + return + } + + s.Run("k0s reset", func() { + ssh, err := s.SSH(ctx, workerNode) + s.Require().NoError(err) + defer ssh.Disconnect() streams, flushStreams := common.TestLogStreams(s.T(), "reset") err = ssh.Exec(ctx, "k0s reset --debug", streams) flushStreams() s.NoError(err, "k0s reset didn't exit cleanly") + scratchDir := scratchDir.String() + for _, path := range strings.Split(string(bytes.TrimSpace(createdPaths.Bytes())), "\n") { + if strings.HasPrefix(path, scratchDir) { + s.NoError(ssh.Exec(ctx, fmt.Sprintf("test -e %q", path), common.SSHStreams{}), "Failed to verify existence of %s", path) + } else { + s.NoError(ssh.Exec(ctx, fmt.Sprintf("! test -e %q", path), common.SSHStreams{}), "Failed to verify non-existence of %s", path) + } + } + // /var/lib/k0s is a mount point in the Docker container and can't be deleted, so it must be empty s.NoError(ssh.Exec(ctx, `x="$(ls -A /var/lib/k0s)" && echo "$x" >&2 && [ -z "$x" ]`, common.SSHStreams{}), "/var/lib/k0s is not empty") s.NoError(ssh.Exec(ctx, "! test -e /run/k0s", common.SSHStreams{}), "/run/k0s still exists") diff --git a/pkg/cleanup/directories.go b/pkg/cleanup/directories.go index 540a9f928a95..654060df10be 100644 --- a/pkg/cleanup/directories.go +++ b/pkg/cleanup/directories.go @@ -35,8 +35,19 @@ func (d *directories) Name() string { return "remove directories step" } -// Run removes all kubelet mounts and deletes generated dataDir and runDir func (d *directories) Run() error { + var errs []error + if err := cleanupSameMount(logrus.StandardLogger(), d.Config.dataDir); err != nil { + errs = append(errs, fmt.Errorf("failed to delete data directory: %w", err)) + } + if err := cleanupSameMount(logrus.StandardLogger(), d.Config.runDir); err != nil { + errs = append(errs, fmt.Errorf("failed to delete run directory: %w", err)) + } + return errors.Join(errs...) +} + +// Run removes all kubelet mounts and deletes generated dataDir and runDir +func (d *directories) RunOrig() error { // unmount any leftover overlays (such as in alpine) mounter := mount.New("") procMounts, err := mounter.List() diff --git a/pkg/cleanup/directories_linux.go b/pkg/cleanup/directories_linux.go index a4df374f9983..25c42312abfb 100644 --- a/pkg/cleanup/directories_linux.go +++ b/pkg/cleanup/directories_linux.go @@ -17,6 +17,8 @@ limitations under the License. package cleanup import ( + "bufio" + "bytes" "errors" "fmt" "os" @@ -29,41 +31,49 @@ import ( "golang.org/x/sys/unix" ) -type sameDeviceCleaner struct { - log logrus.FieldLogger -} +const ( + cleanupOFlags = unix.O_NOFOLLOW + cleanupAtFlags = unix.AT_NO_AUTOMOUNT | unix.AT_SYMLINK_NOFOLLOW + cleanupResolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS +) -func cleanSameDevice(log logrus.FieldLogger, dataDirPath string) (err error) { - dataDir, err := osunix.OpenDir(dataDirPath) +func cleanupSameMount(log logrus.FieldLogger, dirPath string) (err error) { + // The real path is required as the code may be checking the mount info via + // the proc filesystem. + realDirPath, err := filepath.EvalSymlinks(dirPath) if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil + } return err } - defer func() { - err = errors.Join(err, dataDir.Close()) - }() - cleaner := sameDeviceCleaner{log: log} + dir, err := osunix.OpenDir(realDirPath, cleanupOFlags) + if err != nil { + return err + } + defer func() { err = errors.Join(err, dir.Close()) }() - empty, err := cleaner.cleanEntries(dataDir, dataDirPath, true) + empty, err := cleanupPathNames(log, dir, realDirPath, true) if err != nil { return err } if empty { - if err := os.Remove(dataDirPath); err != nil && !errors.Is(err, os.ErrNotExist) { - log.WithError(err).Warn("Leaving behind ", dataDirPath) + if err := os.Remove(realDirPath); err != nil && !errors.Is(err, os.ErrNotExist) { + log.WithError(err).Warn("Leaving behind ", realDirPath) } } return nil } -func (c *sameDeviceCleaner) cleanEntries(dir *osunix.DirFD, dirPath string, unlink bool) (bool, error) { +func cleanupPathNames(log logrus.FieldLogger, dir *osunix.DirFD, dirPath string, unlink bool) (bool, error) { var leftovers bool for name, err := range dir.ReadEntryNames() { if err != nil { return false, fmt.Errorf("failed to enumerate directory entries: %w", err) } - if !c.cleanEntryLoop(dir, dirPath, name, unlink) { + if !cleanupPathNameLoop(log, dir, dirPath, name, unlink) { leftovers = true } } @@ -71,57 +81,82 @@ func (c *sameDeviceCleaner) cleanEntries(dir *osunix.DirFD, dirPath string, unli return !leftovers, nil } -func (c *sameDeviceCleaner) cleanEntryLoop(dir *osunix.DirFD, dirPath, name string, unlink bool) bool { +type cleanupOutcome uint8 + +const ( + cleanIgnored cleanupOutcome = iota + 1 + cleanRetry + cleanUnlinked +) + +func cleanupPathNameLoop(log logrus.FieldLogger, dir *osunix.DirFD, dirPath, name string, unlink bool) bool { for attempt := 1; ; attempt++ { - outcome, err := c.cleanEntry(dir, dirPath, name, unlink) + outcome, err := cleanupPathName(log, dir, dirPath, name, unlink) if err == nil { switch outcome { case cleanUnlinked: return true case cleanIgnored: return false - case cleanUnmounted: - if attempt < 10 { + case cleanRetry: + if attempt < 256 { continue } err = errors.New("too many attempts") default: - err = fmt.Errorf("unexpected outcome (%d)", outcome) + log.WithError(err).Errorf("Unexpected outcome while cleaning up %s/%s: %d", dirPath, name, outcome) + return false } } if errors.Is(err, os.ErrNotExist) { return true } + if errors.Is(err, syscall.EINTR) { + continue + } - c.log.WithError(err).Warnf("Leaving behind %s/%s", dirPath, name) + log.WithError(err).Warnf("Leaving behind %s/%s", dirPath, name) return false } } -func (c *sameDeviceCleaner) cleanEntry(dir *osunix.DirFD, dirPath, name string, unlink bool) (_ cleanOutcome, err error) { - c.log.Debugf("cleanEntry: %s/%s %t", dirPath, name, unlink) +func cleanupPathName(log logrus.FieldLogger, dir *osunix.DirFD, dirPath, name string, unlink bool) (_ cleanupOutcome, err error) { + log.Debugf("cleanEntry: %s/%s %t", dirPath, name, unlink) if unlink { - outcome, err := c.unlinkEntry(dir, dirPath, name) - if err != nil || outcome != cleanRecurse { - return outcome, err + log.Debugf("Trying to unlink %s/%s", dirPath, name) + if outcome, err := unlinkPathName(log, dir, dirPath, name); err != nil { + return 0, err + } else if outcome == unlinkUnlinked { + return cleanUnlinked, nil + } else if outcome == unlinkUnmounted { + // Path has been unmounted. Retry to catch overmounts. + return cleanRetry, nil } } // Try to recurse into the directory. - subDir, isMountPoint, err := c.openDirMountAware(dir, dirPath, name) + log.Debugf("Trying to to open %s/%s", dirPath, name) + subDir, isMountPoint, err := openDirName(dir, dirPath, name) if err != nil { + // When not unlinking and this is not a directory, it might be a mounted + // file. Try to unmount. if !unlink && errors.Is(err, unix.ENOTDIR) { - ok, err := c.maybeUnmountEntry(dir, dirPath, name) - if err != nil { + if status, err := getPathNameMountStatus(dir, dirPath, name); err != nil { return 0, err + } else if status != pathMountStatusRegular { + err := unmount(log, filepath.Join(dirPath, name)) + if err == nil { + // Path has been unmounted. Retry to catch overmounts. + return cleanRetry, nil + } + if errors.Is(err, unix.EINVAL) { + // Not a mount point (or the mount point is locked). + return cleanIgnored, nil + } } - if ok { - return cleanUnmounted, nil - } else { - return cleanIgnored, nil - } + return 0, err } return 0, err @@ -141,7 +176,7 @@ func (c *sameDeviceCleaner) cleanEntry(dir *osunix.DirFD, dirPath, name string, var empty bool subDirPath := filepath.Join(dirPath, name) - empty, err = c.cleanEntries(subDir, subDirPath, unlink) + empty, err = cleanupPathNames(log, subDir, subDirPath, unlink) if err != nil { return 0, err } @@ -152,10 +187,10 @@ func (c *sameDeviceCleaner) cleanEntry(dir *osunix.DirFD, dirPath, name string, } if isMountPoint { - if err := c.unmountEntry(subDirPath); err != nil { + if err := unmount(log, subDirPath); err != nil { return 0, err } - return cleanUnmounted, nil + return cleanRetry, nil } if unlink && empty { @@ -169,68 +204,21 @@ func (c *sameDeviceCleaner) cleanEntry(dir *osunix.DirFD, dirPath, name string, return cleanIgnored, nil } -func (c *sameDeviceCleaner) maybeUnmountEntry(dir *osunix.DirFD, dirPath, name string) (bool, error) { - if ok, err := isPathMountPoint(dir, dirPath, name); err != nil { - return false, err - } else if !ok { - return false, nil - } - - if err := c.unmountEntry(filepath.Join(dirPath, name)); err != nil { - return false, err - } - - return true, nil -} - -func (c *sameDeviceCleaner) unmountEntry(path string) error { - c.log.Info("Attempting to unmount ", path) - if err := unix.Unmount(path, unix.UMOUNT_NOFOLLOW); err != nil { - return &os.PathError{Op: "unmount", Path: path, Err: err} - } - - return nil -} - -func isPathMountPoint(dir *osunix.DirFD, dirPath, name string) (_ bool, err error) { - if entry, err := dir.Open2(name, unix.OpenHow{ - Flags: cleanOFlags | unix.O_PATH, - Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS | unix.RESOLVE_NO_XDEV, - }); err == nil { - return false, entry.Close() - } else if errors.Is(err, unix.EXDEV) { - return true, nil - } else if !errors.Is(err, errors.ErrUnsupported) { - return false, err - } - - entry, err := dir.Open(name, cleanOFlags|unix.O_PATH, 0) - if err != nil { - return false, err - } - - defer func() { err = errors.Join(err, entry.Close()) }() - return canExpireMountPoint(entry, filepath.Join(dirPath, name)) -} - -type cleanOutcome uint8 +type unlinkOutcome uint8 const ( - cleanIgnored cleanOutcome = iota + 1 - cleanRecurse - cleanUnmounted - cleanUnlinked + unlinkUnlinked unlinkOutcome = iota + 1 + unlinkRecurse + unlinkUnmounted ) -func (c *sameDeviceCleaner) unlinkEntry(dir *osunix.DirFD, dirPath, name string) (cleanOutcome, error) { - c.log.Debugf("unlinkEntry: %s/%s", dirPath, name) - +func unlinkPathName(log logrus.FieldLogger, dir *osunix.DirFD, dirPath, name string) (unlinkOutcome, error) { // First try to simply unlink the name. // The assumption here is that mount points cannot be simply unlinked. fileErr := dir.Remove(name) if fileErr == nil || errors.Is(fileErr, os.ErrNotExist) { // That worked. Mission accomplished. - return cleanUnlinked, nil + return unlinkUnlinked, nil } // Try to remove an empty directory. @@ -238,66 +226,49 @@ func (c *sameDeviceCleaner) unlinkEntry(dir *osunix.DirFD, dirPath, name string) switch { case dirErr == nil: // That worked. Mission accomplished. - return cleanUnlinked, nil + return unlinkUnlinked, nil case errors.Is(dirErr, os.ErrExist): // It's a non-empty directory. - return cleanRecurse, nil + return unlinkRecurse, nil case errors.Is(dirErr, unix.ENOTDIR): - // It's not a directory. Try to unmount. - ok, err := c.maybeUnmountEntry(dir, dirPath, name) - if err != nil { - return 0, err - } - if ok { - return cleanUnmounted, nil + // It's not a directory. If it's a mount point, try to unmount it. + if status, err := getPathNameMountStatus(dir, dirPath, name); err != nil { + return 0, errors.Join(fileErr, err) + } else if status != pathMountStatusRegular { + return unlinkUnmounted, unmount(log, filepath.Join(dirPath, name)) } return 0, fileErr default: // Try to clean up recursively for all other errors. - return cleanRecurse, nil - } -} - -const ( - cleanOFlags = unix.O_NOFOLLOW - cleanAtFlags = unix.AT_NO_AUTOMOUNT | unix.AT_SYMLINK_NOFOLLOW -) - -func (c *sameDeviceCleaner) openDirMountAware(dir *osunix.DirFD, dirPath, name string) (*osunix.DirFD, bool, error) { - subDir, isMountPoint, err := c.openDirMountAwareOpenAt2(dir, name) - if err == nil || !errors.Is(err, errors.ErrUnsupported) { - return subDir, isMountPoint, err + return unlinkRecurse, nil } - return c.openDirMountAwareLegacy(dir, dirPath, name) } -func (c *sameDeviceCleaner) openDirMountAwareOpenAt2(dir *osunix.DirFD, name string) (*osunix.DirFD, bool, error) { - const cleanResolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS - +func openDirName(dir *osunix.DirFD, dirPath, name string) (_ *osunix.DirFD, isMountPoint bool, _ error) { // Try to use openat2 to open it in a way that won't cross mounts. subDir, err := dir.OpenDir2(name, unix.OpenHow{ - Flags: cleanOFlags, - Resolve: cleanResolveFlags | unix.RESOLVE_NO_XDEV, + Flags: cleanupOFlags, + Resolve: cleanupResolveFlags | unix.RESOLVE_NO_XDEV, }) // Did we cross a mount point? - var isMountPoint bool if errors.Is(err, unix.EXDEV) { isMountPoint = true subDir, err = dir.OpenDir2(name, unix.OpenHow{ - Flags: cleanOFlags, - Resolve: cleanResolveFlags, + Flags: cleanupOFlags, + Resolve: cleanupResolveFlags, }) } - return subDir, isMountPoint, err -} + if err == nil || !errors.Is(err, errors.ErrUnsupported) { + return subDir, isMountPoint, err + } -func (c *sameDeviceCleaner) openDirMountAwareLegacy(dir *osunix.DirFD, dirPath, name string) (_ *osunix.DirFD, _ bool, err error) { - subDir, err := dir.OpenDir(name, cleanOFlags) + // Fallback to legacy open. + subDir, err = dir.OpenDir(name, cleanupOFlags) if err != nil { return nil, false, err } @@ -309,60 +280,185 @@ func (c *sameDeviceCleaner) openDirMountAwareLegacy(dir *osunix.DirFD, dirPath, } }() + subDirPath := filepath.Join(dirPath, name) + status, err := getPathMountStatus(dir, subDir.AsPath(), subDirPath) + if err != nil { + return nil, false, err + } + if status == pathMountStatusMountPoint { + isMountPoint = true + } else if status == pathMountStatusUnknown { + // There's still no bullet-proof evidence to rule out that path is + // actually a mount point. As a last resort, have a look at the proc fs. + isMountPoint, err = mountInfoListsMountPoint("/proc/self/mountinfo", subDirPath) + if err != nil { + // The proc filesystem check failed, too. No other checks are left. + // Assume that it's not a mount point. + isMountPoint = false + } + } + + close = false + return subDir, isMountPoint, nil +} + +type pathMountStatus uint8 + +const ( + pathMountStatusUnknown pathMountStatus = iota + pathMountStatusRegular + pathMountStatusMountPoint +) + +func getPathNameMountStatus(dir *osunix.DirFD, dirPath, name string) (pathMountStatus, error) { + if path, err := dir.Open2(name, unix.OpenHow{ + Flags: cleanupOFlags | unix.O_PATH, + Resolve: cleanupResolveFlags | unix.RESOLVE_NO_XDEV, + }); err == nil { + return pathMountStatusRegular, path.Close() + } else if errors.Is(err, unix.EXDEV) { + return pathMountStatusMountPoint, nil + } else if !errors.Is(err, errors.ErrUnsupported) { + return 0, err + } + + path, err := dir.Open(name, cleanupOFlags|unix.O_PATH, 0) + if err != nil { + return 0, err + } + + defer func() { err = errors.Join(err, path.Close()) }() + return getPathMountStatus(dir, path, filepath.Join(dirPath, name)) +} + +func getPathMountStatus(dir *osunix.DirFD, fd osunix.LinuxPath, path string) (pathMountStatus, error) { // Don't bother to try statx() here. The interesting fields (stx_mnt_id) and // attributes (STATX_ATTR_MOUNT_ROOT) have been introduced in Linux 5.8, // whereas openat2() is a thing since Linux 5.6. So its highly unlikely that // those will be available when openat2() isn't. - // Instead, try to expire the mount point. - if ok, err := canExpireMountPoint(subDir, filepath.Join(dirPath, name)); err != nil { - return nil, false, err - } else { - close = false - return subDir, ok, nil + // Check if the paths have different device numbers. + if dirStat, err := dir.StatSelf(); err != nil { + return 0, err + } else if pathStat, err := fd.StatSelf(); err != nil { + return 0, err + } else if dirStat.Dev != pathStat.Dev { + return pathMountStatusMountPoint, nil } -} -func canExpireMountPoint( - fd interface { - Stat() (os.FileInfo, error) - SyscallConn() (syscall.RawConn, error) // Just to prove there's a real file descriptor - }, - path string, -) (bool, error) { + // Try to expire the mount point. err := unix.Unmount(path, unix.MNT_EXPIRE|unix.UMOUNT_NOFOLLOW) switch { case errors.Is(err, unix.EINVAL): - // This is the error we expect when path is a mount point. We have an - // open file descriptor for it, so it should be in use. - return false, nil + // This is the expected error when path is not a mount point. Note that + // there's still the chance that path is referring to a locked mount + // point, i.e. a mount point that is part of a more privileged mount + // namespace than k0s is in. That's not easy to rule out ... + // See https://www.man7.org/linux/man-pages/man2/umount.2.html#ERRORS. + // See https://man7.org/linux/man-pages/man7/mount_namespaces.7.html. + return pathMountStatusUnknown, nil case errors.Is(err, unix.EBUSY): - // This is the error we expect if path is a mount point. We have an open - // file descriptor to it, so it should be in use. - return true, nil + // This is the expected error when path is a mount point. It indicates + // that the resource is in use, which is guaranteed because there's an + // open file descriptor for it. + return pathMountStatusMountPoint, nil case errors.Is(err, unix.EAGAIN): - // This is the error we expect when path is an unused mount point. This - // shouldn't happen because we still have an open file descriptor to it. - // However, this error indicates that path is indeed a mount point. - return true, nil + // This is the expected error when path is an unused mount point. This + // shouldn't happen, since there's still an open file descriptor to path. + return 0, &os.PathError{ + Op: "unmount", + Path: path, + Err: fmt.Errorf("supposedly unreachable code path: %w", err), + } - case err == nil: - // This means that we've unmounted path as an already expired mount - // point. This shouldn't be possible, since we're actually holding a - // file descriptor to the mount point. + case errors.Is(err, unix.EPERM): + // This is the expected error if k0s doesn't have the privileges to + // unmount path. Since this code should be run with root privileges, + // this is not expected to happen. Anyhow, don't bail out. + return pathMountStatusUnknown, nil - // Anyhow, we're checking if name still exists. - if _, statErr := fd.Stat(); statErr != nil { - return false, statErr + case err == nil: + // This means that the path was unmounted, as it has already been + // expired before. This shouldn't happen, since there's still an open + // file descriptor to path. + return 0, &os.PathError{ + Op: "unmount", + Path: path, + Err: errors.New("supposedly unreachable code path: success"), } - // It still exists. Just return it as a mount point ¯\_(ツ)_/¯ - return true, nil - default: // Pass on all other errors. - return false, &os.PathError{Op: "unmount", Path: path, Err: err} + return 0, &os.PathError{Op: "unmount", Path: path, Err: err} } } + +// Checks whether path is listed as a mount point in the proc filesystems +// mountinfo file. +// +// https://man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html +func mountInfoListsMountPoint(mountInfoPath, path string) (bool, error) { + mountInfoBytes, err := os.ReadFile(mountInfoPath) + if err != nil { + return false, err + } + + mountInfoScanner := bufio.NewScanner(bytes.NewReader(mountInfoBytes)) + for mountInfoScanner.Scan() { + // The fifth field is the mount point. + fields := bytes.SplitN(mountInfoScanner.Bytes(), []byte{' '}, 6) + // Some characters are octal-escaped, most notably the space character. + if len(fields) > 5 && equalsOctalsUnsecaped(fields[4], path) { + return true, nil + } + } + + return false, mountInfoScanner.Err() +} + +// Compares if data and str are equal, converting any octal escape sequences of +// the form \NNN in data to their respective ASCII character on the fly. +func equalsOctalsUnsecaped(data []byte, str string) bool { + dlen, slen := len(data), len(str) + + // An escape sequence takes 4 bytes. + // The unescaped length of data is in range [dlen/4, dlen]. + if slen < dlen/4 || slen > dlen { + return false // Lengths don't match, data and str cannot be equal. + } + + doff := 0 + for soff := 0; soff < slen; soff, doff = soff+1, doff+1 { + if doff >= dlen { + return false // str is longer than unescaped data + } + ch := data[doff] + if ch == '\\' && doff < dlen-3 { // The next three bytes should be octal digits. + d1, d2, d3 := data[doff+1]-'0', data[doff+2]-'0', data[doff+3]-'0' + // The ASCII character range is [0, 127] decimal, which corresponds + // to [0, 177] octal. Check if the digits are in range. + if d1 <= 1 && d2 <= 7 && d3 <= 7 { + ch = d1<<6 | d2<<3 | d3 // Convert from octal digits (3 bits per digit). + doff += 3 // Skip the three digits in the next iteration. + } + } + + if str[soff] != ch { + return false + } + } + + return doff == dlen // Both are equal if data has been fully read. +} + +func unmount(log logrus.FieldLogger, path string) error { + log.Debug("Attempting to unmount ", path) + if err := unix.Unmount(path, unix.UMOUNT_NOFOLLOW); err != nil { + return &os.PathError{Op: "unmount", Path: path, Err: err} + } + + log.Debug("Unmounted ", path) + return nil +} diff --git a/pkg/cleanup/directories_linux_test.go b/pkg/cleanup/directories_linux_test.go new file mode 100644 index 000000000000..d841928011b9 --- /dev/null +++ b/pkg/cleanup/directories_linux_test.go @@ -0,0 +1,297 @@ +/* +Copyright 2024 k0s authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cleanup + +import ( + "cmp" + "iter" + "os" + "path/filepath" + "slices" + "strconv" + "syscall" + "testing" + + "github.com/k0sproject/k0s/internal/os/unix" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetPathMountStatus(t *testing.T) { + parent, err := unix.OpenDir(t.TempDir(), 0) + require.NoError(t, err) + t.Cleanup(func() { assert.NoError(t, parent.Close()) }) + + t.Run("nonexistent", func(t *testing.T) { + file, err := parent.Open("file", syscall.O_CREAT, 0644) + require.NoError(t, err) + t.Cleanup(func() { assert.NoError(t, file.Close()) }) + + bogusPath := filepath.Join(parent.Name(), "this is the wrong path") + status, err := getPathMountStatus(parent, file, bogusPath) + if pathErr := (*os.PathError)(nil); assert.ErrorAs(t, err, &pathErr) { + assert.Equal(t, "unmount", pathErr.Op) + assert.Equal(t, bogusPath, pathErr.Path) + assert.Equal(t, syscall.ENOENT, pathErr.Err) + } + assert.Zero(t, status) + }) + + t.Run("file", func(t *testing.T) { + file, err := parent.Open("file", syscall.O_CREAT, 0644) + require.NoError(t, err) + t.Cleanup(func() { assert.NoError(t, file.Close()) }) + + status, err := getPathMountStatus(parent, file, filepath.Join(parent.Name(), file.Name())) + if assert.NoError(t, err) { + assert.Equal(t, pathMountStatusUnknown, status) + } + }) + + t.Run("dir", func(t *testing.T) { + require.NoError(t, parent.Mkdir("dir", 0755)) + dir, err := parent.OpenDir("dir", 0) + require.NoError(t, err) + t.Cleanup(func() { assert.NoError(t, dir.Close()) }) + + status, err := getPathMountStatus(parent, dir.AsPath(), filepath.Join(parent.Name(), dir.Name())) + if assert.NoError(t, err) { + assert.Equal(t, pathMountStatusUnknown, status) + } + }) +} + +func TestMountInfoListsMountPoint(t *testing.T) { + for _, path := range []string{ + `/`, + `/dev`, + `/sys/fs/bpf`, + `/mnt/path with spaces`, + `/mnt/path\with\backslashes`, + } { + ok, err := mountInfoListsMountPoint("testdata/mountinfo", path) + if assert.NoError(t, err, "For %s", path) { + assert.True(t, ok, "For %s", path) + } + } + + for _, path := range []string{ + ``, + `/de`, + `/dev/`, + `/mnt/path with space`, + `/mnt/path with spaces/`, + `/mnt/path\040with\040spaces`, + `/mnt/path\with\backslash`, + `/mnt/path\with\backslashes/`, + } { + ok, err := mountInfoListsMountPoint("testdata/mountinfo", path) + if assert.NoError(t, err, "For %s", path) { + assert.False(t, ok, "For %s", path) + } + } +} + +func TestCleanDir_NonExistent(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + dir := t.TempDir() + + err := cleanupSameMount(log, filepath.Join(dir, "non-existent")) + assert.Equal(t, err, &os.PathError{ + Op: "open", + Path: filepath.Join(dir, "non-existent"), + Err: syscall.ENOENT, + }) + assert.DirExists(t, dir) +} + +func TestCleanDir_Yolo(t *testing.T) { + unrelatedPath := filepath.Join(t.TempDir(), "foo") + require.NoError(t, os.WriteFile(unrelatedPath, nil, 0644)) + + perms := []os.FileMode{0700, 0600, 0500, 0400, 0000} + var files, dirs Entries + for _, perm := range perms { + permStr := strconv.FormatUint(uint64(perm), 8) + name := "file-" + permStr + files = append(files, &File{Name: name, Perm: perm}) + files = append(files, &Symlink{Name: "symlink-" + permStr, Target: name}) + } + files = append(files, &Symlink{Name: "unrelated-symlink", Target: unrelatedPath}) + + for _, perm := range perms { + permStr := strconv.FormatUint(uint64(perm), 8) + dirs = append(dirs, &Dir{Name: "empty-dir-" + permStr, Perm: perm}) + dirs = append(dirs, &Dir{Name: "dir-" + permStr, Perm: perm, Entries: files}) + } + + dir := t.TempDir() + require.NoError(t, Entries{files, dirs}.Make(dir)) + + log := logrus.New() + // log.SetLevel(logrus.DebugLevel) + + err := cleanupSameMount(log, dir) + assert.NoError(t, err) + assert.NoDirExists(t, dir) +} + +/* +func TestCleanDir_Success(t *testing.T) { + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + type ( + E = Entry + D = Dir + F = File + S = File + ) + + for _, test := range struct { + name string + entries []E + }{ + {"empty", []E{}}, + {"single-file", []E{&F{Name: "file"}}}, + {"single-ro-file", []E{&F{Name: "ro-file", Perm: 0444}}}, + {"single-empty-dir", []E{&D{Name: "dir"}}}, + {"single-empty-ro-dir", []E{&D{Name: "dir", Perm: 0444}}}, + {"dir-file", []E{&D{Name: "dir", Entries: []E{}}}}, + } { + t.Run(t.Name(), func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, (&Dir{ + Name: "under-test", + Entries: test.entries, + }).Make(dir)) + }) + } +} + +func TestCleanDir_File(t *testing.T) { + + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + dir := t.TempDir() + require.NoError(t, (&Dir{ + Name: "under-test", + Entries: []Entry{ + &File{Name: "file"}, + }, + }).Make(dir)) + + err := cleanSameDevice(log, filepath.Join(dir, "under-test")) + assert.NoError(t, err) + assert.NoDirExists(t, filepath.Join(dir, "under-test")) +} + +/* + +func TestCleanDir(t *testing.T) { + tmp := t.TempDir() + unrelated := t.TempDir() + + fakeDataDir := Dir{ + Name: "dataDir", + Children: []Entry{ + &File{Name: "file"}, + &Dir{Name: "dir", + Children: []Entry{ + &Symlink{}, + }, + }, + }, + } + + require.NoError(t, os.Mkdir(filepath.Join(tmp, "emptydir"), 0755)) + + require.NoError(t, os.Mkdir(filepath.Join(tmp, "dir"), 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(tmp, "foo", "bar"), nil, 0644)) + require.NoError(t, os.WriteFile(filepath.Join(tmp, "baz"), nil, 0644)) + + log := logrus.New() + log.SetLevel(logrus.DebugLevel) + + err := cleanSameDevice(log, tmp) + require.NoError(t, err) + assert.NoDirExists(t, tmp) +} +*/ + +type Entry interface { + Make(path string) error +} + +type Entries []Entry + +func (e Entries) Make(path string) error { + return EntrySeq(slices.Values(e)).Make(path) +} + +type EntrySeq iter.Seq[Entry] + +func (e EntrySeq) Make(path string) error { + for entry := range e { + if err := entry.Make(path); err != nil { + return err + } + } + return nil +} + +type Dir struct { + Name string + Perm os.FileMode + Entries Entry +} + +func (d *Dir) Make(parentPath string) error { + path := filepath.Join(parentPath, d.Name) + if err := os.Mkdir(path, 0755); err != nil { + return err + } + if err := cmp.Or[Entry](d.Entries, Entries(nil)).Make(path); err != nil { + return err + } + if d.Perm != 0 { + return os.Chmod(path, d.Perm) + } + return nil +} + +type File struct { + Name string + Perm os.FileMode +} + +func (f *File) Make(parentPath string) error { + path := filepath.Join(parentPath, f.Name) + return os.WriteFile(path, nil, cmp.Or(f.Perm, 0644)) +} + +type Symlink struct { + Name string + Target string +} + +func (s *Symlink) Make(parentPath string) error { + return os.Symlink(s.Target, filepath.Join(parentPath, s.Name)) +} diff --git a/pkg/cleanup/testdata/mountinfo b/pkg/cleanup/testdata/mountinfo new file mode 100644 index 000000000000..d432a0faf592 --- /dev/null +++ b/pkg/cleanup/testdata/mountinfo @@ -0,0 +1,35 @@ +22 29 0:5 / /dev rw,nosuid shared:14 - devtmpfs devtmpfs rw,size=101288k,nr_inodes=250823,mode=755 +23 22 0:21 / /dev/pts rw,nosuid,noexec,relatime shared:15 - devpts devpts rw,gid=3,mode=620,ptmxmode=666 +24 22 0:22 / /dev/shm rw,nosuid,nodev shared:16 - tmpfs tmpfs rw +25 29 0:23 / /proc rw,nosuid,nodev,noexec,relatime shared:8 - proc proc rw +26 29 0:24 / /run rw,nosuid,nodev shared:17 - tmpfs tmpfs rw,size=506436k,mode=755 +27 26 0:25 / /run/keys rw,nosuid,nodev,relatime shared:18 - ramfs none rw,mode=750 +28 29 0:26 / /sys rw,nosuid,nodev,noexec,relatime shared:9 - sysfs sysfs rw +29 1 253:0 / / rw,relatime shared:1 - ext4 /dev/disk/by-label/nixos rw +30 29 0:27 / /nix/.ro-store rw,relatime shared:2 - 9p nix-store rw,dirsync,loose,access=client,msize=16384,trans=virtio +31 29 0:28 / /nix/.rw-store rw,relatime shared:3 - tmpfs tmpfs rw,mode=755 +34 29 0:29 / /nix/store rw,relatime shared:4 - overlay overlay rw,lowerdir=/mnt-root/nix/.ro-store,upperdir=/mnt-root/nix/.rw-store/upper,workdir=/mnt-root/nix/.rw-store/work +35 29 0:32 / /tmp/shared rw,relatime shared:6 - 9p shared rw,sync,dirsync,access=client,msize=16384,trans=virtio +36 29 0:33 / /tmp/xchg rw,relatime shared:7 - 9p xchg rw,sync,dirsync,access=client,msize=16384,trans=virtio +37 34 0:29 / /nix/store ro,relatime shared:5 - overlay overlay rw,lowerdir=/mnt-root/nix/.ro-store,upperdir=/mnt-root/nix/.rw-store/upper,workdir=/mnt-root/nix/.rw-store/work +38 28 0:6 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:10 - securityfs securityfs rw +39 28 0:34 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:11 - cgroup2 cgroup2 rw,nsdelegate,memory_recursiveprot +40 28 0:35 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:12 - pstore pstore rw +41 28 0:36 / /sys/fs/bpf rw,nosuid,nodev,noexec,relatime shared:13 - bpf bpf rw,mode=700 +42 28 0:7 / /sys/kernel/debug rw,nosuid,nodev,noexec,relatime shared:19 - debugfs debugfs rw +43 22 0:37 / /dev/hugepages rw,nosuid,nodev,relatime shared:20 - hugetlbfs hugetlbfs rw,pagesize=2M +44 22 0:19 / /dev/mqueue rw,nosuid,nodev,noexec,relatime shared:21 - mqueue mqueue rw +68 26 0:38 / /run/credentials/systemd-journald.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:22 - ramfs none rw,mode=700 +70 26 0:39 / /run/credentials/systemd-tmpfiles-setup-dev-early.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:23 - ramfs none rw,mode=700 +45 28 0:40 / /sys/kernel/config rw,nosuid,nodev,noexec,relatime shared:24 - configfs configfs rw +46 28 0:41 / /sys/fs/fuse/connections rw,nosuid,nodev,noexec,relatime shared:25 - fusectl fusectl rw +76 26 0:42 / /run/credentials/systemd-tmpfiles-setup-dev.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:26 - ramfs none rw,mode=700 +78 26 0:43 / /run/credentials/systemd-sysctl.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:27 - ramfs none rw,mode=700 +109 26 0:45 / /run/credentials/systemd-vconsole-setup.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:54 - ramfs none rw,mode=700 +49 26 0:46 / /run/wrappers rw,nodev,relatime shared:56 - tmpfs tmpfs rw,mode=755 +115 26 0:47 / /run/credentials/systemd-tmpfiles-setup.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:58 - ramfs none rw,mode=700 +321 26 0:56 / /run/credentials/getty@tty1.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:197 - ramfs none rw,mode=700 +55 26 0:57 / /run/credentials/serial-getty@ttyS0.service ro,nosuid,nodev,noexec,relatime,nosymfollow shared:229 - ramfs none rw,mode=700 +61 26 0:58 / /run/user/1000 rw,nosuid,nodev,relatime shared:235 - tmpfs tmpfs rw,size=202572k,nr_inodes=50643,mode=700,uid=1000,gid=999 +67 29 0:59 / /mnt/path\040with\040spaces rw,relatime shared:241 - tmpfs tmpfs rw,size=10240k +75 29 0:60 / /mnt/path\134with\134backslashes rw,relatime shared:247 - tmpfs tmpfs rw,size=10240k