Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport release-1.31] Fix deletion of persistent data with k0s reset #5301

Merged
merged 3 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/reset.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ following:
* Processes and containers: Terminates all running k0s processes to ensure that
there are no active components left. This includes all container processes
managed by the Container Runtime.
* Mounts under k0s data directory: In order to prevent persistent data to be
deleted, all mount points under k0s' data directory will be unmounted. If an
unmount fails, it will be unmounted lazy.
* Data stored on the node: Deletes the whole k0s data directory, which includes
* all k0s-related configuration files, including those used for cluster setup
and node-specific settings,
Expand All @@ -23,8 +26,8 @@ following:
reboot the host after a reset to ensure that there are no k0s remnants in the
host's network configuration.
* Registration with the host's init system: Reverts the registration done by
`k0s install`. After a reset, k0s won't be automatically started when the host
boots.
`k0s install`. After a reset, k0s won't be automatically started when the
host boots.

After a successful reset, the k0s binary itself remains. It can then be used to
join another cluster or create a new one.
Expand Down
77 changes: 77 additions & 0 deletions inttest/reset/clutter-data-dir.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: 2024 k0s authors
#shellcheck shell=ash

set -eu

make_dir() { mkdir -- "$1" && echo "$1"; }
make_file() { echo "$1" >"$1" && echo "$1"; }

make_bind_mounts() {
local real="$1"
local target="$2"

# Directory bind mount
make_dir "$real/real_dir"
make_file "$real/real_dir/real_dir_info.txt"
make_dir "$target/bind_dir"
mount --bind -- "$real/real_dir" "$target/bind_dir"

# File bind mount
make_file "$real/real_file.txt"
make_file "$target/bind_file.txt"
mount --bind -- "$real/real_file.txt" "$target/bind_file.txt"

# Recursive directory bind mount
make_dir "$real/real_recursive_dir"
make_file "$real/real_recursive_dir/real_recursive_dir.txt"
make_dir "$real/real_recursive_dir/bind_dir"
mount --bind -- "$real/real_dir" "$real/real_recursive_dir/bind_dir"
make_file "$real/real_recursive_dir/bind_file.txt"
mount --bind -- "$real/real_file.txt" "$real/real_recursive_dir/bind_file.txt"
make_dir "$target/rbind_dir"
mount --rbind -- "$real/real_recursive_dir" "$target/rbind_dir"

# Directory overmounts
make_dir "$real/overmount_dir"
make_file "$real/overmount_dir/in_overmount_dir.txt"
mount --bind -- "$real/overmount_dir" "$target/bind_dir"

# File overmounts
make_file "$real/overmount_file.txt"
mount --bind -- "$real/overmount_file.txt" "$target/bind_file.txt"
}

clutter() {
local dataDir="$1"
local realDir

realDir="$(mktemp -t -d k0s_reset_inttest.XXXXXX)"

local dir="$dataDir"/cluttered
make_dir "$dir"

# Directories and files with restricted permissions
make_dir "$dir/restricted_dir"
make_file "$dir/restricted_dir/no_read_file.txt"
chmod 000 -- "$dir/restricted_dir/no_read_file.txt" # No permissions on the file
make_dir "$dir/restricted_dir/no_exec_dir"
chmod 000 -- "$dir/restricted_dir/no_exec_dir" # No permissions on the directory
make_dir "$dir/restricted_dir/no_exec_nonempty_dir"
make_file "$dir/restricted_dir/no_exec_nonempty_dir/.hidden_file"
chmod 000 -- "$dir/restricted_dir/no_exec_nonempty_dir" # No permissions on the directory

# Symlinks pointing outside the directory tree
make_dir "$realDir/some_dir"
make_file "$realDir/some_dir/real_file.txt"
ln -s -- "$realDir/some_dir/real_file.txt" "$dir/symlink_to_file" # Symlink to a file
ln -s -- "$realDir/some_dir" "$dir/symlink_to_dir" # Symlink to a directory

# Bind mounts pointing outside the directory tree
make_bind_mounts "$realDir" "$dir"

# Bind mounts outside the directory tree pointing into it
# make_bind_mounts "$dir" "$realDir"
}

clutter "$@"
47 changes: 42 additions & 5 deletions inttest/reset/reset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ limitations under the License.
package reset

import (
"bytes"
_ "embed"
"fmt"
"io"
"strings"
"testing"

testifysuite "github.com/stretchr/testify/suite"
Expand All @@ -28,11 +33,14 @@ type suite struct {
common.BootlooseSuite
}

//go:embed clutter-data-dir.sh
var clutterScript []byte

func (s *suite) TestReset() {
ctx := s.Context()
workerNode := s.WorkerNode(0)

if ok := s.Run("k0s gets up", func() {
if !s.Run("k0s gets up", func() {
s.Require().NoError(s.InitController(0, "--disable-components=konnectivity-server,metrics-server"))
s.Require().NoError(s.RunWorkers())

Expand All @@ -44,11 +52,7 @@ func (s *suite) TestReset() {

s.T().Log("waiting to see CNI pods ready")
s.NoError(common.WaitForKubeRouterReady(ctx, kc), "CNI did not start")
}); !ok {
return
}

s.Run("k0s reset", func() {
ssh, err := s.SSH(ctx, workerNode)
s.Require().NoError(err)
defer ssh.Disconnect()
Expand All @@ -57,14 +61,47 @@ func (s *suite) TestReset() {
s.NoError(ssh.Exec(ctx, "test -d /run/k0s", common.SSHStreams{}), "/run/k0s is not a directory")

s.NoError(ssh.Exec(ctx, "pidof containerd-shim-runc-v2 >&2", common.SSHStreams{}), "Expected some running containerd shims")
}) {
return
}

var clutteringPaths bytes.Buffer

if !s.Run("prepare k0s reset", func() {
s.NoError(s.StopWorker(workerNode), "Failed to stop k0s")

ssh, err := s.SSH(ctx, workerNode)
s.Require().NoError(err)
defer ssh.Disconnect()

streams, flushStreams := common.TestLogStreams(s.T(), "clutter data dir")
streams.In = bytes.NewReader(clutterScript)
streams.Out = io.MultiWriter(&clutteringPaths, streams.Out)
err = ssh.Exec(ctx, "sh -s -- /var/lib/k0s", streams)
flushStreams()
s.Require().NoError(err)
}) {
return
}

s.Run("k0s reset", func() {
ssh, err := s.SSH(ctx, workerNode)
s.Require().NoError(err)
defer ssh.Disconnect()

streams, flushStreams := common.TestLogStreams(s.T(), "reset")
err = ssh.Exec(ctx, "k0s reset --debug", streams)
flushStreams()
s.NoError(err, "k0s reset didn't exit cleanly")

for _, path := range strings.Split(string(bytes.TrimSpace(clutteringPaths.Bytes())), "\n") {
if strings.HasPrefix(path, "/var/lib/k0s") {
s.NoError(ssh.Exec(ctx, fmt.Sprintf("! test -e %q", path), common.SSHStreams{}), "Failed to verify non-existence of %s", path)
} else {
s.NoError(ssh.Exec(ctx, fmt.Sprintf("test -e %q", path), common.SSHStreams{}), "Failed to verify existence of %s", path)
}
}

// /var/lib/k0s is a mount point in the Docker container and can't be deleted, so it must be empty
s.NoError(ssh.Exec(ctx, `x="$(ls -A /var/lib/k0s)" && echo "$x" >&2 && [ -z "$x" ]`, common.SSHStreams{}), "/var/lib/k0s is not empty")
s.NoError(ssh.Exec(ctx, "! test -e /run/k0s", common.SSHStreams{}), "/run/k0s still exists")
Expand Down
3 changes: 0 additions & 3 deletions pkg/cleanup/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ func (c *containers) stopAllContainers() error {
return fmt.Errorf("failed at listing pods %w", err)
}
if len(pods) > 0 {
if err := removeMount("kubelet/pods"); err != nil {
errs = append(errs, err)
}
if err := removeMount("run/netns"); err != nil {
errs = append(errs, err)
}
Expand Down
45 changes: 38 additions & 7 deletions pkg/cleanup/directories.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/sirupsen/logrus"
"k8s.io/mount-utils"
Expand All @@ -46,15 +47,39 @@ func (d *directories) Run() error {

var dataDirMounted bool

// search and unmount kubelet volume mounts
for _, v := range procMounts {
if v.Path == filepath.Join(d.Config.dataDir, "kubelet") {
// ensure that we don't delete any persistent data volumes that may be
// mounted by kubernetes by unmount every mount point under DataDir.
//
// Unmount in the reverse order it was mounted so we handle recursive
// bind mounts and over mounts properly. If we for any reason are not
// able to unmount, fall back to lazy unmount and if that also fails
// bail out and don't delete anything.
//
// Note that if there are any shared bind mounts under k0s data
// directory, we may end up unmounting stuff outside the k0s DataDir.
// If someone has set a bind mount to be shared, we assume that is the
// desired behavior. See MS_SHARED and NOTES:
// - https://man7.org/linux/man-pages/man2/mount.2.html
// - https://man7.org/linux/man-pages/man2/umount.2.html#NOTES
for i := len(procMounts) - 1; i >= 0; i-- {
v := procMounts[i]
// avoid unmount datadir if its mounted on separate partition
// k0s didn't mount it so leave it alone
if v.Path == d.Config.k0sVars.DataDir {
dataDirMounted = true
continue
}
if isUnderPath(v.Path, filepath.Join(d.Config.dataDir, "kubelet")) || isUnderPath(v.Path, d.Config.k0sVars.DataDir) {
logrus.Debugf("%v is mounted! attempting to unmount...", v.Path)
if err = mounter.Unmount(v.Path); err != nil {
logrus.Warningf("failed to unmount %v", v.Path)
// if we fail to unmount, try lazy unmount so
// we don't end up deleting stuff that we
// shouldn't
logrus.Warningf("lazy unmounting %v", v.Path)
if err = UnmountLazy(v.Path); err != nil {
return fmt.Errorf("failed unmount %v", v.Path)
}
}
} else if v.Path == d.Config.dataDir {
dataDirMounted = true
}
}

Expand All @@ -81,7 +106,13 @@ func (d *directories) Run() error {
return nil
}

// this is for checking if the error retrned by os.RemoveAll is due to
// test if the path is a directory equal to or under base
func isUnderPath(path, base string) bool {
rel, err := filepath.Rel(base, path)
return err == nil && !strings.HasPrefix(rel, "..") && !filepath.IsAbs(rel)
}

// this is for checking if the error returned by os.RemoveAll is due to
// it being a mount point. if it is, we can ignore the error. this way
// we can't rely on os.RemoveAll instead of recursively deleting the
// contents of the directory
Expand Down
27 changes: 27 additions & 0 deletions pkg/cleanup/unmount_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//go:build unix

/*
Copyright 2024 k0s authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cleanup

import (
"golang.org/x/sys/unix"
)

func UnmountLazy(path string) error {
return unix.Unmount(path, unix.MNT_DETACH)
}
23 changes: 23 additions & 0 deletions pkg/cleanup/unmount_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2024 k0s authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cleanup

import "fmt"

func UnmountLazy(path string) error {
return fmt.Errorf("lazy unmount is not supported on Windows for path: %s", path)
}
Loading