Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let cleanup steps hold their own data #5194

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion cmd/reset/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (c *command) reset() error {
}

// Get Cleanup Config
cfg, err := cleanup.NewConfig(c.K0sVars, c.CfgFile, c.WorkerOptions.CriSocket)
cfg, err := cleanup.NewConfig(c.Debug, c.K0sVars, c.WorkerOptions.CriSocket)
if err != nil {
return fmt.Errorf("failed to configure cleanup: %w", err)
}
Expand Down
96 changes: 54 additions & 42 deletions pkg/cleanup/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,71 +19,57 @@ package cleanup
import (
"errors"
"fmt"
"os/exec"

k0sv1beta1 "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
"github.com/k0sproject/k0s/pkg/component/worker"
workerconfig "github.com/k0sproject/k0s/pkg/component/worker/config"
"github.com/k0sproject/k0s/pkg/component/worker/containerd"
"github.com/k0sproject/k0s/pkg/config"

"github.com/k0sproject/k0s/pkg/constant"
"github.com/k0sproject/k0s/pkg/container/runtime"

"github.com/sirupsen/logrus"
)

type Config struct {
cfgFile string
containerd *containerdConfig
containerRuntime runtime.ContainerRuntime
dataDir string
k0sVars *config.CfgVars
runDir string
cleanupSteps []Step
}

// containerdConfig holds what the cleanup code needs to launch and later stop
// its own containerd process. NewConfig only populates it when no CRI socket
// flag was given; a nil *containerdConfig is what isCustomCriUsed treats as
// "a custom CRI is in use".
type containerdConfig struct {
// binPath is the containerd executable to run; NewConfig sets it to
// "<data-dir>/bin/containerd".
binPath string
// cmd is the started containerd command. It is nil until startContainerd
// has launched the process successfully.
cmd *exec.Cmd
// socketPath is handed to containerd as its --address flag; NewConfig takes
// it from the resolved container runtime endpoint.
socketPath string
}

func NewConfig(k0sVars *config.CfgVars, cfgFile string, criSocketFlag string) (*Config, error) {
runDir := "/run/k0s" // https://github.com/k0sproject/k0s/pull/591/commits/c3f932de85a0b209908ad39b817750efc4987395

var containerdCfg *containerdConfig
func NewConfig(debug bool, k0sVars *config.CfgVars, criSocketFlag string) (*Config, error) {
cfg, err := k0sVars.NodeConfig()
if err != nil {
logrus.Errorf("failed to get cluster setup: %v", err)
}

runtimeEndpoint, err := worker.GetContainerRuntimeEndpoint(criSocketFlag, runDir)
containers, err := newContainersStep(debug, k0sVars, criSocketFlag)
if err != nil {
return nil, err
}
if criSocketFlag == "" {
containerdCfg = &containerdConfig{
binPath: fmt.Sprintf("%s/%s", k0sVars.DataDir, "bin/containerd"),
socketPath: runtimeEndpoint.Path,
}
}

return &Config{
cfgFile: cfgFile,
containerd: containerdCfg,
containerRuntime: runtime.NewContainerRuntime(runtimeEndpoint),
dataDir: k0sVars.DataDir,
runDir: runDir,
k0sVars: k0sVars,
}, nil
}

func (c *Config) Cleanup() error {
var errs []error
cleanupSteps := []Step{
&containers{Config: c},
&users{Config: c},
&services{Config: c},
&directories{Config: c},
containers,
&users{
systemUsers: cfg.Spec.Install.SystemUsers,
},
&services{},
&directories{
dataDir: k0sVars.DataDir,
runDir: k0sVars.RunDir,
},
&cni{},
}

if bridge := newBridgeStep(); bridge != nil {
cleanupSteps = append(cleanupSteps, bridge)
}

for _, step := range cleanupSteps {
return &Config{cleanupSteps}, nil
}

func (c *Config) Cleanup() error {
var errs []error

for _, step := range c.cleanupSteps {
logrus.Info("* ", step.Name())
err := step.Run()
if err != nil {
Expand All @@ -97,6 +83,32 @@ func (c *Config) Cleanup() error {
return nil
}

// newContainersStep assembles the cleanup step that deals with containers.
//
// The container runtime endpoint is resolved from criSocketFlag and the k0s
// run directory; a resolution failure is returned unchanged. When
// criSocketFlag is empty, the step additionally gets a containerd component
// (log level "debug" if debug is set, "error" otherwise) configured with the
// default pause image. When a CRI socket was given explicitly, no containerd
// component is attached.
func newContainersStep(debug bool, k0sVars *config.CfgVars, criSocketFlag string) (*containers, error) {
	endpoint, err := worker.GetContainerRuntimeEndpoint(criSocketFlag, k0sVars.RunDir)
	if err != nil {
		return nil, err
	}

	step := &containers{containerRuntime: runtime.NewContainerRuntime(endpoint)}

	// An explicit CRI socket means there is no containerd for us to manage.
	if criSocketFlag != "" {
		return step, nil
	}

	level := "error"
	if debug {
		level = "debug"
	}
	pauseImage := &k0sv1beta1.ImageSpec{
		Image:   constant.KubePauseContainerImage,
		Version: constant.KubePauseContainerImageVersion,
	}
	step.managedContainerd = containerd.NewComponent(level, k0sVars, &workerconfig.Profile{
		PauseImage: pauseImage,
	})

	return step, nil
}

// Step interface is used to implement cleanup steps
type Step interface {
// Run implements specific cleanup operations
Expand Down
84 changes: 22 additions & 62 deletions pkg/cleanup/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,20 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/k0sproject/k0s/internal/pkg/file"
"github.com/k0sproject/k0s/pkg/component/worker/containerd"
"github.com/k0sproject/k0s/pkg/container/runtime"

"github.com/avast/retry-go"
"github.com/sirupsen/logrus"
"k8s.io/mount-utils"
)

type containers struct {
Config *Config
managedContainerd *containerd.Component
containerRuntime runtime.ContainerRuntime
}

// Name returns the name of the step
Expand All @@ -46,23 +44,27 @@ func (c *containers) Name() string {
// Run removes all the pods and mounts and stops containers afterwards
// Run starts containerd if custom CRI is not configured
func (c *containers) Run() error {
if !c.isCustomCriUsed() {
if err := c.startContainerd(); err != nil {
if errors.Is(err, fs.ErrNotExist) || errors.Is(err, exec.ErrNotFound) {
logrus.Debugf("containerd binary not found. Skipping container cleanup")
return nil
}
return fmt.Errorf("failed to start containerd: %w", err)
if c.managedContainerd != nil {
ctx := context.TODO()
if err := c.managedContainerd.Init(ctx); err != nil {
logrus.WithError(err).Warn("Failed to initialize containerd, skipping container cleanup")
return nil
}
if err := c.managedContainerd.Start(ctx); err != nil {
logrus.WithError(err).Warn("Failed to start containerd, skipping container cleanup")
return nil
}
defer func() {
if err := c.managedContainerd.Stop(); err != nil {
logrus.WithError(err).Warn("Failed to stop containerd")
}
}()
}

if err := c.stopAllContainers(); err != nil {
logrus.Debugf("error stopping containers: %v", err)
}

if !c.isCustomCriUsed() {
c.stopContainerd()
}
return nil
}

Expand Down Expand Up @@ -91,48 +93,6 @@ func removeMount(path string) error {
return errors.Join(errs...)
}

// isCustomCriUsed reports whether a custom CRI is in use, which is signalled
// by the cleanup Config carrying no containerd configuration.
func (c *containers) isCustomCriUsed() bool {
	managed := c.Config.containerd
	return managed == nil
}

// startContainerd launches the containerd binary referenced by the cleanup
// Config, pointing its root and state directories below the k0s data and run
// directories and its address at the configured socket path. If
// /etc/k0s/containerd.toml exists it is passed along as the config file.
//
// The started command is stored on the Config so that it can be stopped
// later. The error from starting the process is returned as-is.
func (c *containers) startContainerd() error {
	logrus.Debugf("starting containerd")

	cfg := c.Config
	rootFlag := "--root=" + filepath.Join(cfg.dataDir, "containerd")
	stateFlag := "--state=" + filepath.Join(cfg.runDir, "containerd")
	addressFlag := "--address=" + cfg.containerd.socketPath

	flags := []string{rootFlag, stateFlag, addressFlag}
	if file.Exists("/etc/k0s/containerd.toml") {
		flags = append(flags, "--config=/etc/k0s/containerd.toml")
	}

	proc := exec.Command(cfg.containerd.binPath, flags...)
	err := proc.Start()
	if err != nil {
		return err
	}

	// Remember the process so that it can be stopped afterwards.
	cfg.containerd.cmd = proc
	logrus.Debugf("started containerd successfully")
	return nil
}

// stopContainerd stops the containerd process that startContainerd launched.
//
// It sends an interrupt signal first and gives the process five seconds to
// exit. If it is still running after that, it is killed. The process is always
// waited for, so that it gets reaped and its exit is actually observed.
//
// Failures are logged rather than returned: stopping containerd is a
// best-effort operation at the end of the container cleanup.
func (c *containers) stopContainerd() {
	cmd := c.Config.containerd.cmd

	logrus.Debug("attempting to stop containerd")
	logrus.Debugf("found containerd pid: %v", cmd.Process.Pid)
	if err := cmd.Process.Signal(os.Interrupt); err != nil {
		logrus.Errorf("failed to kill containerd: %v", err)
	}

	// cmd.ProcessState stays nil until Wait returns, so it cannot be used to
	// poll for termination. Wait in the background instead and race it against
	// the grace period. The channel is buffered so the goroutine never leaks.
	exited := make(chan error, 1)
	go func() { exited <- cmd.Wait() }()

	select {
	case <-exited:
	case <-time.After(5 * time.Second):
		// The process didn't exit in time, send SIGKILL.
		if err := cmd.Process.Kill(); err != nil {
			logrus.Errorf("failed to send SIGKILL to containerd: %v", err)
			return
		}
		<-exited
	}

	logrus.Debug("successfully stopped containerd")
}

func (c *containers) stopAllContainers() error {
var errs []error

Expand All @@ -141,7 +101,7 @@ func (c *containers) stopAllContainers() error {
err := retry.Do(func() error {
logrus.Debugf("trying to list all pods")
var err error
pods, err = c.Config.containerRuntime.ListContainers(ctx)
pods, err = c.containerRuntime.ListContainers(ctx)
if err != nil {
return err
}
Expand All @@ -158,7 +118,7 @@ func (c *containers) stopAllContainers() error {

for _, pod := range pods {
logrus.Debugf("stopping container: %v", pod)
err := c.Config.containerRuntime.StopContainer(ctx, pod)
err := c.containerRuntime.StopContainer(ctx, pod)
if err != nil {
if strings.Contains(err.Error(), "443: connect: connection refused") {
// on a single node instance, we will see "connection refused" error. this is to be expected
Expand All @@ -168,13 +128,13 @@ func (c *containers) stopAllContainers() error {
errs = append(errs, fmt.Errorf("failed to stop running pod %s: %w", pod, err))
}
}
err = c.Config.containerRuntime.RemoveContainer(ctx, pod)
err = c.containerRuntime.RemoveContainer(ctx, pod)
if err != nil {
errs = append(errs, fmt.Errorf("failed to remove pod %s: %w", pod, err))
}
}

pods, err = c.Config.containerRuntime.ListContainers(ctx)
pods, err = c.containerRuntime.ListContainers(ctx)
if err == nil && len(pods) == 0 {
logrus.Info("successfully removed k0s containers!")
}
Expand Down
21 changes: 11 additions & 10 deletions pkg/cleanup/directories.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import (
)

type directories struct {
Config *Config
dataDir string
runDir string
}

// Name returns the name of the step
Expand Down Expand Up @@ -65,11 +66,11 @@ func (d *directories) Run() error {
v := procMounts[i]
// avoid unmounting the data-dir if it's mounted on a separate partition:
// k0s didn't mount it, so leave it alone
if v.Path == d.Config.k0sVars.DataDir {
if v.Path == d.dataDir {
dataDirMounted = true
continue
}
if isUnderPath(v.Path, filepath.Join(d.Config.dataDir, "kubelet")) || isUnderPath(v.Path, d.Config.k0sVars.DataDir) {
if isUnderPath(v.Path, filepath.Join(d.dataDir, "kubelet")) || isUnderPath(v.Path, d.dataDir) {
logrus.Debugf("%v is mounted! attempting to unmount...", v.Path)
if err = mounter.Unmount(v.Path); err != nil {
// if we fail to unmount, try lazy unmount so
Expand All @@ -84,23 +85,23 @@ func (d *directories) Run() error {
}

if dataDirMounted {
logrus.Debugf("removing the contents of mounted data-dir (%s)", d.Config.dataDir)
logrus.Debugf("removing the contents of mounted data-dir (%s)", d.dataDir)
} else {
logrus.Debugf("removing k0s generated data-dir (%s)", d.Config.dataDir)
logrus.Debugf("removing k0s generated data-dir (%s)", d.dataDir)
}

if err := os.RemoveAll(d.Config.dataDir); err != nil {
if err := os.RemoveAll(d.dataDir); err != nil {
if !dataDirMounted {
return fmt.Errorf("failed to delete k0s generated data-dir: %w", err)
}
if !errorIsUnlinkat(err, d.Config.dataDir) {
if !errorIsUnlinkat(err, d.dataDir) {
return fmt.Errorf("failed to delete contents of mounted data-dir: %w", err)
}
}

logrus.Debugf("deleting k0s generated run-dir (%s)", d.Config.runDir)
if err := os.RemoveAll(d.Config.runDir); err != nil {
return fmt.Errorf("failed to delete %s: %w", d.Config.runDir, err)
logrus.Debugf("deleting k0s generated run-dir (%s)", d.runDir)
if err := os.RemoveAll(d.runDir); err != nil {
return fmt.Errorf("failed to delete %s: %w", d.runDir, err)
}

return nil
Expand Down
4 changes: 1 addition & 3 deletions pkg/cleanup/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ import (
"github.com/k0sproject/k0s/pkg/install"
)

type services struct {
Config *Config
}
type services struct{}

// Name returns the name of the step
func (s *services) Name() string {
Expand Down
10 changes: 4 additions & 6 deletions pkg/cleanup/users.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ limitations under the License.
package cleanup

import (
k0sv1beta1 "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
"github.com/k0sproject/k0s/pkg/install"

"github.com/sirupsen/logrus"
)

type users struct {
Config *Config
systemUsers *k0sv1beta1.SystemUser
}

// Name returns the name of the step
Expand All @@ -32,11 +34,7 @@ func (u *users) Name() string {

// Run removes all controller users that are present on the host
func (u *users) Run() error {
cfg, err := u.Config.k0sVars.NodeConfig()
if err != nil {
logrus.Errorf("failed to get cluster setup: %v", err)
}
if err := install.DeleteControllerUsers(cfg.Spec.Install.SystemUsers); err != nil {
if err := install.DeleteControllerUsers(u.systemUsers); err != nil {
// don't fail, just notify on delete error
logrus.Warnf("failed to delete controller users: %v", err)
}
Expand Down
Loading