From 7824bc503f8cc338af71eb7303fc07d8c327d06a Mon Sep 17 00:00:00 2001 From: lfbzhm Date: Fri, 8 Nov 2024 01:43:24 +0000 Subject: [PATCH 1/3] libct: replace unix.Kill with os.Process.Signal Because we should switch to unix.PidFDSendSignal in new kernels, it has been supported in go runtime. We don't need to add fall back to unix.Kill code here. Signed-off-by: lfbzhm --- libcontainer/process_linux.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index fcbb54a3e41..a6643121cd3 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -115,11 +115,7 @@ func (p *setnsProcess) startTime() (uint64, error) { } func (p *setnsProcess) signal(sig os.Signal) error { - s, ok := sig.(unix.Signal) - if !ok { - return errors.New("os: unsupported signal type") - } - return unix.Kill(p.pid(), s) + return p.cmd.Process.Signal(sig) } func (p *setnsProcess) start() (retErr error) { @@ -838,11 +834,7 @@ func (p *initProcess) createNetworkInterfaces() error { } func (p *initProcess) signal(sig os.Signal) error { - s, ok := sig.(unix.Signal) - if !ok { - return errors.New("os: unsupported signal type") - } - return unix.Kill(p.pid(), s) + return p.cmd.Process.Signal(sig) } func (p *initProcess) setExternalDescriptors(newFds []string) { From 794e63304bd27cf13813616958078df1fb56d9c9 Mon Sep 17 00:00:00 2001 From: lfbzhm Date: Fri, 8 Nov 2024 06:54:08 +0000 Subject: [PATCH 2/3] libct: use pidfd and epoll to wait the init process exit Signed-off-by: lfbzhm --- delete.go | 13 ++---- libcontainer/container_linux.go | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/delete.go b/delete.go index fc8133438ea..7cb31a875f5 100644 --- a/delete.go +++ b/delete.go @@ -5,23 +5,16 @@ import ( "fmt" "os" "path/filepath" - "time" "github.com/opencontainers/runc/libcontainer" "github.com/urfave/cli" - - "golang.org/x/sys/unix" ) func 
killContainer(container *libcontainer.Container) error { - _ = container.Signal(unix.SIGKILL) - for i := 0; i < 100; i++ { - time.Sleep(100 * time.Millisecond) - if err := container.Signal(unix.Signal(0)); err != nil { - return container.Destroy() - } + if err := container.Kill(); err != nil { + return err } - return errors.New("container init still running") + return container.Destroy() } var deleteCommand = cli.Command{ diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c02116177ad..0f6e4395d3f 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -423,6 +423,78 @@ func (c *Container) signal(s os.Signal) error { return nil } +func (c *Container) killViaPidfd() error { + pidfd, err := unix.PidfdOpen(c.initProcess.pid(), 0) + if err != nil { + return err + } + defer unix.Close(pidfd) + + epollfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return err + } + defer unix.Close(epollfd) + + event := unix.EpollEvent{ + Events: unix.EPOLLIN, + Fd: int32(pidfd), + } + if err := unix.EpollCtl(epollfd, unix.EPOLL_CTL_ADD, pidfd, &event); err != nil { + return err + } + + // We don't need unix.PidfdSendSignal because go runtime will use it if possible. + _ = c.Signal(unix.SIGKILL) + + events := make([]unix.EpollEvent, 1) + for { + // Set the timeout to 10s, the same as the traditional unix.Signal solution. 
+ n, err := unix.EpollWait(epollfd, events, 10000) + if err != nil { + if err == unix.EINTR { + continue + } + return err + } + + if n == 0 { + return errors.New("container init still running") + } + + if n > 0 { + event := events[0] + if event.Fd == int32(pidfd) { + return nil + } + } + } +} + +func (c *Container) kill() error { + _ = c.Signal(unix.SIGKILL) + for i := 0; i < 100; i++ { + time.Sleep(100 * time.Millisecond) + if err := c.Signal(unix.Signal(0)); err != nil { + return nil + } + } + return errors.New("container init still running") +} + +// Kill kills the container and wait the init process exit. +func (c *Container) Kill() error { + if c.config.Namespaces.IsPrivate(configs.NEWPID) { + err := c.killViaPidfd() + if err == nil { + return nil + } + + logrus.Debugf("pidfd & epoll failed, falling back to unix.Signal: %v", err) + } + return c.kill() +} + func (c *Container) createExecFifo() (retErr error) { rootuid, err := c.Config().HostRootUID() if err != nil { From 783391231e5ff14e1a88516f186e277d8ea75026 Mon Sep 17 00:00:00 2001 From: Abel Feng Date: Tue, 5 Nov 2024 14:38:41 +0800 Subject: [PATCH 3/3] libct: reduce the delete delay When using unix.Kill to kill the container, we need a for loop to detect the init process exited or not manually, we sleep 100ms each time in the current, but for stopped containers or containers running in a low load machine, we don't need to wait so long time. This change will reduce the delete delay in some situations, especially for those pods with many containers in. 
Co-authored-by: Abel Feng Signed-off-by: lfbzhm --- delete.go | 21 +++++++++++++-------- libcontainer/container_linux.go | 9 +++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/delete.go b/delete.go index 7cb31a875f5..51f909c0cc0 100644 --- a/delete.go +++ b/delete.go @@ -7,6 +7,7 @@ import ( "path/filepath" "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/configs" "github.com/urfave/cli" ) @@ -58,24 +59,28 @@ status of "ubuntu01" as "stopped" the following will delete resources held for } return err } - // When --force is given, we kill all container processes and - // then destroy the container. This is done even for a stopped - // container, because (in case it does not have its own PID - // namespace) there may be some leftover processes in the - // container's cgroup. - if force { - return killContainer(container) - } + s, err := container.Status() if err != nil { return err } switch s { case libcontainer.Stopped: + // For a stopped container, because (in case it does not have + // its own PID namespace) there may be some leftover processes + // in the container's cgroup. + if !container.Config().Namespaces.IsPrivate(configs.NEWPID) { + return killContainer(container) + } return container.Destroy() case libcontainer.Created: return killContainer(container) default: + // When --force is given, we kill all container processes and + // then destroy the container. + if force { + return killContainer(container) + } return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s) } }, diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 0f6e4395d3f..e9a09c86cc6 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -473,6 +473,15 @@ func (c *Container) killViaPidfd() error { func (c *Container) kill() error { _ = c.Signal(unix.SIGKILL) + + // For containers running in a low load machine, we only need to wait about 1ms. 
+ time.Sleep(time.Millisecond) + if err := c.Signal(unix.Signal(0)); err != nil { + return nil + } + + // On a heavily loaded machine the init process may take longer to exit, so keep polling. + logrus.Debugln("We need more time to wait the init process exit.") for i := 0; i < 100; i++ { time.Sleep(100 * time.Millisecond) if err := c.Signal(unix.Signal(0)); err != nil {