diff --git a/docs/raspberry-pi4.md b/docs/raspberry-pi4.md index 2db4b230702f..d5b044d5ff9a 100644 --- a/docs/raspberry-pi4.md +++ b/docs/raspberry-pi4.md @@ -165,7 +165,7 @@ Operating system: Linux (pass) cgroup controller "cpuacct": available (via cpu in version 2) (pass) cgroup controller "cpuset": available (pass) cgroup controller "memory": available (pass) - cgroup controller "devices": available (kernel 5.15 >= 4.15) (pass) + cgroup controller "devices": unknown (warning: insufficient permissions, try with elevated permissions) cgroup controller "freezer": available (cgroup.freeze exists) (pass) cgroup controller "pids": available (pass) cgroup controller "hugetlb": available (pass) diff --git a/go.mod b/go.mod index fa756e7ecf84..8d802e7e73ef 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/bombsimon/logrusr/v2 v2.0.1 github.com/carlmjohnson/requests v0.23.4 github.com/cavaliergopher/grab/v3 v3.0.1 + github.com/cilium/ebpf v0.11.0 github.com/cloudflare/cfssl v1.6.4 github.com/containerd/cgroups/v3 v3.0.2 github.com/containerd/containerd v1.7.6 @@ -33,6 +34,7 @@ require ( github.com/mitchellh/go-homedir v1.1.0 github.com/olekukonko/tablewriter v0.0.5 github.com/opencontainers/image-spec v1.1.0-rc4 + github.com/opencontainers/runtime-spec v1.1.0-rc.2 github.com/otiai10/copy v1.12.0 github.com/pelletier/go-toml v1.9.5 github.com/robfig/cron v1.2.0 @@ -54,7 +56,7 @@ require ( go.uber.org/multierr v1.11.0 go.uber.org/zap v1.25.0 golang.org/x/crypto v0.13.0 - golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 + golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb golang.org/x/mod v0.12.0 golang.org/x/sync v0.3.0 golang.org/x/sys v0.12.0 @@ -104,7 +106,6 @@ require ( github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect - github.com/cilium/ebpf v0.9.1 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/containerd/console v1.0.3 // indirect github.com/containerd/continuity v0.4.2 // indirect @@ -208,7 +209,6 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/runc v1.1.9 // indirect - github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect github.com/opencontainers/selinux v1.11.0 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect diff --git a/go.sum b/go.sum index ce6b208d446c..9d6126bbf6cd 100644 --- a/go.sum +++ b/go.sum @@ -134,8 +134,8 @@ github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHe github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= -github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= +github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= +github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8= github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0= @@ -256,8 +256,8 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI= -github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= +github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= @@ -960,8 +960,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb h1:xIApU0ow1zwMa2uL1VDNeQlNVFTWMQxZUZCMDy0Q4Us= +golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/internal/pkg/sysinfo/probes/linux/cgroup_v2.go b/internal/pkg/sysinfo/probes/linux/cgroup_v2.go index e718385125bd..08b897fd5da6 100644 --- a/internal/pkg/sysinfo/probes/linux/cgroup_v2.go +++ b/internal/pkg/sysinfo/probes/linux/cgroup_v2.go @@ -22,12 +22,17 @@ package linux import ( "errors" "fmt" + "io/fs" "os" "path/filepath" - "regexp" - "strconv" + "strings" + "k8s.io/utils/pointer" + + "github.com/cilium/ebpf/rlimit" "github.com/containerd/cgroups/v3/cgroup2" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" ) type cgroupV2 struct { @@ -54,22 +59,112 @@ func (g *cgroupV2) loadControllers(seen func(string, string)) error { return g.detectListedRootControllers(seen) } -// The device controller has no interface files. Its availability is assumed -// based on the kernel version, as it is hard to detect it directly. +// Detects the device controller by trying to attach a dummy program of type +// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files +// and is implemented purely on top of BPF, this is the only reliable way to +// detect it. A best-guess detection via the kernel version has the major +// drawback of not working with kernels that have a lot of backported features, +// such as RHEL and friends. +// // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller func (g *cgroupV2) detectDevicesController() (cgroupControllerAvailable, error) { - major, minor, err := parseKernelRelease(g.probeUname) + err := attachDummyDeviceFilter(g.mountPoint) + switch { + case err == nil: + return cgroupControllerAvailable{true, "device filters attachable", ""}, nil + + // EACCES occurs when not allowed to create cgroups. + // EPERM occurs when not allowed to load eBPF programs. + case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0: + return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil + case errors.Is(err, unix.EROFS): + return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil + + case eBPFProgramUnsupported(err): + return cgroupControllerAvailable{false, err.Error(), ""}, nil + } + + return cgroupControllerAvailable{}, err +} + +// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created +// cgroup and removes the program and cgroup again. +func attachDummyDeviceFilter(mountPoint string) (err error) { + insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{ + Allow: true, + Type: "a", + Major: pointer.Int64(-1), + Minor: pointer.Int64(-1), + Access: "rwm", + }}) if err != nil { - return cgroupControllerAvailable{}, err + return fmt.Errorf("failed to create eBPF device filter program: %w", err) + } + + tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*") + if err != nil { + return fmt.Errorf("failed to create temporary cgroup: %w", err) + } + defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }() + + dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err}) + } + defer func() { + if closeErr := unix.Close(dirFD); closeErr != nil { + err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr}) + } + }() + + close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err != nil { + // RemoveMemlock may be required on kernels < 5.11 + // observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l + // https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360 + if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") { + if err2 := rlimit.RemoveMemlock(); err2 != nil { + err = errors.Join(err, err2) + } else { + // Try again, MEMLOCK should be removed by now. + close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err2 != nil { + err = errors.Join(err, err2) + } else { + err = nil + } + } + } } + if err != nil { + if eBPFProgramUnsupported(err) { + return err + } + return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err) + } + + return close() +} - // since 4.15 - available, op := false, "<" - if major > 4 || (major == 4 && minor >= 15) { - available, op = true, ">=" +// Returns true if the given error indicates that an eBPF program is unsupported +// by the kernel. +func eBPFProgramUnsupported(err error) bool { + // https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49 + + switch { + // EINVAL occurs when attempting to create a program with an unknown type. + case errors.Is(err, unix.EINVAL): + return true + + // E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of + // the struct known by the running kernel, meaning the kernel is too old to + // support the given prog type. + case errors.Is(err, unix.E2BIG): + return true + + default: + return false } - msg := fmt.Sprintf("kernel %d.%d %s 4.15", major, minor, op) - return cgroupControllerAvailable{available, msg, ""}, nil } // Detect the freezer controller. It doesn't appear in the cgroup.controllers @@ -140,26 +235,3 @@ func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err e return nil } - -func parseKernelRelease(probeUname unameProber) (int64, int64, error) { - uname, err := probeUname() - if err != nil { - return 0, 0, err - } - - var major, minor int64 - r := regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`) - if matches := r.FindStringSubmatch(uname.osRelease.value); matches == nil { - err = errors.New("unsupported format") - } else { - if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil { - minor, err = strconv.ParseInt(matches[2], 10, 16) - } - } - - if err != nil { - err = fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err) - } - - return major, minor, err -}