diff --git a/shell.nix b/shell.nix index da722b3..b372412 100644 --- a/shell.nix +++ b/shell.nix @@ -8,5 +8,8 @@ pkgs.mkShell { # For llvm-objdump llvmPackages.bintools + + # To aid testing + runc ]; } diff --git a/src/runc/container.rs b/src/runc/container.rs index 697a8b6..6d4215d 100644 --- a/src/runc/container.rs +++ b/src/runc/container.rs @@ -2,8 +2,8 @@ use std::fs::File; use std::io::{BufRead, BufReader, Seek}; use std::path::Path; -use anyhow::{bail, ensure, Context, Result}; -use rustix::fs::{FileType, Gid, Mode, Uid}; +use anyhow::{bail, Context, Result}; +use rustix::fs::{FileType, Mode}; use rustix::process::{Pid, Signal}; use tokio::io::unix::AsyncFd; use tokio::io::Interest; @@ -63,8 +63,10 @@ impl CgroupEventNotifier { } pub struct Container { - uid: Uid, - gid: Gid, + // Uid and gid of the primary container user. + // Note that they're inside the user namespace (if any). + uid: u32, + gid: u32, pid: Pid, wait: tokio::sync::watch::Receiver, cgroup_device_filter: Mutex>, @@ -87,11 +89,9 @@ impl Container { Box::new(DeviceAccessControllerV2::new(&state.cgroup_paths.unified)?) }; - ensure!(config.process.user.uid != u32::MAX && config.process.user.gid != u32::MAX); - Ok(Self { - uid: unsafe { Uid::from_raw(config.process.user.uid) }, - gid: unsafe { Gid::from_raw(config.process.user.gid) }, + uid: config.process.user.uid, + gid: config.process.user.gid, pid: Pid::from_raw(state.init_process_pid.try_into()?).context("Invalid PID")?, wait: recv, cgroup_device_filter: Mutex::new(cgroup_device_filter), @@ -113,7 +113,8 @@ impl Container { } pub async fn mknod(&self, node: &Path, (major, minor): (u32, u32)) -> Result<()> { - crate::util::namespace::MntNamespace::of_pid(self.pid)?.enter(|| { + let ns = crate::util::namespace::MntNamespace::of_pid(self.pid)?; + ns.enter(|| { if let Some(parent) = node.parent() { let _ = std::fs::create_dir_all(parent); } @@ -125,8 +126,8 @@ impl Container { Mode::from(0o644), rustix::fs::makedev(major, minor), )?; - if !self.uid.is_root() { - rustix::fs::chown(node, Some(self.uid), Some(self.gid))?; + if self.uid != 0 { + rustix::fs::chown(node, Some(ns.uid(self.uid)?), Some(ns.gid(self.gid)?))?; } Ok(()) })? diff --git a/src/util/namespace.rs b/src/util/namespace.rs index f50aa19..52d74b5 100644 --- a/src/util/namespace.rs +++ b/src/util/namespace.rs @@ -1,22 +1,70 @@ use std::fs::File; use std::os::fd::AsFd; -use std::os::unix::fs::MetadataExt; +use std::path::Path; -use anyhow::Result; +use anyhow::{Context, Result}; use rustix::fs::{Gid, Uid}; use rustix::process::Pid; use rustix::thread::{CapabilitiesSecureBits, LinkNameSpaceType, UnshareFlags}; +pub struct IdMap { + map: Vec<(u32, u32, u32)>, +} + +impl IdMap { + fn read(path: &Path) -> Result { + Self::parse(&std::fs::read_to_string(path)?) + } + + fn parse(content: &str) -> Result { + let mut map = Vec::new(); + for line in content.lines() { + let mut words = line.split_ascii_whitespace(); + let inside = words.next().context("unexpected id_map")?.parse()?; + let outside = words.next().context("unexpected id_map")?.parse()?; + let count = words.next().context("unexpected id_map")?.parse()?; + map.push((inside, outside, count)); + } + Ok(Self { map }) + } + + fn translate(&self, id: u32) -> Option { + for &(inside, outside, count) in self.map.iter() { + if (inside..inside.checked_add(count)?).contains(&id) { + return (id - inside).checked_add(outside); + } + } + None + } +} + pub struct MntNamespace { - fd: File, + mnt_fd: File, + uid_map: IdMap, + gid_map: IdMap, } impl MntNamespace { /// Open the mount namespace of a process. pub fn of_pid(pid: Pid) -> Result { - let path = format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()); - let fd = File::open(path)?; - Ok(MntNamespace { fd }) + let mnt_fd = File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?; + let uid_map = IdMap::read(format!("/proc/{}/uid_map", pid.as_raw_nonzero()).as_ref())?; + let gid_map = IdMap::read(format!("/proc/{}/gid_map", pid.as_raw_nonzero()).as_ref())?; + Ok(MntNamespace { + mnt_fd, + uid_map, + gid_map, + }) + } + + /// Translate user ID into a UID in the namespace. + pub fn uid(&self, uid: u32) -> Result { + Ok(unsafe { Uid::from_raw(self.uid_map.translate(uid).context("UID overflows")?) }) + } + + /// Translate group ID into a GID in the namespace. + pub fn gid(&self, gid: u32) -> Result { + Ok(unsafe { Gid::from_raw(self.gid_map.translate(gid).context("GID overflows")?) }) } /// Enter the mount namespace. @@ -32,7 +80,7 @@ impl MntNamespace { // Switch this particular thread to the container's mount namespace. rustix::thread::move_into_link_name_space( - self.fd.as_fd(), + self.mnt_fd.as_fd(), Some(LinkNameSpaceType::Mount), )?; @@ -51,9 +99,6 @@ impl MntNamespace { // // https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073 // https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111 - let metadata = std::fs::metadata("/")?; - let uid = metadata.uid(); - let gid = metadata.gid(); // By default `setuid` will drop capabilities when transitioning from root // to non-root user. This bit prevents it so our code still have superpower. @@ -61,8 +106,8 @@ impl MntNamespace { CapabilitiesSecureBits::NO_SETUID_FIXUP, )?; - rustix::thread::set_thread_uid(unsafe { Uid::from_raw(uid) })?; - rustix::thread::set_thread_gid(unsafe { Gid::from_raw(gid) })?; + rustix::thread::set_thread_uid(self.uid(0)?)?; + rustix::thread::set_thread_gid(self.gid(0)?)?; Ok(f()) })