Skip to content

Commit

Permalink
Unmount the root mount point of the container when the container is d…
Browse files Browse the repository at this point in the history
…eleted
  • Loading branch information
zzzzzzzzzy9 committed Jul 17, 2024
1 parent 666a5ec commit 6130801
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 1 deletion.
2 changes: 2 additions & 0 deletions crates/runc-shim/src/runc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use containerd_shim::{
asynchronous::monitor::{monitor_subscribe, monitor_unsubscribe, Subscription},
io_error,
monitor::{ExitEvent, Subject, Topic},
mount::umount_recursive,
other, other_error,
protos::{
api::ProcessInfo,
Expand Down Expand Up @@ -299,6 +300,7 @@ impl ProcessLifecycle<InitProcess> for RuncInitLifecycle {
);
}
}
umount_recursive(Path::new(&self.bundle).join("rootfs").to_str(), 0)?;
self.exit_signal.signal();
Ok(())
}
Expand Down
2 changes: 2 additions & 0 deletions crates/runc-shim/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use containerd_shim::{
event::Event,
io_error,
monitor::{Subject, Topic},
mount::umount_recursive,
protos::{events::task::TaskExit, protobuf::MessageDyn},
util::{
convert_to_timestamp, read_options, read_runtime, read_spec, timestamp, write_str_to_file,
Expand Down Expand Up @@ -120,6 +121,7 @@ impl Shim for Service {
runc.delete(&self.id, Some(&DeleteOpts { force: true }))
.await
.unwrap_or_else(|e| warn!("failed to remove runc container: {}", e));
umount_recursive(bundle.join("rootfs").to_str(), 0)?;
let mut resp = DeleteResponse::new();
// sigkill
resp.set_exit_status(137);
Expand Down
177 changes: 176 additions & 1 deletion crates/shim/src/mount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
use std::{
collections::HashMap,
env,
fs::File,
io::{BufRead, BufReader},
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, Not},
path::Path,
};

use lazy_static::lazy_static;
use log::error;
#[cfg(target_os = "linux")]
use nix::mount::{mount, MsFlags};
use nix::mount::{mount, MntFlags, MsFlags};
#[cfg(target_os = "linux")]
use nix::sched::{unshare, CloneFlags};
#[cfg(target_os = "linux")]
Expand Down Expand Up @@ -615,6 +617,179 @@ pub fn mount_rootfs(
Err(Error::Unimplemented("start".to_string()))
}

/// One parsed entry of `/proc/self/mountinfo`.
///
/// The fields follow the column layout described in proc(5); every textual
/// field is stored as it was split out of the line.
#[derive(Clone, Debug, Default)]
struct MountInfo {
    /// Mount ID. Unique while the mount exists; the kernel may hand the same
    /// value out again after an unmount.
    pub id: u32,
    /// Mount ID of the parent mount. The top of the mount tree lists its own
    /// ID here.
    pub parent: u32,
    /// Major half of the `major:minor` device number (`st_dev`) reported for
    /// files on this filesystem.
    pub major: u32,
    /// Minor half of the `major:minor` device number.
    pub minor: u32,
    /// Directory inside the filesystem that serves as the root of this mount.
    pub root: String,
    /// Where the mount is attached, relative to the process's root directory.
    pub mountpoint: String,
    /// Per-mount options as a comma-separated list.
    pub options: String,
    /// Optional `tag[:value]` fields joined by single spaces (for example
    /// `shared`, `master`, `propagate_from`, `unbindable`); empty when the
    /// line carries none. See mount_namespaces(7).
    pub optional: String,
    /// Filesystem type, written as `type[.subtype]`.
    pub fs_type: String,
    /// Filesystem-specific mount source, or `none`.
    pub source: String,
    /// Per-superblock options as a comma-separated list.
    pub vfs_options: String,
}

/// Unmounts `target` and every mount located beneath it.
///
/// The current mount table is read with `get_mounts`, restricted by
/// `prefix_filter` to mount points that are `target` itself or lie under it,
/// and each of those is handed to [`umount_all`]. Mount points are processed
/// longest path first so that nested mounts are detached before the mounts
/// that contain them.
///
/// `flags` is forwarded unchanged to [`umount_all`]. When `target` is `None`
/// the call is a no-op.
///
/// # Errors
///
/// Returns the first error reported by [`umount_all`]; remaining mount points
/// are not attempted after a failure.
#[cfg(target_os = "linux")]
pub fn umount_recursive(target: Option<&str>, flags: i32) -> Result<()> {
    let target = match target {
        Some(target) => target,
        None => return Ok(()),
    };

    let mut mounts = get_mounts(Some(prefix_filter(target.to_string())));
    // Longest mount point first: children must go before their parents.
    mounts.sort_by_key(|m| std::cmp::Reverse(m.mountpoint.len()));

    for mount in mounts {
        umount_all(Some(mount.mountpoint), flags)?;
    }
    Ok(())
}

/// Stand-in for `umount_recursive` on targets other than Linux.
///
/// Performs no work and always returns `Ok(())`.
#[cfg(not(target_os = "linux"))]
pub fn umount_recursive(target: Option<&str>, flags: i32) -> Result<()> {
    // Nothing is unmounted here; the arguments are consumed only so that they
    // are not reported as unused.
    let _ = (target, flags);
    Ok(())
}

#[cfg(target_os = "linux")]
pub fn umount_all(target: Option<String>, flags: i32) -> Result<()> {
if let Some(target) = target {
if let Err(e) = std::fs::metadata(target.clone()) {
if e.kind() == std::io::ErrorKind::NotFound {
return Ok(());
}
}
loop {
if let Err(e) = nix::mount::umount2(
&std::path::PathBuf::from(&target),
MntFlags::from_bits(flags).unwrap(),
) {
if e == nix::errno::Errno::EINVAL {
return Ok(());
}
return Err(Error::from(e));
}
}
};
Ok(())
}

/// Builds a skip-predicate for `get_mounts` that keeps only mounts at or
/// below `prefix`.
///
/// The returned closure follows the `get_mounts` filter convention: it
/// returns `true` when the entry must be skipped and `false` when it must be
/// kept. An entry is kept when its mount point equals `prefix` or is a path
/// underneath it. Both sides are compared with a trailing `/` appended, so
/// `/a/bc` is not mistaken for a child of `/a/b`.
#[cfg(target_os = "linux")]
fn prefix_filter(prefix: String) -> impl Fn(MountInfo) -> bool {
    // Append the separator once here rather than on every invocation.
    let prefix = prefix + "/";
    move |m: MountInfo| !(m.mountpoint + "/").starts_with(prefix.as_str())
}

#[cfg(target_os = "linux")]
fn get_mounts<F>(f: Option<F>) -> Vec<MountInfo>
where
F: Fn(MountInfo) -> bool,
{
let file = File::open("/proc/self/mountinfo").expect("Failed to open /proc/self/mountinfo");
let reader = BufReader::new(file);

let lines: Vec<String> = reader.lines().filter_map(|line| line.ok()).collect();
let mount_points = lines
.into_iter()
.filter_map(|line| {
/*
See http://man7.org/linux/man-pages/man5/proc.5.html
36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
(1) mount ID: unique identifier of the mount (may be reused after umount)
(2) parent ID: ID of parent (or of self for the top of the mount tree)
(3) major:minor: value of st_dev for files on filesystem
(4) root: root of the mount within the filesystem
(5) mount point: mount point relative to the process's root
(6) mount options: per mount options
(7) optional fields: zero or more fields of the form "tag[:value]"
(8) separator: marks the end of the optional fields
(9) filesystem type: name of filesystem of the form "type[.subtype]"
(10) mount source: filesystem specific information or "none"
(11) super options: per super block options
In other words, we have:
* 6 mandatory fields (1)..(6)
* 0 or more optional fields (7)
* a separator field (8)
* 3 mandatory fields (9)..(11)
*/
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() < 10 {
// mountpoint parse error.
return None;
}
// separator field
let mut sep_idx = parts.len() - 4;
// In Linux <= 3.9 mounting a cifs with spaces in a share
// name (like "//srv/My Docs") _may_ end up having a space
// in the last field of mountinfo (like "unc=//serv/My Docs").
// Since kernel 3.10-rc1, cifs option "unc=" is ignored,
// so spaces should not appear.
//
// Check for a separator, and work around the spaces bug
for i in (0..sep_idx).rev() {
if parts[i] == "-" {
sep_idx = i;
break;
}
if sep_idx == 5 {
// mountpoint parse error
return None;
}
}

let mut mount_info = MountInfo::default();
mount_info.mountpoint = parts[4].to_string();
mount_info.fs_type = parts[sep_idx + 1].to_string();
mount_info.source = parts[sep_idx + 2].to_string();
mount_info.vfs_options = parts[sep_idx + 3].to_string();
mount_info.id = str::parse::<u32>(parts[0]).unwrap();
mount_info.parent = str::parse::<u32>(parts[1]).unwrap();
let major_minor = parts[2].splitn(3, ":").collect::<Vec<&str>>();
if major_minor.len() != 2 {
// mountpoint parse error.
return None;
}
mount_info.major = str::parse::<u32>(major_minor[0]).unwrap();
mount_info.minor = str::parse::<u32>(major_minor[1]).unwrap();
mount_info.root = parts[3].to_string();
mount_info.options = parts[5].to_string();
mount_info.optional = parts[6..sep_idx].join(" ");
if let Some(f) = &f {
if f(mount_info.clone()) {
// skip this mountpoint. This mountpoint is not the container's mountpoint
return None;
}
}
Some(mount_info)
})
.collect();
mount_points
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests {
Expand Down

0 comments on commit 6130801

Please sign in to comment.