From 30351c56a347ffa738cd0eb4b389317da0a10ff3 Mon Sep 17 00:00:00 2001 From: zhang yu 10307750 Date: Mon, 11 Sep 2023 11:49:13 +0800 Subject: [PATCH] feat: set THP_DISABLE=true in shim, and restore it before starting runc If /sys/kernel/mm/transparent_hugepage/enabled=always, the shim process will use huge pages, which consumes a lot of memory. For example: ps -efo pid,rss,comm | grep shim PID RSS COMMAND 2614 7464 containerd-shim I don't think the shim needs to use huge pages, and if we turn off the huge pages option, we can save a lot of memory. After we set THP_DISABLE=true: ps -efo pid,comm,rss PID COMMAND RSS 1629841 containerd-shim 5648 containerd | |--shim1 --start | |--shim2 (this shim will run on the host) | |--runc create (when containerd sends a create request via ttrpc) | |--runc init (this is PID 1 in the container) We should set thp_disabled=1 in shim1 --start, because if we set it in shim2, the huge pages have already been set up by the time func main() is running, and setting thp_disabled afterwards cannot change the huge pages that are already set up. So we need to set thp_disabled=1 in shim1 so that shim2 inherits the setting from its parent process shim1, and shim2 already has huge pages disabled when it starts. For runc processes, we need to restore thp_disabled to its previous ('before') value in shim2 after fork() and before execve(). So we use cmd.pre_exec to do this. 
--- crates/runc-shim/Cargo.toml | 1 + crates/runc-shim/src/service.rs | 32 +++++++++++++++++++++++++++++ crates/runc/Cargo.toml | 1 + crates/runc/src/lib.rs | 36 +++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/crates/runc-shim/Cargo.toml b/crates/runc-shim/Cargo.toml index 2e31e3fe..996316b9 100644 --- a/crates/runc-shim/Cargo.toml +++ b/crates/runc-shim/Cargo.toml @@ -33,6 +33,7 @@ serde_json = "1.0.74" oci-spec = "0.6.0" crossbeam = "0.8.1" uuid = { version = "1.0.0", features = ["v4"] } +prctl = "1.0.0" # Async dependencies async-trait = { workspace = true } diff --git a/crates/runc-shim/src/service.rs b/crates/runc-shim/src/service.rs index 512a3d31..bfc7794a 100644 --- a/crates/runc-shim/src/service.rs +++ b/crates/runc-shim/src/service.rs @@ -64,6 +64,7 @@ impl Shim for Service { } } + #[cfg(not(target_os = "linux"))] async fn start_shim(&mut self, opts: StartOpts) -> containerd_shim::Result { let mut grouping = opts.id.clone(); let spec = read_spec("").await?; @@ -84,6 +85,37 @@ impl Shim for Service { Ok(address) } + #[cfg(target_os = "linux")] + async fn start_shim(&mut self, opts: StartOpts) -> containerd_shim::Result { + let mut grouping = opts.id.clone(); + let spec = read_spec("").await?; + match spec.annotations() { + Some(annotations) => { + for &label in GROUP_LABELS.iter() { + if let Some(value) = annotations.get(label) { + grouping = value.to_string(); + break; + } + } + } + None => {} + } + + let mut vars: Vec<(&str, &str)> = Vec::new(); + let thp_disabled = match prctl::get_thp_disable() { + Ok(x) => x.to_string(), + _ => "".to_string(), + }; + match prctl::set_thp_disable(true) { + _ => {} + } + vars.push(("THP_DISABLED", thp_disabled.as_str())); + + let address = spawn(opts, &grouping, vars).await?; + write_str_to_file("address", &address).await?; + Ok(address) + } + async fn delete_shim(&mut self) -> containerd_shim::Result { let namespace = self.namespace.as_str(); let bundle = 
current_dir().map_err(io_error!(e, "get current dir"))?; diff --git a/crates/runc/Cargo.toml b/crates/runc/Cargo.toml index a4c8b67a..e06276ce 100644 --- a/crates/runc/Cargo.toml +++ b/crates/runc/Cargo.toml @@ -28,6 +28,7 @@ thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde", "std"] } uuid = { version = "1.0.0", features = ["v4"] } os_pipe = "1.0.0" +prctl = "1.0.0" # Async dependencies tokio = { workspace = true, features = ["full"], optional = true } diff --git a/crates/runc/src/lib.rs b/crates/runc/src/lib.rs index d222dc1b..8da809e2 100644 --- a/crates/runc/src/lib.rs +++ b/crates/runc/src/lib.rs @@ -366,6 +366,7 @@ pub trait Spawner: Debug { /// and some other utilities. #[cfg(feature = "async")] impl Runc { + #[cfg(not(target_os = "linux"))] async fn launch(&self, cmd: Command, combined_output: bool) -> Result { debug!("Execute command {:?}", cmd); let (status, pid, stdout, stderr) = self.spawner.execute(cmd).await?; @@ -388,6 +389,41 @@ impl Runc { }) } } + #[cfg(target_os = "linux")] + async fn launch(&self, mut cmd: Command, combined_output: bool) -> Result { + debug!("Execute command {:?}", cmd); + if let Ok(thp) = std::env::var("THP_DISABLED") { + if let Ok(thp_disabled) = thp.parse::() { + unsafe { + cmd.pre_exec(move || { + if let Err(e) = prctl::set_thp_disable(thp_disabled) { + log::debug!("set_thp_disable err: {}", e); + }; + Ok(()) + }); + } + } + } + let (status, pid, stdout, stderr) = self.spawner.execute(cmd).await?; + if status.success() { + let output = if combined_output { + stdout + stderr.as_str() + } else { + stdout + }; + Ok(Response { + pid, + status, + output, + }) + } else { + Err(Error::CommandFailed { + status, + stdout, + stderr, + }) + } + } /// Create a new container pub async fn create

(