Skip to content

Commit

Permalink
feat: set THP_DISABLE=true in shim, and restore it before starting runc
Browse files Browse the repository at this point in the history
If /sys/kernel/mm/transparent_hugepage/enabled=always, the shim process
will use huge pages, which will consume a lot of memory.

Just like this:
ps -efo pid,rss,comm | grep shim
    PID   RSS COMMAND
   2614  7464 containerd-shim

I don't think shim needs to use huge pages, and if we turn off the huge
pages option, we can save a lot of memory resources.

After we set THP_DISABLE=true:
ps -efo pid,comm,rss
    PID COMMAND           RSS
1629841 containerd-shim  5648

containerd
    |
    |--shim1   --start
        |
        |--shim2    (this shim will run on the host)
            |
            |--runc create (when containerd send create request by ttrpc)
                |
                |--runc init (this is the pid 1 in container)

    We should set thp_disabled=1 in shim1 --start: if we set it in shim2,
    huge pages have already been allocated by the time main() runs, and
    setting thp_disabled cannot undo pages that are already huge.
    So we need to set thp_disabled=1 in shim1 so that shim2 inherits the
    setting from its parent process shim1 and has huge pages disabled
    from the moment it starts.

    For runc processes, we need to restore thp_disabled to its previous value
    ('before') in shim2 after fork() and before execve(), so we use
    cmd.pre_exec to do this.
  • Loading branch information
zzzzzzzzzy9 committed Sep 12, 2023
1 parent 9388ecf commit 30351c5
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/runc-shim/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ serde_json = "1.0.74"
oci-spec = "0.6.0"
crossbeam = "0.8.1"
uuid = { version = "1.0.0", features = ["v4"] }
prctl = "1.0.0"

# Async dependencies
async-trait = { workspace = true }
Expand Down
32 changes: 32 additions & 0 deletions crates/runc-shim/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ impl Shim for Service {
}
}

#[cfg(not(target_os = "linux"))]
async fn start_shim(&mut self, opts: StartOpts) -> containerd_shim::Result<String> {
let mut grouping = opts.id.clone();
let spec = read_spec("").await?;
Expand All @@ -84,6 +85,37 @@ impl Shim for Service {
Ok(address)
}

/// Linux variant of `start_shim`: spawns the shim with transparent huge pages (THP)
/// disabled and returns the address produced by `spawn`.
///
/// Steps, in order:
/// 1. The grouping key starts as `opts.id` and is replaced by the value of the first
///    label from `GROUP_LABELS` found in the spec annotations, if any.
/// 2. The current THP-disable flag of this process is captured as a string (empty when
///    it cannot be queried) and handed to `spawn` as the `THP_DISABLED` variable, so the
///    original setting can be restored later for processes that should not inherit the
///    override.
/// 3. THP is disabled for this process before spawning, so that the spawned shim
///    inherits the setting from its parent.
/// 4. The returned address is written to the file `address`.
///
/// # Errors
///
/// Propagates failures from `read_spec`, `spawn` and `write_str_to_file`. Failures of
/// the `prctl` calls are deliberately not treated as errors.
#[cfg(target_os = "linux")]
async fn start_shim(&mut self, opts: StartOpts) -> containerd_shim::Result<String> {
    let mut grouping = opts.id.clone();
    let spec = read_spec("").await?;
    if let Some(annotations) = spec.annotations() {
        // The first matching label wins; later labels are not consulted.
        if let Some(value) = GROUP_LABELS
            .iter()
            .find_map(|&label| annotations.get(label))
        {
            grouping = value.to_string();
        }
    }

    // Remember the THP setting in effect *before* we override it. An empty string marks
    // "unknown", which consumers cannot parse as a bool and therefore leave untouched.
    let thp_disabled = prctl::get_thp_disable()
        .map(|flag| flag.to_string())
        .unwrap_or_default();

    // Best effort: if the flag cannot be changed we still want the shim to start, so the
    // result is intentionally discarded.
    let _ = prctl::set_thp_disable(true);

    let vars: Vec<(&str, &str)> = vec![("THP_DISABLED", thp_disabled.as_str())];

    let address = spawn(opts, &grouping, vars).await?;
    write_str_to_file("address", &address).await?;
    Ok(address)
}

async fn delete_shim(&mut self) -> containerd_shim::Result<DeleteResponse> {
let namespace = self.namespace.as_str();
let bundle = current_dir().map_err(io_error!(e, "get current dir"))?;
Expand Down
1 change: 1 addition & 0 deletions crates/runc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde", "std"] }
uuid = { version = "1.0.0", features = ["v4"] }
os_pipe = "1.0.0"
prctl = "1.0.0"

# Async dependencies
tokio = { workspace = true, features = ["full"], optional = true }
Expand Down
36 changes: 36 additions & 0 deletions crates/runc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ pub trait Spawner: Debug {
/// and some other utilities.
#[cfg(feature = "async")]
impl Runc {
#[cfg(not(target_os = "linux"))]
async fn launch(&self, cmd: Command, combined_output: bool) -> Result<Response> {
debug!("Execute command {:?}", cmd);
let (status, pid, stdout, stderr) = self.spawner.execute(cmd).await?;
Expand All @@ -388,6 +389,41 @@ impl Runc {
})
}
}
#[cfg(target_os = "linux")]
async fn launch(&self, mut cmd: Command, combined_output: bool) -> Result<Response> {
debug!("Execute command {:?}", cmd);
if let Ok(thp) = std::env::var("THP_DISABLED") {
if let Ok(thp_disabled) = thp.parse::<bool>() {
unsafe {
cmd.pre_exec(move || {
if let Err(e) = prctl::set_thp_disable(thp_disabled) {
log::debug!("set_thp_disable err: {}", e);
};
Ok(())
});
}
}
}
let (status, pid, stdout, stderr) = self.spawner.execute(cmd).await?;
if status.success() {
let output = if combined_output {
stdout + stderr.as_str()
} else {
stdout
};
Ok(Response {
pid,
status,
output,
})
} else {
Err(Error::CommandFailed {
status,
stdout,
stderr,
})
}
}

/// Create a new container
pub async fn create<P>(
Expand Down

0 comments on commit 30351c5

Please sign in to comment.