Skip to content

Commit

Permalink
feat: set THP_DISABLE=true in shim, and restore it before starting runc
Browse files Browse the repository at this point in the history
If /sys/kernel/mm/transparent_hugepage/enabled=always, the shim process
will use huge pages, which will consume a lot of memory.

Just like this:
ps -efo pid,rss,comm | grep shim
    PID   RSS COMMAND
   2614  7464 containerd-shim

I don't think shim needs to use huge pages, and if we turn off the huge
pages option, we can save a lot of memory resources.

After we set THP_DISABLE=true:
ps -efo pid,comm,rss
    PID COMMAND           RSS
1629841 containerd-shim  5648

containerd
    |
    |--shim1   --start
        |
        |--shim2    (this shim keeps running on the host)
            |
            |--runc create (when containerd send create request by ttrpc)
                |
                |--runc init (this is the pid 1 in container)

    We should set thp_disabled=1 in shim1 --start, because if we set it
    in shim2, the huge pages have already been set up by the time func
    main() is running, and setting thp_disabled afterwards cannot change
    the huge pages that are already in use.
    So we need to set thp_disabled=1 in shim1 so that shim2 inherits the
    setting from its parent process shim1, and shim2 already has huge
    pages disabled when it starts.

    For runc processes, we need to restore thp_disabled to the value it had
    before the shim changed it ('before'), in shim2 after fork() and before
    execve(). So we use cmd.pre_exec to do this.
  • Loading branch information
zzzzzzzzzy9 committed Sep 28, 2023
1 parent 9388ecf commit 50b1963
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 2 deletions.
1 change: 1 addition & 0 deletions crates/runc-shim/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ serde_json = "1.0.74"
oci-spec = "0.6.0"
crossbeam = "0.8.1"
uuid = { version = "1.0.0", features = ["v4"] }
prctl = "1.0.0"

# Async dependencies
async-trait = { workspace = true }
Expand Down
21 changes: 20 additions & 1 deletion crates/runc-shim/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,27 @@ impl Shim for Service {
}
None => {}
}
#[cfg(not(target_os = "linux"))]
let thp_disabled = String::new();
#[cfg(target_os = "linux")]
// Our goal is to set THP disable = true on the shim side and then restore the
// original THP disable value before starting runc. So we only need to look at
// the return value of prctl::get_thp_disable, which is Result<bool, i32>.
let thp_disabled = match prctl::get_thp_disable() {
Ok(x) => {
// prctl::set_thp_disable returns Result<(), i32>. We deliberately ignore
// that result: even if the setting fails, the shim process should not
// exit, so there is no need to inspect the return value.
let _ = prctl::set_thp_disable(true);
x.to_string()
}
Err(_) => String::new(),
};
let vars: Vec<(&str, &str)> = vec![("THP_DISABLED", thp_disabled.as_str())];

let address = spawn(opts, &grouping, Vec::new()).await?;
let address = spawn(opts, &grouping, vars).await?;
write_str_to_file("address", &address).await?;
Ok(address)
}
Expand Down
1 change: 1 addition & 0 deletions crates/runc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde", "std"] }
uuid = { version = "1.0.0", features = ["v4"] }
os_pipe = "1.0.0"
prctl = "1.0.0"

# Async dependencies
tokio = { workspace = true, features = ["full"], optional = true }
Expand Down
16 changes: 15 additions & 1 deletion crates/runc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,22 @@ pub trait Spawner: Debug {
/// and some other utilities.
#[cfg(feature = "async")]
impl Runc {
async fn launch(&self, cmd: Command, combined_output: bool) -> Result<Response> {
async fn launch(&self, mut cmd: Command, combined_output: bool) -> Result<Response> {
debug!("Execute command {:?}", cmd);
unsafe {
cmd.pre_exec(move || {
#[cfg(target_os = "linux")]
if let Ok(thp) = std::env::var("THP_DISABLED") {
if let Ok(thp_disabled) = thp.parse::<bool>() {
if let Err(e) = prctl::set_thp_disable(thp_disabled) {
debug!("set_thp_disable err: {}", e);
};
}
}
Ok(())
});
}

let (status, pid, stdout, stderr) = self.spawner.execute(cmd).await?;
if status.success() {
let output = if combined_output {
Expand Down

0 comments on commit 50b1963

Please sign in to comment.