Skip to content

Commit

Permalink
Merge pull request #163 from Mossaka/fix-stats
Browse files Browse the repository at this point in the history
feat: add cgroup v2 support for metrics
  • Loading branch information
mxpv authored Aug 3, 2023
2 parents 27a503a + aa44fbf commit 816771b
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:

strategy:
matrix:
os: [ubuntu-20.04]
os: [ubuntu-20.04, ubuntu-22.04]
containerd: [v1.6.21, v1.7.1]

steps:
Expand Down
87 changes: 77 additions & 10 deletions crates/shim/src/cgroup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use cgroups_rs::{
cgroup::get_cgroups_relative_paths_by_pid, hierarchies, Cgroup, CgroupPid, MaxValue, Subsystem,
};
use containerd_shim_protos::{
cgroups::metrics::{CPUStat, CPUUsage, MemoryEntry, MemoryStat, Metrics},
cgroups::metrics::{CPUStat, CPUUsage, MemoryEntry, MemoryStat, Metrics, PidsStat, Throttle},
protobuf::{well_known_types::any::Any, Message},
shim::oci::Options,
};
Expand Down Expand Up @@ -96,20 +96,68 @@ fn write_process_oom_score(pid: u32, score: i64) -> Result<()> {
/// Collect cgroup stats for the given process, returning only the necessary parts.
pub fn collect_metrics(pid: u32) -> Result<Metrics> {
let mut metrics = Metrics::new();
// get container main process cgroup
let path =
get_cgroups_relative_paths_by_pid(pid).map_err(other_error!(e, "get process cgroup"))?;
let cgroup = Cgroup::load_with_relative_paths(hierarchies::auto(), Path::new("."), path);

let hierarchies = hierarchies::auto();
let cgroup = if hierarchies.v2() {
let path = format!("/proc/{}/cgroup", pid);
let content = fs::read_to_string(path).map_err(io_error!(e, "read cgroup"))?;
let content = content.strip_suffix('\n').unwrap_or_default();

let parts: Vec<&str> = content.split("::").collect();
let path_parts: Vec<&str> = parts[1].split('/').collect();
let namespace = path_parts[1];
let cgroup_name = path_parts[2];
Cgroup::load(
hierarchies,
format!("/sys/fs/cgroup/{namespace}/{cgroup_name}").as_str(),
)
} else {
// get container main process cgroup
let path = get_cgroups_relative_paths_by_pid(pid)
.map_err(other_error!(e, "get process cgroup"))?;
Cgroup::load_with_relative_paths(hierarchies::auto(), Path::new("."), path)
};

// To keep things simple, fill in only the necessary metrics.
for sub_system in Cgroup::subsystems(&cgroup) {
match sub_system {
Subsystem::CpuAcct(cpuacct_ctr) => {
Subsystem::Cpu(cpu_ctr) => {
let mut cpu_usage = CPUUsage::new();
cpu_usage.set_total(cpuacct_ctr.cpuacct().usage);
let mut cpu_stat = CPUStat::new();
cpu_stat.set_usage(cpu_usage);
metrics.set_cpu(cpu_stat);
let mut throttle = Throttle::new();
let stat = cpu_ctr.cpu().stat;
for line in stat.lines() {
let parts = line.split(' ').collect::<Vec<&str>>();
if parts.len() != 2 {
Err(Error::Other(format!("invalid cpu stat line: {}", line)))?;
}

// https://github.com/opencontainers/runc/blob/dbe8434359ca35af1c1e10df42b1f4391c1e1010/libcontainer/cgroups/fs2/cpu.go#L70
match parts[0] {
"usage_usec" => {
cpu_usage.set_total(parts[1].parse::<u64>().unwrap());
}
"user_usec" => {
cpu_usage.set_user(parts[1].parse::<u64>().unwrap());
}
"system_usec" => {
cpu_usage.set_kernel(parts[1].parse::<u64>().unwrap());
}
"nr_periods" => {
throttle.set_periods(parts[1].parse::<u64>().unwrap());
}
"nr_throttled" => {
throttle.set_throttled_periods(parts[1].parse::<u64>().unwrap());
}
"throttled_usec" => {
throttle.set_throttled_time(parts[1].parse::<u64>().unwrap());
}
_ => {}
}
}
let mut cpu_stats = CPUStat::new();
cpu_stats.set_throttling(throttle);
cpu_stats.set_usage(cpu_usage);
metrics.set_cpu(cpu_stats);
}
Subsystem::Mem(mem_ctr) => {
let mem = mem_ctr.memory_stat();
Expand All @@ -120,6 +168,25 @@ pub fn collect_metrics(pid: u32) -> Result<Metrics> {
mem_stat.set_total_inactive_file(mem.stat.total_inactive_file);
metrics.set_memory(mem_stat);
}
Subsystem::Pid(pid_ctr) => {
let mut pid_stats = PidsStat::new();
pid_stats.set_current(
pid_ctr
.get_pid_current()
.map_err(other_error!(e, "get current pid"))?,
);
pid_stats.set_limit(
pid_ctr
.get_pid_max()
.map(|val| match val {
// See https://github.com/opencontainers/runc/blob/dbe8434359ca35af1c1e10df42b1f4391c1e1010/libcontainer/cgroups/fs/pids.go#L55
cgroups_rs::MaxValue::Max => 0,
cgroups_rs::MaxValue::Value(val) => val as u64,
})
.map_err(other_error!(e, "get pid limit"))?,
);
metrics.set_pids(pid_stats)
}
_ => {}
}
}
Expand Down

0 comments on commit 816771b

Please sign in to comment.