Skip to content

Commit

Permalink
Merge pull request #448 from DataDog/vianney/improve-container-id-com…
Browse files Browse the repository at this point in the history
…putation

[APMSP-1010] Improve container id computation

Co-authored-by: VianneyRuhlmann <[email protected]>
  • Loading branch information
dd-mergequeue[bot] and VianneyRuhlmann authored Jun 5, 2024
2 parents 56b1f7f + ac6a86c commit 0a1958c
Show file tree
Hide file tree
Showing 13 changed files with 461 additions and 85 deletions.
14 changes: 14 additions & 0 deletions ddcommon/src/entity_id/fallback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

/// Fallback implementation: there is no cgroup file to override here, so this does nothing.
pub fn set_cgroup_file() {
    // Intentionally empty.
}

/// Fallback implementation: there is no cgroup mount path to override here, so this does nothing.
pub fn set_cgroup_mount_path() {
    // Intentionally empty.
}

/// Fallback implementation: no container id can be computed here, so this always returns `None`.
pub fn get_container_id() -> Option<&'static str> {
    Option::None
}

/// Fallback implementation: no entity id can be computed here, so this always returns `None`.
pub fn get_entity_id() -> Option<&'static str> {
    Option::None
}
64 changes: 64 additions & 0 deletions ddcommon/src/entity_id/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

//! Extract the entity id and container id
//!
//! The container id can be extracted from `/proc/self/cgroup`
//!
//! The entity id is one of:
//! - `cid:<container id>` if available
//! - `in:<cgroup node inode>` if container id is not available (e.g. when using cgroupV2)
//!
//! # References
//! - [DataDog/dd-trace-go](https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go)
//! - [Qard/container-info](https://github.com/Qard/container-info/blob/master/index.js)
//! # Supported environments
//! ## Docker
//! /proc/self/cgroup should contain lines like:
//! ```text
//! 13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
//! ```
//! ## Kubernetes
//! /proc/self/cgroup should contain lines like:
//! ```text
//! 11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
//! ```
//!
//! Possibly with extra characters before id:
//! ```text
//! 1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope
//! ```
//!
//! Or a UUID:
//! ```text
//! 1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a
//! ```
//! ## ECS
//! /proc/self/cgroup should contain lines like:
//! ```text
//! 9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
//! ```
//! ## Fargate 1.3-:
//! /proc/self/cgroup should contain lines like:
//! ```text
//! 11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
//! ```
//! ## Fargate 1.4+:
//! Here we match a task id with a suffix
//! ```text
//! 1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469
//! ```

#[cfg(not(unix))]
pub use fallback::{get_container_id, get_entity_id, set_cgroup_file, set_cgroup_mount_path};

#[cfg(unix)]
pub use unix::{get_container_id, get_entity_id, set_cgroup_file, set_cgroup_mount_path};

/// Fallback module used for non-unix systems
#[cfg(not(unix))]
mod fallback;

/// Unix specific module allowing the use of unix specific functions
#[cfg(unix)]
mod unix;
144 changes: 144 additions & 0 deletions ddcommon/src/entity_id/unix/cgroup_inode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

//! This module provides functions to fetch the cgroup node path and its inode
use super::CgroupFileParsingError;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::{fs, io};

// These two constants are only used by the non-test version of the host cgroup namespace check,
// so they are compiled out of test builds.
#[cfg(not(test))]
// From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49
// Currently, the host namespace inode numbers are hardcoded, which can be used to detect
// whether we're running in the host namespace or not (does not work when running in Docker in
// Docker)
const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB;

#[cfg(not(test))]
// File whose inode is compared against `HOST_CGROUP_NAMESPACE_INODE`
const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup";

/// Looks up the metadata of the file at `path` and returns its inode number.
///
/// # Errors
/// Propagates the `io::Error` reported by `fs::metadata`, e.g. when `path` does not exist.
fn get_inode(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|metadata| metadata.ino())
}

/// Returns the path of the cgroup node described by the cgroup file located at `cgroup_path`.
///
/// Every line of the file is expected to look like
/// `<hierarchy id>:<comma separated controllers>:<path>`; lines that do not split into exactly
/// three `:`-separated fields are ignored.
///
/// - When a line lists `cgroup_v1_base_controller`, the result is
///   `<cgroup_mount_path>/<cgroup_v1_base_controller>/<path>` and parsing stops there.
/// - Otherwise, the last line whose controller list contains an empty entry (the cgroupV2 form)
///   is used and the result is `<cgroup_mount_path>/<path>`.
///
/// # Errors
/// - `CgroupFileParsingError::CannotOpenFile` if the file at `cgroup_path` cannot be opened
/// - `CgroupFileParsingError::InvalidFormat` if a line of the file cannot be read
/// - `CgroupFileParsingError::CgroupNotFound` if no line matches
fn get_cgroup_node_path(
    cgroup_v1_base_controller: &str,
    cgroup_path: &Path,
    cgroup_mount_path: &Path,
) -> Result<PathBuf, CgroupFileParsingError> {
    let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?;
    let reader = BufReader::new(file);

    let mut node_path: Option<PathBuf> = None;

    for line in reader.lines() {
        let line_content = line.map_err(|_| CgroupFileParsingError::InvalidFormat)?;

        // Only entries made of exactly three `:`-separated fields are considered
        let mut fields = line_content.split(':');
        let (controllers, node) =
            match (fields.next(), fields.next(), fields.next(), fields.next()) {
                (Some(_hierarchy_id), Some(controllers), Some(node), None) => (controllers, node),
                _ => continue,
            };

        let mut has_base_controller = false;
        let mut has_empty_controller = false;
        for controller in controllers.split(',') {
            has_base_controller |= controller == cgroup_v1_base_controller;
            has_empty_controller |= controller.is_empty();
        }
        if !has_base_controller && !has_empty_controller {
            continue;
        }

        // The V1 base controller takes precedence over the empty (cgroupV2) controller
        let matched_controller = if has_base_controller {
            cgroup_v1_base_controller
        } else {
            ""
        };

        let mut path = cgroup_mount_path.join(matched_controller);
        // The node path must stay relative: `PathBuf::push` replaces the whole path when it is
        // given an absolute one, so every leading `/` is removed, not just the first
        path.push(node.trim_start_matches('/'));
        node_path = Some(path);

        // If we matched the V1 base controller we can return, otherwise we continue until we
        // find it or default to the empty controller entry
        if has_base_controller {
            break;
        }
    }
    node_path.ok_or(CgroupFileParsingError::CgroupNotFound)
}

#[cfg(not(test))]
/// Checks whether we are running outside of the host cgroup namespace.
///
/// Returns `Ok(())` when the inode of `DEFAULT_CGROUP_NS_PATH` can be read and differs from
/// `HOST_CGROUP_NAMESPACE_INODE`. Returns `Err(())` when the inode matches the host one or
/// cannot be read.
///
/// This check is disabled when testing.
fn is_host_cgroup_namespace() -> Result<(), ()> {
    match get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)) {
        Ok(inode) if inode != HOST_CGROUP_NAMESPACE_INODE => Ok(()),
        _ => Err(()),
    }
}

#[cfg(test)]
/// Mock version used in tests: always reports `Ok(())` so the namespace check never short-circuits
fn is_host_cgroup_namespace() -> Result<(), ()> {
    Result::Ok(())
}

/// Returns the inode of the cgroup node as a decimal string, or `None` when it is unavailable.
///
/// `None` is returned when:
/// - `is_host_cgroup_namespace` returns an error,
/// - the cgroup node path cannot be determined from the file at `cgroup_path`,
/// - the inode of the cgroup node path cannot be read.
pub fn get_cgroup_inode(
    cgroup_v1_base_controller: &str,
    cgroup_path: &Path,
    cgroup_mount_path: &Path,
) -> Option<String> {
    // When running in the host cgroup namespace we are not in a container, so the inode we
    // would return is not related to a container: do not compute it.
    if is_host_cgroup_namespace().is_err() {
        return None;
    }

    let node_path =
        get_cgroup_node_path(cgroup_v1_base_controller, cgroup_path, cgroup_mount_path).ok()?;
    let inode = get_inode(&node_path).ok()?;
    Some(inode.to_string())
}

#[cfg(test)]
mod tests {
use super::super::CGROUP_V1_BASE_CONTROLLER;
use super::*;
use maplit::hashmap;

/// Runs `get_cgroup_node_path` on fixture files located in `<CARGO_MANIFEST_DIR>/tests` and
/// compares the outcome with the expected node path or error.
#[test]
fn test_cgroup_node_path_parsing() {
let test_root_dir: &Path = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests"));

// Maps each fixture file name to the result expected from parsing it
let test_files = hashmap! {
// parsing standard cgroupV2 file
"cgroup.v2" => Ok("/sys/fs/cgroup"),
// parsing cgroupV2 file with custom path
"cgroup.v2_custom_path" => Ok("/sys/fs/cgroup/custom/path"),
// a cgroupv1 container cgroup file returns the memory controller path
"cgroup.docker" => Ok("/sys/fs/cgroup/memory/docker/9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"),
// a non-container Linux cgroup file returns the memory controller path
"cgroup.linux" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"),
// a cgroupV1 file with an entry using 0 as a hierarchy id should not be detected as V2
"cgroup.v1_with_id_0" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"),
// a cgroupV1 file using multiple controllers in the same entry returns the correct path
"cgroup.multiple_controllers" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"),
// a cgroupV1 file missing the memory controller should return an error
"cgroup.no_memory" => Err(CgroupFileParsingError::CgroupNotFound),
// missing cgroup file should return a CannotOpenFile Error
"path/to/cgroup.missing" => Err(CgroupFileParsingError::CannotOpenFile),
// the fixture with an invalid line pattern is expected to yield a CgroupNotFound error
"cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::CgroupNotFound),
};

// Every fixture is parsed with `/sys/fs/cgroup` as the cgroup mount path
for (&filename, expected_result) in test_files.iter() {
assert_eq!(
get_cgroup_node_path(
CGROUP_V1_BASE_CONTROLLER,
&test_root_dir.join(filename),
Path::new("/sys/fs/cgroup")
),
expected_result.clone().map(PathBuf::from),
"testing file parsing for cgroup node path with file: {filename}"
);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,47 +1,13 @@
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

//! This module provides functions to parse the container id from the cgroup file
use super::CgroupFileParsingError;
use lazy_static::lazy_static;
use regex::Regex;
use std::error;
use std::fmt;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::path::PathBuf;

/* Extract container id from /proc/self/cgroup
Sources:
- https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go
- https://github.com/Qard/container-info/blob/master/index.js
Following environments are supported:
- Docker
/proc/self/cgroup should contain lines like:
`13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860`)
- Kubernetes
/proc/self/cgroup should contain lines like:
`11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1`
Possibly with extra characters before id:
`1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope`
Or a UUID:
`1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a`
- ECS
/proc/self/cgroup should contain lines like:
`9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce`
- Fargate 1.3-:
/proc/self/cgroup should contain lines like:
`11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da`
- Fargate 1.4+:
Here we match a task id with a suffix
`1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469`
*/

const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup";

/// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values
static mut TESTING_CGROUP_PATH: Option<String> = None;

const UUID_SOURCE: &str =
r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}";
Expand All @@ -50,23 +16,12 @@ const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+";

lazy_static! {
static ref LINE_REGEX: Regex = Regex::new(r"^\d+:[^:]*:(.+)$").unwrap();
static ref CONTAINER_REGEX: Regex = Regex::new(&format!(
pub static ref CONTAINER_REGEX: Regex = Regex::new(&format!(
r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$"
))
.unwrap();
}

/// Error signalling that no container id could be found.
#[derive(Debug, Clone)]
struct ContainerIdNotFoundError;

impl fmt::Display for ContainerIdNotFoundError {
    /// Writes the fixed, human readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("container id not found")
    }
}

impl error::Error for ContainerIdNotFoundError {}

fn parse_line(line: &str) -> Option<&str> {
// unwrap is OK since if regex matches then the groups must exist
LINE_REGEX
Expand All @@ -75,42 +30,20 @@ fn parse_line(line: &str) -> Option<&str> {
.map(|captures| captures.get(1).unwrap().as_str())
}

fn extract_container_id(filepath: &Path) -> Result<String, Box<dyn std::error::Error>> {
let file = File::open(filepath)?;
/// Extract container id contained in the cgroup file located at `cgroup_path`
pub fn extract_container_id(cgroup_path: &Path) -> Result<String, CgroupFileParsingError> {
let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?;
let reader = BufReader::new(file);

for line in reader.lines() {
if let Some(container_id) = parse_line(&line?) {
if let Some(container_id) =
parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?)
{
return Ok(String::from(container_id));
}
}

Err(ContainerIdNotFoundError.into())
}

/// Replaces the cgroup file path stored in `TESTING_CGROUP_PATH` with `file`.
///
/// # Safety
/// Must not be called in multi-threaded contexts
pub unsafe fn set_cgroup_file(file: String) {
    TESTING_CGROUP_PATH = Some(file);
}

fn get_cgroup_path() -> PathBuf {
// Safety: we assume set_cgroup_file is not called when it shouldn't
if let Some(path) = unsafe { TESTING_CGROUP_PATH.as_ref() } {
Path::new(path.as_str()).into()
} else {
Path::new(DEFAULT_CGROUP_PATH).into()
}
}

pub fn get_container_id() -> Option<&'static str> {
// cache container id in a static to avoid recomputing it at each call

lazy_static! {
static ref CONTAINER_ID: Option<String> =
extract_container_id(get_cgroup_path().as_path()).ok();
}
CONTAINER_ID.as_deref()
Err(CgroupFileParsingError::ContainerIdNotFound)
}

#[cfg(test)]
Expand All @@ -119,7 +52,7 @@ mod tests {
use maplit::hashmap;

#[test]
fn line_parsing() {
fn test_container_id_line_parsing() {
let test_lines = hashmap! {
"" => None,
"other_line" => None,
Expand Down Expand Up @@ -151,12 +84,16 @@ mod tests {
=> None,
};
for (line, &expected_result) in test_lines.iter() {
assert_eq!(parse_line(line), expected_result);
assert_eq!(
parse_line(line),
expected_result,
"testing line parsing for container id with line: {line}"
);
}
}

#[test]
fn file_parsing() {
fn test_container_id_file_parsing() {
let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests"));

let test_files = hashmap! {
Expand Down Expand Up @@ -208,7 +145,7 @@ mod tests {
assert_eq!(
extract_container_id(&test_root_dir.join(filename)).ok(),
expected_result.map(String::from),
"testing file {filename}"
"testing file parsing for container id with file: {filename}"
);
}
}
Expand Down
Loading

0 comments on commit 0a1958c

Please sign in to comment.