From abef32a84c9afac2064ab8e532a72761a0dae553 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Thu, 23 May 2024 14:50:08 +0200 Subject: [PATCH 01/13] Implement entity-id and cgroup inode computation --- ddcommon/src/container_id.rs | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index 5bea5d3eb..b7b90d4ec 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -5,8 +5,10 @@ use lazy_static::lazy_static; use regex::Regex; use std::error; use std::fmt; +use std::fs; use std::fs::File; use std::io::{BufRead, BufReader}; +use std::os::unix::fs::MetadataExt; use std::path::Path; use std::path::PathBuf; @@ -39,6 +41,10 @@ Following environments are supported: */ const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; +const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; + +/// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. 
+const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values static mut TESTING_CGROUP_PATH: Option = None; @@ -88,6 +94,44 @@ fn extract_container_id(filepath: &Path) -> Result Result> { + let meta = fs::metadata(path)?; + Ok(meta.ino()) +} + +/// Returns the cgroup mount path associated with `base_controller` or the default one for cgroupV2 +fn get_cgroup_node_path( + base_controller: &str, + cgroup_mount_path: &Path, +) -> Result> { + let file = File::open(cgroup_mount_path)?; + let reader = BufReader::new(file); + + let mut node_path: Option = None; + + for (index, line) in reader.lines().enumerate() { + let line_content = &line?; + let cgroup_entry: Vec<&str> = line_content.split(":").collect(); + if cgroup_entry.len() != 3 { + return Err("Error while parsing cgroup file".to_owned().into()); + } + let controllers: Vec<&str> = cgroup_entry[1].split(",").collect(); + // Only keep empty controller if it is the first line as cgroupV2 uses only one line + if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { + let mut path = Path::new(DEFAULT_CGROUP_MOUNT_PATH).join(cgroup_entry[1]); + path.push(cgroup_entry[2].strip_prefix("/").unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative + node_path = Some(path); + + // if we are using cgroupV1 we can stop looking for the controller + if index != 0 { + break; + } + } + } + node_path.ok_or("No matching cgroup".to_owned().into()) +} + /// # Safety /// Must not be called in multi-threaded contexts pub unsafe fn set_cgroup_file(file: String) { @@ -103,6 +147,7 @@ fn get_cgroup_path() -> PathBuf { } } +/// Returns the `container_id` if available in the cgroup file, otherwise returns `None` pub fn get_container_id() -> Option<&'static str> { // cache container id in a static to avoid recomputing it at each call @@ -113,6 +158,33 @@ pub fn get_container_id() -> Option<&'static str> { 
CONTAINER_ID.as_deref() } +/// Returns the `cgroup_inode` if available, otherwise `None` +pub fn get_cgroup_inode() -> Option<&'static str> { + lazy_static! { + static ref CGROUP_INODE: Option = { + let cgroup_mount_path = + get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) + .ok()?; + Some(get_inode(&cgroup_mount_path).ok()?.to_string()) + }; + } + CGROUP_INODE.as_deref() +} + +/// Returns the `entity id` either `cid-` if available or `in-` +pub fn get_entity_id() -> Option<&'static str> { + lazy_static! { + static ref ENTITY_ID: Option = if let Some(container_id) = get_container_id() { + Some(format!("cid-{container_id}")) + } else if let Some(inode) = get_cgroup_inode() { + Some(format!("in-{inode}")) + } else { + None + }; + } + ENTITY_ID.as_deref() +} + #[cfg(test)] mod tests { use super::*; From 631b050dfa989eca979f124213790b8c406dd218 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Thu, 23 May 2024 14:50:49 +0200 Subject: [PATCH 02/13] Add entity-id headers to Endpoint --- ddcommon/src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ddcommon/src/lib.rs b/ddcommon/src/lib.rs index 467051e07..6b0c8fe69 100644 --- a/ddcommon/src/lib.rs +++ b/ddcommon/src/lib.rs @@ -22,6 +22,7 @@ pub mod header { #![allow(clippy::declare_interior_mutable_const)] use hyper::{header::HeaderName, http::HeaderValue}; pub const DATADOG_CONTAINER_ID: HeaderName = HeaderName::from_static("datadog-container-id"); + pub const DATADOG_ENTITY_ID: HeaderName = HeaderName::from_static("datadog-entity-id"); pub const DATADOG_API_KEY: HeaderName = HeaderName::from_static("dd-api-key"); pub const APPLICATION_JSON: HeaderValue = HeaderValue::from_static("application/json"); } @@ -121,19 +122,30 @@ fn encode_uri_path_in_authority(scheme: &str, path: &str) -> anyhow::Result anyhow::Result { let mut builder = hyper::Request::builder() .uri(self.url.clone()) .header(hyper::header::USER_AGENT, user_agent); + // Add the Api key header if 
available if let Some(api_key) = &self.api_key { builder = builder.header(header::DATADOG_API_KEY, HeaderValue::from_str(api_key)?); } + // Add the Container Id header if available if let Some(container_id) = container_id::get_container_id() { builder = builder.header(header::DATADOG_CONTAINER_ID, container_id); } + // Add the Entity Id header if available + if let Some(entity_id) = container_id::get_entity_id() { + builder = builder.header(header::DATADOG_ENTITY_ID, entity_id); + } + Ok(builder) } } From 02686a47a414dadf0b779c1142c91b15a4cfd34f Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Thu, 23 May 2024 15:28:46 +0200 Subject: [PATCH 03/13] Add custom error type --- ddcommon/src/container_id.rs | 41 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index b7b90d4ec..50c67330c 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -7,6 +7,7 @@ use std::error; use std::fmt; use std::fs; use std::fs::File; +use std::io; use std::io::{BufRead, BufReader}; use std::os::unix::fs::MetadataExt; use std::path::Path; @@ -63,15 +64,25 @@ lazy_static! 
{ } #[derive(Debug, Clone)] -struct ContainerIdNotFoundError; +enum CgroupFileParsingError { + ContainerIdNotFound, + CgroupNotFound, + CannotOpenFile, + InvalidFormat, +} -impl fmt::Display for ContainerIdNotFoundError { +impl fmt::Display for CgroupFileParsingError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "container id not found") + match self { + CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), + CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), + CgroupFileParsingError::CannotOpenFile => write!(f, "Error while opening cgroup file"), + CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), + } } } -impl error::Error for ContainerIdNotFoundError {} +impl error::Error for CgroupFileParsingError {} fn parse_line(line: &str) -> Option<&str> { // unwrap is OK since if regex matches then the groups must exist @@ -81,21 +92,23 @@ fn parse_line(line: &str) -> Option<&str> { .map(|captures| captures.get(1).unwrap().as_str()) } -fn extract_container_id(filepath: &Path) -> Result> { - let file = File::open(filepath)?; +fn extract_container_id(filepath: &Path) -> Result { + let file = File::open(filepath).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; let reader = BufReader::new(file); for line in reader.lines() { - if let Some(container_id) = parse_line(&line?) { + if let Some(container_id) = + parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?) 
+ { return Ok(String::from(container_id)); } } - Err(ContainerIdNotFoundError.into()) + Err(CgroupFileParsingError::ContainerIdNotFound) } /// Returns the inode of file at `path` -fn get_inode(path: &Path) -> Result> { +fn get_inode(path: &Path) -> io::Result { let meta = fs::metadata(path)?; Ok(meta.ino()) } @@ -104,17 +117,17 @@ fn get_inode(path: &Path) -> Result> { fn get_cgroup_node_path( base_controller: &str, cgroup_mount_path: &Path, -) -> Result> { - let file = File::open(cgroup_mount_path)?; +) -> Result { + let file = File::open(cgroup_mount_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; let reader = BufReader::new(file); let mut node_path: Option = None; for (index, line) in reader.lines().enumerate() { - let line_content = &line?; + let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; let cgroup_entry: Vec<&str> = line_content.split(":").collect(); if cgroup_entry.len() != 3 { - return Err("Error while parsing cgroup file".to_owned().into()); + return Err(CgroupFileParsingError::InvalidFormat); } let controllers: Vec<&str> = cgroup_entry[1].split(",").collect(); // Only keep empty controller if it is the first line as cgroupV2 uses only one line @@ -129,7 +142,7 @@ fn get_cgroup_node_path( } } } - node_path.ok_or("No matching cgroup".to_owned().into()) + node_path.ok_or(CgroupFileParsingError::CgroupNotFound) } /// # Safety From 715cce032774e4478e1cf999000dcd749dd47e3e Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 24 May 2024 10:53:49 +0200 Subject: [PATCH 04/13] Make get_cgroup_inode private --- ddcommon/src/container_id.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index 50c67330c..9ff55e29a 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -145,6 +145,19 @@ fn get_cgroup_node_path( node_path.ok_or(CgroupFileParsingError::CgroupNotFound) } +/// Returns the 
`cgroup_inode` if available, otherwise `None` +fn get_cgroup_inode() -> Option<&'static str> { + lazy_static! { + static ref CGROUP_INODE: Option = { + let cgroup_mount_path = + get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) + .ok()?; + Some(get_inode(&cgroup_mount_path).ok()?.to_string()) + }; + } + CGROUP_INODE.as_deref() +} + /// # Safety /// Must not be called in multi-threaded contexts pub unsafe fn set_cgroup_file(file: String) { @@ -171,19 +184,6 @@ pub fn get_container_id() -> Option<&'static str> { CONTAINER_ID.as_deref() } -/// Returns the `cgroup_inode` if available, otherwise `None` -pub fn get_cgroup_inode() -> Option<&'static str> { - lazy_static! { - static ref CGROUP_INODE: Option = { - let cgroup_mount_path = - get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) - .ok()?; - Some(get_inode(&cgroup_mount_path).ok()?.to_string()) - }; - } - CGROUP_INODE.as_deref() -} - /// Returns the `entity id` either `cid-` if available or `in-` pub fn get_entity_id() -> Option<&'static str> { lazy_static! { From f31eba46f7d464f51e8e01a7620d93a994f432db Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 24 May 2024 13:30:07 +0200 Subject: [PATCH 05/13] Avoid inode computation on host namespace --- ddcommon/src/container_id.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index 9ff55e29a..88cc1bf37 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -43,10 +43,16 @@ Following environments are supported: const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; +const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. 
const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; +// From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 +// Currently, host namespace inode number are hardcoded, which can be used to detect +// if we're running in host namespace or not (does not work when running in DinD) +const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; + /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values static mut TESTING_CGROUP_PATH: Option = None; @@ -145,10 +151,23 @@ fn get_cgroup_node_path( node_path.ok_or(CgroupFileParsingError::CgroupNotFound) } +/// Checks if the agent is running in the host cgroup namespace. +fn is_host_cgroup_namespace() -> Result<(), ()> { + let cgroup_namespace_inode = get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; + if cgroup_namespace_inode == HOST_CGROUP_NAMESPACE_INODE { + return Err(()); + } + Ok(()) +} + /// Returns the `cgroup_inode` if available, otherwise `None` fn get_cgroup_inode() -> Option<&'static str> { lazy_static! { static ref CGROUP_INODE: Option = { + // If we're running in the host cgroup namespace, do not get the inode. + // This would indicate that we're not in a container and the inode we'd + // return is not related to a container. 
+ is_host_cgroup_namespace().ok()?; let cgroup_mount_path = get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) .ok()?; From 0c3eb47923e48814f3586d556e1c2075fca7ebff Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 24 May 2024 14:56:43 +0200 Subject: [PATCH 06/13] Add fallback for windows and use rustdoc --- ddcommon/src/container_id.rs | 612 +++++++++++++++++++---------------- 1 file changed, 331 insertions(+), 281 deletions(-) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index 88cc1bf37..2e2881637 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -1,319 +1,369 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use lazy_static::lazy_static; -use regex::Regex; -use std::error; -use std::fmt; -use std::fs; -use std::fs::File; -use std::io; -use std::io::{BufRead, BufReader}; -use std::os::unix::fs::MetadataExt; -use std::path::Path; -use std::path::PathBuf; - -/* Extract container id from /proc/self/group - -Sources: - - https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go - - https://github.com/Qard/container-info/blob/master/index.js - -Following environments are supported: - - Docker - /proc/self/cgroup should contain lines like: - `13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860`) - - Kubernetes - /proc/self/cgroup should contain lines like: - `11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1` - Possibly with extra characters before id: - `1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope` - Or a UUID: - `1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a` - - 
ECS - /proc/self/cgroup should contain lines like: - `9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce` - - Fargate 1.3-: - /proc/self/cgroup should contain lines like: - `11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da` - - Fargate 1.4+: - Here we match a task id with a suffix - `1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469` -*/ - -const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; -const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; -const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; - -/// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. -const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; - -// From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 -// Currently, host namespace inode number are hardcoded, which can be used to detect -// if we're running in host namespace or not (does not work when running in DinD) -const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; - -/// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values -static mut TESTING_CGROUP_PATH: Option = None; - -const UUID_SOURCE: &str = - r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; -const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}"; -const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+"; - -lazy_static! { - static ref LINE_REGEX: Regex = Regex::new(r"^\d+:[^:]*:(.+)$").unwrap(); - static ref CONTAINER_REGEX: Regex = Regex::new(&format!( - r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$" - )) - .unwrap(); -} +//! Extract the entity id and container id +//! +//! The container id can be extracted from `/proc/self/group` +//! +//! The entity id is either: +//! - `cid:` if available +//! 
- `in:` if container id is not available (e.g. when using cgroupV2) +//! +//! # References +//! - [DataDog/dd-trace-go](https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go) +//! - [Qard/container-info](https://github.com/Qard/container-info/blob/master/index.js) +//! # Supported environments +//! ## Docker +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 +//! ``` +//! ## Kubernetes +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1 +//! ``` +//! +//! Possibly with extra characters before id: +//! ```text +//! 1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope +//! ``` +//! +//! Or a UUID: +//! ```text +//! 1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a +//! ``` +//! ## ECS +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce +//! ``` +//! ## Fargate 1.3-: +//! /proc/self/cgroup should contain lines like: +//! ```test +//! 11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da +//! ``` +//! ## Fargate 1.4+: +//! Here we match a task id with a suffix +//! ```test +//! 1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469 +//! 
``` + +#[cfg(not(unix))] +pub use fallback::*; + +#[cfg(unix)] +pub use unix::*; + +/// Fallback module used for non-unix systems +#[cfg(not(unix))] +mod fallback { +/// # Safety + /// Marked as unsafe to match the signature of the unix version + pub unsafe fn set_cgroup_file(_file: String) {} -#[derive(Debug, Clone)] -enum CgroupFileParsingError { - ContainerIdNotFound, - CgroupNotFound, - CannotOpenFile, - InvalidFormat, -} + pub fn get_container_id() -> Option<&'static str> { + None + } -impl fmt::Display for CgroupFileParsingError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), - CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), - CgroupFileParsingError::CannotOpenFile => write!(f, "Error while opening cgroup file"), - CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), - } + pub fn get_entity_id() -> Option<&'static str> { + None } } -impl error::Error for CgroupFileParsingError {} +/// Unix specific module allowing the use of unix specific functions +#[cfg(unix)] +mod unix { + use lazy_static::lazy_static; + use regex::Regex; + use std::error; + use std::fmt; + use std::fs; + use std::fs::File; + use std::io; + use std::io::{BufRead, BufReader}; + use std::os::unix::fs::MetadataExt; + use std::path::{Path, PathBuf}; + + const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; + const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; + const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; + + /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. 
+ const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; + + // From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 + // Currently, host namespace inode number are hardcoded, which can be used to detect + // if we're running in host namespace or not (does not work when running in DinD) + const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; + + /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values + static mut TESTING_CGROUP_PATH: Option = None; + + const UUID_SOURCE: &str = + r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; + const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}"; + const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+"; -fn parse_line(line: &str) -> Option<&str> { - // unwrap is OK since if regex matches then the groups must exist - LINE_REGEX - .captures(line) - .and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str())) - .map(|captures| captures.get(1).unwrap().as_str()) -} + lazy_static! { + static ref LINE_REGEX: Regex = Regex::new(r"^\d+:[^:]*:(.+)$").unwrap(); + static ref CONTAINER_REGEX: Regex = Regex::new(&format!( + r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$" + )) + .unwrap(); + } -fn extract_container_id(filepath: &Path) -> Result { - let file = File::open(filepath).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; - let reader = BufReader::new(file); + #[derive(Debug, Clone)] + enum CgroupFileParsingError { + ContainerIdNotFound, + CgroupNotFound, + CannotOpenFile, + InvalidFormat, + } - for line in reader.lines() { - if let Some(container_id) = - parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?) 
- { - return Ok(String::from(container_id)); + impl fmt::Display for CgroupFileParsingError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), + CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), + CgroupFileParsingError::CannotOpenFile => { + write!(f, "Error while opening cgroup file") + } + CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), + } } } - Err(CgroupFileParsingError::ContainerIdNotFound) -} + impl error::Error for CgroupFileParsingError {} -/// Returns the inode of file at `path` -fn get_inode(path: &Path) -> io::Result { - let meta = fs::metadata(path)?; - Ok(meta.ino()) -} + fn parse_line(line: &str) -> Option<&str> { + // unwrap is OK since if regex matches then the groups must exist + LINE_REGEX + .captures(line) + .and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str())) + .map(|captures| captures.get(1).unwrap().as_str()) + } -/// Returns the cgroup mount path associated with `base_controller` or the default one for cgroupV2 -fn get_cgroup_node_path( - base_controller: &str, - cgroup_mount_path: &Path, -) -> Result { - let file = File::open(cgroup_mount_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; - let reader = BufReader::new(file); - - let mut node_path: Option = None; - - for (index, line) in reader.lines().enumerate() { - let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; - let cgroup_entry: Vec<&str> = line_content.split(":").collect(); - if cgroup_entry.len() != 3 { - return Err(CgroupFileParsingError::InvalidFormat); - } - let controllers: Vec<&str> = cgroup_entry[1].split(",").collect(); - // Only keep empty controller if it is the first line as cgroupV2 uses only one line - if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { - let mut path = 
Path::new(DEFAULT_CGROUP_MOUNT_PATH).join(cgroup_entry[1]); - path.push(cgroup_entry[2].strip_prefix("/").unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative - node_path = Some(path); - - // if we are using cgroupV1 we can stop looking for the controller - if index != 0 { - break; + fn extract_container_id(filepath: &Path) -> Result { + let file = File::open(filepath).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; + let reader = BufReader::new(file); + + for line in reader.lines() { + if let Some(container_id) = + parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?) + { + return Ok(String::from(container_id)); } } - } - node_path.ok_or(CgroupFileParsingError::CgroupNotFound) -} -/// Checks if the agent is running in the host cgroup namespace. -fn is_host_cgroup_namespace() -> Result<(), ()> { - let cgroup_namespace_inode = get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; - if cgroup_namespace_inode == HOST_CGROUP_NAMESPACE_INODE { - return Err(()); + Err(CgroupFileParsingError::ContainerIdNotFound) } - Ok(()) -} -/// Returns the `cgroup_inode` if available, otherwise `None` -fn get_cgroup_inode() -> Option<&'static str> { - lazy_static! { - static ref CGROUP_INODE: Option = { - // If we're running in the host cgroup namespace, do not get the inode. - // This would indicate that we're not in a container and the inode we'd - // return is not related to a container. 
- is_host_cgroup_namespace().ok()?; - let cgroup_mount_path = - get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) - .ok()?; - Some(get_inode(&cgroup_mount_path).ok()?.to_string()) - }; + /// Returns the inode of file at `path` + fn get_inode(path: &Path) -> io::Result { + let meta = fs::metadata(path)?; + Ok(meta.ino()) } - CGROUP_INODE.as_deref() -} -/// # Safety -/// Must not be called in multi-threaded contexts -pub unsafe fn set_cgroup_file(file: String) { - TESTING_CGROUP_PATH = Some(file) -} + /// Returns the cgroup mount path associated with `base_controller` or the default one for + /// cgroupV2 + fn get_cgroup_node_path( + base_controller: &str, + cgroup_mount_path: &Path, + ) -> Result { + let file = + File::open(cgroup_mount_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; + let reader = BufReader::new(file); + + let mut node_path: Option = None; + + for (index, line) in reader.lines().enumerate() { + let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; + let cgroup_entry: Vec<&str> = line_content.split(":").collect(); + if cgroup_entry.len() != 3 { + return Err(CgroupFileParsingError::InvalidFormat); + } + let controllers: Vec<&str> = cgroup_entry[1].split(",").collect(); + // Only keep empty controller if it is the first line as cgroupV2 uses only one line + if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { + let mut path = Path::new(DEFAULT_CGROUP_MOUNT_PATH).join(cgroup_entry[1]); + path.push(cgroup_entry[2].strip_prefix("/").unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative + node_path = Some(path); + + // if we are using cgroupV1 we can stop looking for the controller + if index != 0 { + break; + } + } + } + node_path.ok_or(CgroupFileParsingError::CgroupNotFound) + } -fn get_cgroup_path() -> PathBuf { - // Safety: we assume set_cgroup_file is not called when it shouldn't - if let Some(path) = unsafe { 
TESTING_CGROUP_PATH.as_ref() } { - Path::new(path.as_str()).into() - } else { - Path::new(DEFAULT_CGROUP_PATH).into() + /// Checks if the agent is running in the host cgroup namespace. + fn is_host_cgroup_namespace() -> Result<(), ()> { + let cgroup_namespace_inode = + get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; + if cgroup_namespace_inode == HOST_CGROUP_NAMESPACE_INODE { + return Err(()); + } + Ok(()) } -} -/// Returns the `container_id` if available in the cgroup file, otherwise returns `None` -pub fn get_container_id() -> Option<&'static str> { - // cache container id in a static to avoid recomputing it at each call + /// Returns the `cgroup_inode` if available, otherwise `None` + fn get_cgroup_inode() -> Option<&'static str> { + lazy_static! { + static ref CGROUP_INODE: Option = { + // If we're running in the host cgroup namespace, do not get the inode. + // This would indicate that we're not in a container and the inode we'd + // return is not related to a container. + is_host_cgroup_namespace().ok()?; + let cgroup_mount_path = + get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) + .ok()?; + Some(get_inode(&cgroup_mount_path).ok()?.to_string()) + }; + } + CGROUP_INODE.as_deref() + } - lazy_static! { - static ref CONTAINER_ID: Option = - extract_container_id(get_cgroup_path().as_path()).ok(); + /// # Safety + /// Must not be called in multi-threaded contexts + pub unsafe fn set_cgroup_file(file: String) { + TESTING_CGROUP_PATH = Some(file) } - CONTAINER_ID.as_deref() -} -/// Returns the `entity id` either `cid-` if available or `in-` -pub fn get_entity_id() -> Option<&'static str> { - lazy_static! 
{ - static ref ENTITY_ID: Option = if let Some(container_id) = get_container_id() { - Some(format!("cid-{container_id}")) - } else if let Some(inode) = get_cgroup_inode() { - Some(format!("in-{inode}")) + fn get_cgroup_path() -> PathBuf { + // Safety: we assume set_cgroup_file is not called when it shouldn't + if let Some(path) = unsafe { TESTING_CGROUP_PATH.as_ref() } { + Path::new(path.as_str()).into() } else { - None - }; + Path::new(DEFAULT_CGROUP_PATH).into() + } + } + + /// Returns the `container_id` if available in the cgroup file, otherwise returns `None` + pub fn get_container_id() -> Option<&'static str> { + // cache container id in a static to avoid recomputing it at each call + + lazy_static! { + static ref CONTAINER_ID: Option = + extract_container_id(get_cgroup_path().as_path()).ok(); + } + CONTAINER_ID.as_deref() } - ENTITY_ID.as_deref() -} -#[cfg(test)] -mod tests { - use super::*; - use maplit::hashmap; - - #[test] - fn line_parsing() { - let test_lines = hashmap! { - "" => None, - "other_line" => None, - "10:hugetlb:/kubepods/burstable/podfd52ef25-a87d-11e9-9423-0800271a638e/8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa" - => Some("8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa"), - "11:devices:/kubepods.slice/kubepods-pod97f1ae73_7ad9_11ec_b4a7_9a35488b4fab.slice/3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab" - => Some("3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab"), - "11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da" - => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), - "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376" - => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), - "1:name=systemd:/uuid/34dc0b5e-626f-2c5c-4c51-70e34b10e765" - => Some("34dc0b5e-626f-2c5c-4c51-70e34b10e765"), - 
"1:name=systemd:/ecs/34dc0b5e626f2c5c4c5170e34b10e765-1234567890" - => Some("34dc0b5e626f2c5c4c5170e34b10e765-1234567890"), - "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376.scope" - => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), - // k8s with additional characters before ID - "1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope" - => Some("7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199"), - // extra spaces - "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 " - => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), - // one char too short - "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f86986" - => None, - // invalid hex - "13:name=systemd:/docker/3726184226f5d3147g25fdeab5b60097e378e8a720503a5e19ecfdf29f869860" - => None, - }; - for (line, &expected_result) in test_lines.iter() { - assert_eq!(parse_line(line), expected_result); + /// Returns the `entity id` either `cid-` if available or `in-` + pub fn get_entity_id() -> Option<&'static str> { + lazy_static! { + static ref ENTITY_ID: Option = if let Some(container_id) = get_container_id() { + Some(format!("cid-{container_id}")) + } else if let Some(inode) = get_cgroup_inode() { + Some(format!("in-{inode}")) + } else { + None + }; } + ENTITY_ID.as_deref() } - #[test] - fn file_parsing() { - let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); - - let test_files = hashmap! 
{ - // parse a Docker container ID" - "cgroup.docker" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), - // parse a Kubernetes container ID - "cgroup.kubernetes" => Some("3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1"), - // parse an ECS container ID - "cgroup.ecs" => Some("38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce"), - // parse a Fargate container ID - "cgroup.fargate" => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), - // parse a Fargate 1.4+ container ID - "cgroup.fargate.1.4" => Some("8cd79a803caf4d2aa945152e934a5c00-1053176469"), - - // Whitespace around the matching ID is permitted so long as it is matched within a valid cgroup line. - // parse a container ID with leading and trailing whitespace - "cgroup.whitespace" => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), - - // a non-container Linux cgroup file makes an empty string - "cgroup.linux" => None, - - // missing cgroup file should return None - "/path/to/cgroup.missing" => None, - - /* To be consistent with other tracers, unrecognized services that match the - * generic container ID regex patterns are considered valid. 
- */ - //parse unrecognized container ID - "cgroup.unrecognized" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), - - // error edge cases when parsing container ID - "cgroup.edge_cases" => None, - - // an empty cgroup file makes an empty string - "" => None, - - // valid container ID with invalid line pattern makes an empty string - "cgroup.invalid_line_container_id" => None, - - // valid task ID with invalid line pattern makes an empty string - "cgroup.invalid_line_task_id" => None, - - // To be consistent with other tracers we only match lower case hex - // uppercase container IDs return an empty string - "cgroup.upper" => None, - }; - - for (&filename, &expected_result) in test_files.iter() { - assert_eq!( - extract_container_id(&test_root_dir.join(filename)).ok(), - expected_result.map(String::from), - "testing file {filename}" - ); + #[cfg(test)] + mod tests { + use super::*; + use maplit::hashmap; + + #[test] + fn line_parsing() { + let test_lines = hashmap! 
{ + "" => None, + "other_line" => None, + "10:hugetlb:/kubepods/burstable/podfd52ef25-a87d-11e9-9423-0800271a638e/8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa" + => Some("8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa"), + "11:devices:/kubepods.slice/kubepods-pod97f1ae73_7ad9_11ec_b4a7_9a35488b4fab.slice/3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab" + => Some("3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab"), + "11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da" + => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), + "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376" + => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), + "1:name=systemd:/uuid/34dc0b5e-626f-2c5c-4c51-70e34b10e765" + => Some("34dc0b5e-626f-2c5c-4c51-70e34b10e765"), + "1:name=systemd:/ecs/34dc0b5e626f2c5c4c5170e34b10e765-1234567890" + => Some("34dc0b5e626f2c5c4c5170e34b10e765-1234567890"), + "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376.scope" + => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), + // k8s with additional characters before ID + "1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope" + => Some("7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199"), + // extra spaces + "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 " + => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), + // one char too short + "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f86986" + => None, + // invalid hex + 
"13:name=systemd:/docker/3726184226f5d3147g25fdeab5b60097e378e8a720503a5e19ecfdf29f869860" + => None, + }; + for (line, &expected_result) in test_lines.iter() { + assert_eq!(parse_line(line), expected_result); + } + } + + #[test] + fn file_parsing() { + let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let test_files = hashmap! { + // parse a Docker container ID" + "cgroup.docker" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + // parse a Kubernetes container ID + "cgroup.kubernetes" => Some("3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1"), + // parse an ECS container ID + "cgroup.ecs" => Some("38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce"), + // parse a Fargate container ID + "cgroup.fargate" => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), + // parse a Fargate 1.4+ container ID + "cgroup.fargate.1.4" => Some("8cd79a803caf4d2aa945152e934a5c00-1053176469"), + + // Whitespace around the matching ID is permitted so long as it is matched within a valid cgroup line. + // parse a container ID with leading and trailing whitespace + "cgroup.whitespace" => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), + + // a non-container Linux cgroup file makes an empty string + "cgroup.linux" => None, + + // missing cgroup file should return None + "/path/to/cgroup.missing" => None, + + /* To be consistent with other tracers, unrecognized services that match the + * generic container ID regex patterns are considered valid. 
+ */ + //parse unrecognized container ID + "cgroup.unrecognized" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + + // error edge cases when parsing container ID + "cgroup.edge_cases" => None, + + // an empty cgroup file makes an empty string + "" => None, + + // valid container ID with invalid line pattern makes an empty string + "cgroup.invalid_line_container_id" => None, + + // valid task ID with invalid line pattern makes an empty string + "cgroup.invalid_line_task_id" => None, + + // To be consistent with other tracers we only match lower case hex + // uppercase container IDs return an empty string + "cgroup.upper" => None, + }; + + for (&filename, &expected_result) in test_files.iter() { + assert_eq!( + extract_container_id(&test_root_dir.join(filename)).ok(), + expected_result.map(String::from), + "testing file {filename}" + ); + } } } } From 7f986a883b0dec7fafff1b44bdbdd7fad06a9f40 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 24 May 2024 17:08:48 +0200 Subject: [PATCH 07/13] Fix clippy --- ddcommon/src/container_id.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs index 2e2881637..e8a969f10 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -179,15 +179,15 @@ mod unix { for (index, line) in reader.lines().enumerate() { let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; - let cgroup_entry: Vec<&str> = line_content.split(":").collect(); + let cgroup_entry: Vec<&str> = line_content.split(':').collect(); if cgroup_entry.len() != 3 { return Err(CgroupFileParsingError::InvalidFormat); } - let controllers: Vec<&str> = cgroup_entry[1].split(",").collect(); + let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); // Only keep empty controller if it is the first line as cgroupV2 uses only one line if controllers.contains(&base_controller) || 
(controllers.contains(&"") && index == 0) { let mut path = Path::new(DEFAULT_CGROUP_MOUNT_PATH).join(cgroup_entry[1]); - path.push(cgroup_entry[2].strip_prefix("/").unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative + path.push(cgroup_entry[2].strip_prefix('/').unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative node_path = Some(path); // if we are using cgroupV1 we can stop looking for the controller @@ -255,13 +255,9 @@ mod unix { /// Returns the `entity id` either `cid-` if available or `in-` pub fn get_entity_id() -> Option<&'static str> { lazy_static! { - static ref ENTITY_ID: Option = if let Some(container_id) = get_container_id() { - Some(format!("cid-{container_id}")) - } else if let Some(inode) = get_cgroup_inode() { - Some(format!("in-{inode}")) - } else { - None - }; + static ref ENTITY_ID: Option = get_container_id() + .map(|container_id| format!("cid-{container_id}")) + .or(get_cgroup_inode().map(|inode| format!("in-{inode}"))); } ENTITY_ID.as_deref() } From 3ab18ab29c8cb89f23026b5f34ddb868d5696007 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Tue, 28 May 2024 13:47:43 +0200 Subject: [PATCH 08/13] Add tests for entity-id --- ddcommon/src/container_id.rs | 168 ++++++++++++++++-- ddcommon/tests/cgroup.multiple_controllers | 10 ++ ddcommon/tests/cgroup.no_memory | 10 ++ ddcommon/tests/cgroup.v1_with_id_0 | 11 ++ ddcommon/tests/cgroup.v2 | 1 + ddcommon/tests/cgroup.v2_custom_path | 1 + .../user.slice/user-0.slice/session-14.scope | 1 + 7 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 ddcommon/tests/cgroup.multiple_controllers create mode 100644 ddcommon/tests/cgroup.no_memory create mode 100644 ddcommon/tests/cgroup.v1_with_id_0 create mode 100644 ddcommon/tests/cgroup.v2 create mode 100644 ddcommon/tests/cgroup.v2_custom_path create mode 100644 ddcommon/tests/cgroup/memory/user.slice/user-0.slice/session-14.scope diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs 
index e8a969f10..3a72da6ce 100644 --- a/ddcommon/src/container_id.rs +++ b/ddcommon/src/container_id.rs @@ -58,7 +58,7 @@ pub use unix::*; /// Fallback module used for non-unix systems #[cfg(not(unix))] mod fallback { -/// # Safety + /// # Safety /// Marked as unsafe to match the signature of the unix version pub unsafe fn set_cgroup_file(_file: String) {} @@ -87,19 +87,27 @@ mod unix { const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; - const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; + // Those two variables are unused in tests + #[cfg(not(test))] // From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 // Currently, host namespace inode number are hardcoded, which can be used to detect // if we're running in host namespace or not (does not work when running in DinD) const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; + #[cfg(not(test))] + const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; + /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values static mut TESTING_CGROUP_PATH: Option = None; + /// stores overridable cgroup mount path - used in end-to-end to mock cgroup node and be able to + /// compute inode + static mut TESTING_CGROUP_MOUNT_PATH: Option = None; + const UUID_SOURCE: &str = r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}"; @@ -113,7 +121,7 @@ mod unix { .unwrap(); } - #[derive(Debug, Clone)] + #[derive(Debug, Clone, PartialEq)] enum CgroupFileParsingError { ContainerIdNotFound, CgroupNotFound, @@ -169,10 +177,9 @@ mod unix { /// cgroupV2 fn get_cgroup_node_path( base_controller: &str, - cgroup_mount_path: &Path, + cgroup_path: &Path, ) -> Result { - let 
file = - File::open(cgroup_mount_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; + let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; let reader = BufReader::new(file); let mut node_path: Option = None; @@ -186,7 +193,14 @@ mod unix { let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); // Only keep empty controller if it is the first line as cgroupV2 uses only one line if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { - let mut path = Path::new(DEFAULT_CGROUP_MOUNT_PATH).join(cgroup_entry[1]); + let matched_operator = if controllers.contains(&base_controller) { + base_controller + } else { + "" + }; + + let mut path = get_cgroup_mount_path(); + path.push(matched_operator); path.push(cgroup_entry[2].strip_prefix('/').unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative node_path = Some(path); @@ -199,7 +213,9 @@ mod unix { node_path.ok_or(CgroupFileParsingError::CgroupNotFound) } + #[cfg(not(test))] /// Checks if the agent is running in the host cgroup namespace. + /// This check is disabled when testing fn is_host_cgroup_namespace() -> Result<(), ()> { let cgroup_namespace_inode = get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; @@ -209,6 +225,12 @@ mod unix { Ok(()) } + #[cfg(test)] + /// Mock version used in tests + fn is_host_cgroup_namespace() -> Result<(), ()> { + Ok(()) + } + /// Returns the `cgroup_inode` if available, otherwise `None` fn get_cgroup_inode() -> Option<&'static str> { lazy_static! 
{ @@ -241,6 +263,21 @@ mod unix { } } + /// # Safety + /// Must not be called in multi-threaded contexts + pub unsafe fn set_cgroup_mount_path(file: String) { + TESTING_CGROUP_MOUNT_PATH = Some(file) + } + + fn get_cgroup_mount_path() -> PathBuf { + // Safety: we assume set_cgroup_file is not called when it shouldn't + if let Some(path) = unsafe { TESTING_CGROUP_MOUNT_PATH.as_ref() } { + Path::new(path.as_str()).into() + } else { + Path::new(DEFAULT_CGROUP_MOUNT_PATH).into() + } + } + /// Returns the `container_id` if available in the cgroup file, otherwise returns `None` pub fn get_container_id() -> Option<&'static str> { // cache container id in a static to avoid recomputing it at each call @@ -268,7 +305,7 @@ mod unix { use maplit::hashmap; #[test] - fn line_parsing() { + fn test_container_id_line_parsing() { let test_lines = hashmap! { "" => None, "other_line" => None, @@ -300,12 +337,16 @@ mod unix { => None, }; for (line, &expected_result) in test_lines.iter() { - assert_eq!(parse_line(line), expected_result); + assert_eq!( + parse_line(line), + expected_result, + "testing line parsing for container id with line: {line}" + ); } } #[test] - fn file_parsing() { + fn test_container_id_file_parsing() { let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); let test_files = hashmap! { @@ -357,9 +398,114 @@ mod unix { assert_eq!( extract_container_id(&test_root_dir.join(filename)).ok(), expected_result.map(String::from), - "testing file {filename}" + "testing file parsing for container id with file: {filename}" + ); + } + } + + #[test] + fn test_cgroup_node_path_parsing() { + let test_root_dir: &Path = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let test_files = hashmap! 
{ + // parsing standard cgroupV2 file + "cgroup.v2" => Ok("/sys/fs/cgroup"), + // parsing cgroupV2 file with custom path + "cgroup.v2_custom_path" => Ok("/sys/fs/cgroup/custom/path"), + // a cgroupv1 container cgroup file returns the memory controller path + "cgroup.docker" => Ok("/sys/fs/cgroup/memory/docker/9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + // a non-container Linux cgroup file returns the memory controller path + "cgroup.linux" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file with an entry using 0 as a hierarchy id should not be detected as V2 + "cgroup.v1_with_id_0" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file using multiple controllers in the same entry returns the correct path + "cgroup.multiple_controllers" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file missing the memory controller should return an error + "cgroup.no_memory" => Err(CgroupFileParsingError::CgroupNotFound), + // missing cgroup file should return a CannotOpenFile Error + "path/to/cgroup.missing" => Err(CgroupFileParsingError::CannotOpenFile), + // valid container ID with invalid line pattern makes an empty string + "cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::InvalidFormat), + }; + + for (&filename, expected_result) in test_files.iter() { + assert_eq!( + get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, &test_root_dir.join(filename)), + expected_result.clone().map(PathBuf::from), + "testing file parsing for cgroup node path with file: {filename}" + ); + } + } + + lazy_static! 
{ + static ref IN_REGEX: Regex = Regex::new(r"in-\d+").unwrap(); + static ref CID_REGEX: Regex = + Regex::new(&format!(r"cid-{}", CONTAINER_REGEX.as_str())).unwrap(); + } + + /// The following test can only be run in isolation because of caching behaviour introduced + /// by lazy_static + fn test_entity_id(filename: &str, expected_result: Option<&Regex>) { + let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + unsafe { + set_cgroup_mount_path( + test_root_dir + .join("cgroup") + .as_path() + .to_str() + .expect("Invalid test directory") + .to_owned(), + ); + } + unsafe { + set_cgroup_file( + test_root_dir + .join(filename) + .as_path() + .to_str() + .expect("Invalid test directory") + .to_owned(), + ); + } + + if let Some(regex) = expected_result { + assert!( + regex.is_match(get_entity_id().unwrap()), + "testing get_entity_id with file {}: {} is not matching the expected regex", + filename, + get_entity_id().unwrap_or("None") + ); + } else { + assert_eq!( + None, + get_entity_id(), + "testing get_entity_id with file {filename}" ); } } + + #[test] + #[ignore] + fn test_entity_id_for_v2() { + test_entity_id("cgroup.v2", Some(&IN_REGEX)) + } + + #[test] + #[ignore] + fn test_entity_id_for_v1() { + test_entity_id("cgroup.linux", Some(&IN_REGEX)) + } + + #[test] + #[ignore] + fn test_entity_id_for_container_id() { + test_entity_id("cgroup.docker", Some(&CID_REGEX)) + } + + #[test] + #[ignore] + fn test_entity_id_for_no_id() { + test_entity_id("cgroup.no_memory", None) + } } } diff --git a/ddcommon/tests/cgroup.multiple_controllers b/ddcommon/tests/cgroup.multiple_controllers new file mode 100644 index 000000000..eebbf8aa3 --- /dev/null +++ b/ddcommon/tests/cgroup.multiple_controllers @@ -0,0 +1,10 @@ +10:blkio:/user.slice/user-0.slice/session-14.scope +9:memory,pids:/user.slice/user-0.slice/session-14.scope +8:hugetlb:/ +7:cpuset:/ +6:freezer:/ +5:net_cls,net_prio:/ +4:perf_event:/ +3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope 
+2:devices:/user.slice/user-0.slice/session-14.scope +1:name=systemd:/user.slice/user-0.slice/session-14.scope diff --git a/ddcommon/tests/cgroup.no_memory b/ddcommon/tests/cgroup.no_memory new file mode 100644 index 000000000..b067ea0a4 --- /dev/null +++ b/ddcommon/tests/cgroup.no_memory @@ -0,0 +1,10 @@ +10:blkio:/user.slice/user-0.slice/session-14.scope +9:hugetlb:/ +8:cpuset:/ +7:pids:/user.slice/user-0.slice/session-14.scope +6:freezer:/ +5:net_cls,net_prio:/ +4:perf_event:/ +3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope +2:devices:/user.slice/user-0.slice/session-14.scope +1:name=systemd:/user.slice/user-0.slice/session-14.scope diff --git a/ddcommon/tests/cgroup.v1_with_id_0 b/ddcommon/tests/cgroup.v1_with_id_0 new file mode 100644 index 000000000..a62c55af6 --- /dev/null +++ b/ddcommon/tests/cgroup.v1_with_id_0 @@ -0,0 +1,11 @@ +0::/ +10:memory:/user.slice/user-0.slice/session-14.scope +9:hugetlb:/ +8:cpuset:/ +7:pids:/user.slice/user-0.slice/session-14.scope +6:freezer:/ +5:net_cls,net_prio:/ +4:perf_event:/ +3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope +2:devices:/user.slice/user-0.slice/session-14.scope +1:name=systemd:/user.slice/user-0.slice/session-14.scope diff --git a/ddcommon/tests/cgroup.v2 b/ddcommon/tests/cgroup.v2 new file mode 100644 index 000000000..1e027b2a3 --- /dev/null +++ b/ddcommon/tests/cgroup.v2 @@ -0,0 +1 @@ +0::/ diff --git a/ddcommon/tests/cgroup.v2_custom_path b/ddcommon/tests/cgroup.v2_custom_path new file mode 100644 index 000000000..622cee15c --- /dev/null +++ b/ddcommon/tests/cgroup.v2_custom_path @@ -0,0 +1 @@ +0::/custom/path diff --git a/ddcommon/tests/cgroup/memory/user.slice/user-0.slice/session-14.scope b/ddcommon/tests/cgroup/memory/user.slice/user-0.slice/session-14.scope new file mode 100644 index 000000000..fae0f3c7d --- /dev/null +++ b/ddcommon/tests/cgroup/memory/user.slice/user-0.slice/session-14.scope @@ -0,0 +1 @@ +This file is used to test the entity id computation when using inode From 
2e360322f32d9dd0cbaf6eac00d9eb6d4e180bbb Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Wed, 29 May 2024 10:17:52 +0200 Subject: [PATCH 09/13] Refactor to allow integration tests --- ddcommon/src/container_id.rs | 511 -------------------- ddcommon/src/entity_id/mod.rs | 204 ++++++++ ddcommon/src/entity_id/unix/cgroup_inode.rs | 143 ++++++ ddcommon/src/entity_id/unix/container_id.rs | 152 ++++++ ddcommon/src/lib.rs | 6 +- ddtelemetry/src/lib.rs | 4 +- 6 files changed, 504 insertions(+), 516 deletions(-) delete mode 100644 ddcommon/src/container_id.rs create mode 100644 ddcommon/src/entity_id/mod.rs create mode 100644 ddcommon/src/entity_id/unix/cgroup_inode.rs create mode 100644 ddcommon/src/entity_id/unix/container_id.rs diff --git a/ddcommon/src/container_id.rs b/ddcommon/src/container_id.rs deleted file mode 100644 index 3a72da6ce..000000000 --- a/ddcommon/src/container_id.rs +++ /dev/null @@ -1,511 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -//! Extract the entity id and container id -//! -//! The container id can be extracted from `/proc/self/group` -//! -//! The entity id is either: -//! - `cid:` if available -//! - `in:` if container id is not available (e.g. when using cgroupV2) -//! -//! # References -//! - [DataDog/dd-trace-go](https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go) -//! - [Qard/container-info](https://github.com/Qard/container-info/blob/master/index.js) -//! # Supported environments -//! ## Docker -//! /proc/self/cgroup should contain lines like: -//! ```text -//! 13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 -//! ``` -//! ## Kubernetes -//! /proc/self/cgroup should contain lines like: -//! ```text -//! 11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1 -//! ``` -//! -//! 
Possibly with extra characters before id: -//! ```text -//! 1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope -//! ``` -//! -//! Or a UUID: -//! ```text -//! 1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a -//! ``` -//! ## ECS -//! /proc/self/cgroup should contain lines like: -//! ```text -//! 9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce -//! ``` -//! ## Fargate 1.3-: -//! /proc/self/cgroup should contain lines like: -//! ```test -//! 11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da -//! ``` -//! ## Fargate 1.4+: -//! Here we match a task id with a suffix -//! ```test -//! 1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469 -//! 
``` - -#[cfg(not(unix))] -pub use fallback::*; - -#[cfg(unix)] -pub use unix::*; - -/// Fallback module used for non-unix systems -#[cfg(not(unix))] -mod fallback { - /// # Safety - /// Marked as unsafe to match the signature of the unix version - pub unsafe fn set_cgroup_file(_file: String) {} - - pub fn get_container_id() -> Option<&'static str> { - None - } - - pub fn get_entity_id() -> Option<&'static str> { - None - } -} - -/// Unix specific module allowing the use of unix specific functions -#[cfg(unix)] -mod unix { - use lazy_static::lazy_static; - use regex::Regex; - use std::error; - use std::fmt; - use std::fs; - use std::fs::File; - use std::io; - use std::io::{BufRead, BufReader}; - use std::os::unix::fs::MetadataExt; - use std::path::{Path, PathBuf}; - - const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; - const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; - - /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. - const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; - - // Those two variables are unused in tests - #[cfg(not(test))] - // From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 - // Currently, host namespace inode number are hardcoded, which can be used to detect - // if we're running in host namespace or not (does not work when running in DinD) - const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; - - #[cfg(not(test))] - const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; - - /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values - static mut TESTING_CGROUP_PATH: Option = None; - - /// stores overridable cgroup mount path - used in end-to-end to mock cgroup node and be able to - /// compute inode - static mut TESTING_CGROUP_MOUNT_PATH: Option = None; - - const UUID_SOURCE: &str = - r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; - const CONTAINER_SOURCE: &str = 
r"[0-9a-f]{64}"; - const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+"; - - lazy_static! { - static ref LINE_REGEX: Regex = Regex::new(r"^\d+:[^:]*:(.+)$").unwrap(); - static ref CONTAINER_REGEX: Regex = Regex::new(&format!( - r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$" - )) - .unwrap(); - } - - #[derive(Debug, Clone, PartialEq)] - enum CgroupFileParsingError { - ContainerIdNotFound, - CgroupNotFound, - CannotOpenFile, - InvalidFormat, - } - - impl fmt::Display for CgroupFileParsingError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), - CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), - CgroupFileParsingError::CannotOpenFile => { - write!(f, "Error while opening cgroup file") - } - CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), - } - } - } - - impl error::Error for CgroupFileParsingError {} - - fn parse_line(line: &str) -> Option<&str> { - // unwrap is OK since if regex matches then the groups must exist - LINE_REGEX - .captures(line) - .and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str())) - .map(|captures| captures.get(1).unwrap().as_str()) - } - - fn extract_container_id(filepath: &Path) -> Result { - let file = File::open(filepath).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; - let reader = BufReader::new(file); - - for line in reader.lines() { - if let Some(container_id) = - parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?) 
- { - return Ok(String::from(container_id)); - } - } - - Err(CgroupFileParsingError::ContainerIdNotFound) - } - - /// Returns the inode of file at `path` - fn get_inode(path: &Path) -> io::Result { - let meta = fs::metadata(path)?; - Ok(meta.ino()) - } - - /// Returns the cgroup mount path associated with `base_controller` or the default one for - /// cgroupV2 - fn get_cgroup_node_path( - base_controller: &str, - cgroup_path: &Path, - ) -> Result { - let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; - let reader = BufReader::new(file); - - let mut node_path: Option = None; - - for (index, line) in reader.lines().enumerate() { - let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; - let cgroup_entry: Vec<&str> = line_content.split(':').collect(); - if cgroup_entry.len() != 3 { - return Err(CgroupFileParsingError::InvalidFormat); - } - let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); - // Only keep empty controller if it is the first line as cgroupV2 uses only one line - if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { - let matched_operator = if controllers.contains(&base_controller) { - base_controller - } else { - "" - }; - - let mut path = get_cgroup_mount_path(); - path.push(matched_operator); - path.push(cgroup_entry[2].strip_prefix('/').unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative - node_path = Some(path); - - // if we are using cgroupV1 we can stop looking for the controller - if index != 0 { - break; - } - } - } - node_path.ok_or(CgroupFileParsingError::CgroupNotFound) - } - - #[cfg(not(test))] - /// Checks if the agent is running in the host cgroup namespace. 
- /// This check is disabled when testing - fn is_host_cgroup_namespace() -> Result<(), ()> { - let cgroup_namespace_inode = - get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; - if cgroup_namespace_inode == HOST_CGROUP_NAMESPACE_INODE { - return Err(()); - } - Ok(()) - } - - #[cfg(test)] - /// Mock version used in tests - fn is_host_cgroup_namespace() -> Result<(), ()> { - Ok(()) - } - - /// Returns the `cgroup_inode` if available, otherwise `None` - fn get_cgroup_inode() -> Option<&'static str> { - lazy_static! { - static ref CGROUP_INODE: Option = { - // If we're running in the host cgroup namespace, do not get the inode. - // This would indicate that we're not in a container and the inode we'd - // return is not related to a container. - is_host_cgroup_namespace().ok()?; - let cgroup_mount_path = - get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, get_cgroup_path().as_path()) - .ok()?; - Some(get_inode(&cgroup_mount_path).ok()?.to_string()) - }; - } - CGROUP_INODE.as_deref() - } - - /// # Safety - /// Must not be called in multi-threaded contexts - pub unsafe fn set_cgroup_file(file: String) { - TESTING_CGROUP_PATH = Some(file) - } - - fn get_cgroup_path() -> PathBuf { - // Safety: we assume set_cgroup_file is not called when it shouldn't - if let Some(path) = unsafe { TESTING_CGROUP_PATH.as_ref() } { - Path::new(path.as_str()).into() - } else { - Path::new(DEFAULT_CGROUP_PATH).into() - } - } - - /// # Safety - /// Must not be called in multi-threaded contexts - pub unsafe fn set_cgroup_mount_path(file: String) { - TESTING_CGROUP_MOUNT_PATH = Some(file) - } - - fn get_cgroup_mount_path() -> PathBuf { - // Safety: we assume set_cgroup_file is not called when it shouldn't - if let Some(path) = unsafe { TESTING_CGROUP_MOUNT_PATH.as_ref() } { - Path::new(path.as_str()).into() - } else { - Path::new(DEFAULT_CGROUP_MOUNT_PATH).into() - } - } - - /// Returns the `container_id` if available in the cgroup file, otherwise returns `None` - pub fn 
get_container_id() -> Option<&'static str> { - // cache container id in a static to avoid recomputing it at each call - - lazy_static! { - static ref CONTAINER_ID: Option = - extract_container_id(get_cgroup_path().as_path()).ok(); - } - CONTAINER_ID.as_deref() - } - - /// Returns the `entity id` either `cid-` if available or `in-` - pub fn get_entity_id() -> Option<&'static str> { - lazy_static! { - static ref ENTITY_ID: Option = get_container_id() - .map(|container_id| format!("cid-{container_id}")) - .or(get_cgroup_inode().map(|inode| format!("in-{inode}"))); - } - ENTITY_ID.as_deref() - } - - #[cfg(test)] - mod tests { - use super::*; - use maplit::hashmap; - - #[test] - fn test_container_id_line_parsing() { - let test_lines = hashmap! { - "" => None, - "other_line" => None, - "10:hugetlb:/kubepods/burstable/podfd52ef25-a87d-11e9-9423-0800271a638e/8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa" - => Some("8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa"), - "11:devices:/kubepods.slice/kubepods-pod97f1ae73_7ad9_11ec_b4a7_9a35488b4fab.slice/3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab" - => Some("3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab"), - "11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da" - => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), - "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376" - => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), - "1:name=systemd:/uuid/34dc0b5e-626f-2c5c-4c51-70e34b10e765" - => Some("34dc0b5e-626f-2c5c-4c51-70e34b10e765"), - "1:name=systemd:/ecs/34dc0b5e626f2c5c4c5170e34b10e765-1234567890" - => Some("34dc0b5e626f2c5c4c5170e34b10e765-1234567890"), - "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376.scope" - => 
Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), - // k8s with additional characters before ID - "1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope" - => Some("7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199"), - // extra spaces - "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 " - => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), - // one char too short - "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f86986" - => None, - // invalid hex - "13:name=systemd:/docker/3726184226f5d3147g25fdeab5b60097e378e8a720503a5e19ecfdf29f869860" - => None, - }; - for (line, &expected_result) in test_lines.iter() { - assert_eq!( - parse_line(line), - expected_result, - "testing line parsing for container id with line: {line}" - ); - } - } - - #[test] - fn test_container_id_file_parsing() { - let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); - - let test_files = hashmap! { - // parse a Docker container ID" - "cgroup.docker" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), - // parse a Kubernetes container ID - "cgroup.kubernetes" => Some("3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1"), - // parse an ECS container ID - "cgroup.ecs" => Some("38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce"), - // parse a Fargate container ID - "cgroup.fargate" => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), - // parse a Fargate 1.4+ container ID - "cgroup.fargate.1.4" => Some("8cd79a803caf4d2aa945152e934a5c00-1053176469"), - - // Whitespace around the matching ID is permitted so long as it is matched within a valid cgroup line. 
- // parse a container ID with leading and trailing whitespace - "cgroup.whitespace" => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), - - // a non-container Linux cgroup file makes an empty string - "cgroup.linux" => None, - - // missing cgroup file should return None - "/path/to/cgroup.missing" => None, - - /* To be consistent with other tracers, unrecognized services that match the - * generic container ID regex patterns are considered valid. - */ - //parse unrecognized container ID - "cgroup.unrecognized" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), - - // error edge cases when parsing container ID - "cgroup.edge_cases" => None, - - // an empty cgroup file makes an empty string - "" => None, - - // valid container ID with invalid line pattern makes an empty string - "cgroup.invalid_line_container_id" => None, - - // valid task ID with invalid line pattern makes an empty string - "cgroup.invalid_line_task_id" => None, - - // To be consistent with other tracers we only match lower case hex - // uppercase container IDs return an empty string - "cgroup.upper" => None, - }; - - for (&filename, &expected_result) in test_files.iter() { - assert_eq!( - extract_container_id(&test_root_dir.join(filename)).ok(), - expected_result.map(String::from), - "testing file parsing for container id with file: {filename}" - ); - } - } - - #[test] - fn test_cgroup_node_path_parsing() { - let test_root_dir: &Path = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); - - let test_files = hashmap! 
{ - // parsing standard cgroupV2 file - "cgroup.v2" => Ok("/sys/fs/cgroup"), - // parsing cgroupV2 file with custom path - "cgroup.v2_custom_path" => Ok("/sys/fs/cgroup/custom/path"), - // a cgroupv1 container cgroup file returns the memory controller path - "cgroup.docker" => Ok("/sys/fs/cgroup/memory/docker/9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), - // a non-container Linux cgroup file returns the memory controller path - "cgroup.linux" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), - // a cgroupV1 file with an entry using 0 as a hierarchy id should not be detected as V2 - "cgroup.v1_with_id_0" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), - // a cgroupV1 file using multiple controllers in the same entry returns the correct path - "cgroup.multiple_controllers" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), - // a cgroupV1 file missing the memory controller should return an error - "cgroup.no_memory" => Err(CgroupFileParsingError::CgroupNotFound), - // missing cgroup file should return a CannotOpenFile Error - "path/to/cgroup.missing" => Err(CgroupFileParsingError::CannotOpenFile), - // valid container ID with invalid line pattern makes an empty string - "cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::InvalidFormat), - }; - - for (&filename, expected_result) in test_files.iter() { - assert_eq!( - get_cgroup_node_path(CGROUP_V1_BASE_CONTROLLER, &test_root_dir.join(filename)), - expected_result.clone().map(PathBuf::from), - "testing file parsing for cgroup node path with file: {filename}" - ); - } - } - - lazy_static! 
{ - static ref IN_REGEX: Regex = Regex::new(r"in-\d+").unwrap(); - static ref CID_REGEX: Regex = - Regex::new(&format!(r"cid-{}", CONTAINER_REGEX.as_str())).unwrap(); - } - - /// The following test can only be run in isolation because of caching behaviour introduced - /// by lazy_static - fn test_entity_id(filename: &str, expected_result: Option<&Regex>) { - let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); - unsafe { - set_cgroup_mount_path( - test_root_dir - .join("cgroup") - .as_path() - .to_str() - .expect("Invalid test directory") - .to_owned(), - ); - } - unsafe { - set_cgroup_file( - test_root_dir - .join(filename) - .as_path() - .to_str() - .expect("Invalid test directory") - .to_owned(), - ); - } - - if let Some(regex) = expected_result { - assert!( - regex.is_match(get_entity_id().unwrap()), - "testing get_entity_id with file {}: {} is not matching the expected regex", - filename, - get_entity_id().unwrap_or("None") - ); - } else { - assert_eq!( - None, - get_entity_id(), - "testing get_entity_id with file {filename}" - ); - } - } - - #[test] - #[ignore] - fn test_entity_id_for_v2() { - test_entity_id("cgroup.v2", Some(&IN_REGEX)) - } - - #[test] - #[ignore] - fn test_entity_id_for_v1() { - test_entity_id("cgroup.linux", Some(&IN_REGEX)) - } - - #[test] - #[ignore] - fn test_entity_id_for_container_id() { - test_entity_id("cgroup.docker", Some(&CID_REGEX)) - } - - #[test] - #[ignore] - fn test_entity_id_for_no_id() { - test_entity_id("cgroup.no_memory", None) - } - } -} diff --git a/ddcommon/src/entity_id/mod.rs b/ddcommon/src/entity_id/mod.rs new file mode 100644 index 000000000..341504e57 --- /dev/null +++ b/ddcommon/src/entity_id/mod.rs @@ -0,0 +1,204 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Extract the entity id and container id +//! +//! The container id can be extracted from `/proc/self/group` +//! +//! The entity id is either: +//! 
- `cid:` if available +//! - `in:` if container id is not available (e.g. when using cgroupV2) +//! +//! # References +//! - [DataDog/dd-trace-go](https://github.com/DataDog/dd-trace-go/blob/v1/internal/container.go) +//! - [Qard/container-info](https://github.com/Qard/container-info/blob/master/index.js) +//! # Supported environments +//! ## Docker +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 +//! ``` +//! ## Kubernetes +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 11:perf_event:/kubepods/besteffort/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1 +//! ``` +//! +//! Possibly with extra characters before id: +//! ```text +//! 1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope +//! ``` +//! +//! Or a UUID: +//! ```text +//! 1:name=systemd:/kubepods/besteffort/pode9b90526-f47d-11e8-b2a5-080027b9f4fb/15aa6e53-b09a-40c7-8558-c6c31e36c88a +//! ``` +//! ## ECS +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 9:perf_event:/ecs/haissam-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce +//! ``` +//! ## Fargate 1.3-: +//! /proc/self/cgroup should contain lines like: +//! ```text +//! 11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da +//! ``` +//! ## Fargate 1.4+: +//! Here we match a task id with a suffix +//! ```text +//! 1:name=systemd:/ecs/8cd79a803caf4d2aa945152e934a5c00/8cd79a803caf4d2aa945152e934a5c00-1053176469 +//!
``` + +#[cfg(not(unix))] +pub use fallback::{get_container_id, get_entity_id}; + +#[cfg(unix)] +pub use unix::{get_container_id, get_entity_id}; + +/// Fallback module used for non-unix systems +#[cfg(not(unix))] +mod fallback { + pub fn get_container_id() -> Option<&'static str> { + None + } + + pub fn get_entity_id() -> Option<&'static str> { + None + } +} + +/// Unix specific module allowing the use of unix specific functions +#[cfg(unix)] +mod unix { + use lazy_static::lazy_static; + use std::error; + use std::fmt; + use std::path::Path; + + mod cgroup_inode; + mod container_id; + + const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; + const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; + + /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. + const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; + + #[derive(Debug, Clone, PartialEq)] + pub enum CgroupFileParsingError { + ContainerIdNotFound, + CgroupNotFound, + CannotOpenFile, + InvalidFormat, + } + + impl fmt::Display for CgroupFileParsingError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), + CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), + CgroupFileParsingError::CannotOpenFile => { + write!(f, "Error while opening cgroup file") + } + CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), + } + } + } + + impl error::Error for CgroupFileParsingError {} + + fn compute_entity_id( + base_controller: &str, + cgroup_path: &Path, + cgroup_mount_path: &Path, + ) -> Option { + container_id::extract_container_id(cgroup_path) + .ok() + .map(|container_id| format!("cid-{container_id}")) + .or( + cgroup_inode::get_cgroup_inode(base_controller, cgroup_path, cgroup_mount_path) + .map(|inode| format!("in-{inode}")), + ) + } + + /// Returns the `container_id` if available in the cgroup file, otherwise returns 
`None` + pub fn get_container_id() -> Option<&'static str> { + // cache container id in a static to avoid recomputing it at each call + + lazy_static! { + static ref CONTAINER_ID: Option = + container_id::extract_container_id(Path::new(DEFAULT_CGROUP_PATH)).ok(); + } + CONTAINER_ID.as_deref() + } + + /// Returns the `entity_id` if available, either `cid-` or `in-` + pub fn get_entity_id() -> Option<&'static str> { + lazy_static! { + static ref ENTITY_ID: Option = compute_entity_id( + CGROUP_V1_BASE_CONTROLLER, + Path::new(DEFAULT_CGROUP_PATH), + Path::new(DEFAULT_CGROUP_MOUNT_PATH), + ); + } + ENTITY_ID.as_deref() + } + + #[cfg(test)] + mod tests { + use super::*; + use regex::Regex; + + lazy_static! { + static ref IN_REGEX: Regex = Regex::new(r"in-\d+").unwrap(); + static ref CID_REGEX: Regex = + Regex::new(&format!(r"cid-{}", container_id::CONTAINER_REGEX.as_str())).unwrap(); + } + + /// The following test can only be run in isolation because of caching behaviour introduced + /// by lazy_static + fn test_entity_id(filename: &str, expected_result: Option<&Regex>) { + let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let entity_id = compute_entity_id( + CGROUP_V1_BASE_CONTROLLER, + test_root_dir.join(filename).as_path(), + test_root_dir.join("cgroup").as_path(), + ); + + if let Some(regex) = expected_result { + assert!( + regex.is_match(entity_id.as_deref().unwrap()), + "testing get_entity_id with file {}: {} is not matching the expected regex", + filename, + entity_id.as_deref().unwrap_or("None") + ); + } else { + assert_eq!( + None, entity_id, + "testing get_entity_id with file {filename}" + ); + } + } + + #[test] + fn test_entity_id_for_v2() { + test_entity_id("cgroup.v2", Some(&IN_REGEX)) + } + + #[test] + fn test_entity_id_for_v1() { + test_entity_id("cgroup.linux", Some(&IN_REGEX)) + } + + #[test] + fn test_entity_id_for_container_id() { + test_entity_id("cgroup.docker", Some(&CID_REGEX)) + } + + #[test] + fn 
test_entity_id_for_no_id() { + test_entity_id("cgroup.no_memory", None) + } + } +} diff --git a/ddcommon/src/entity_id/unix/cgroup_inode.rs b/ddcommon/src/entity_id/unix/cgroup_inode.rs new file mode 100644 index 000000000..e23e31359 --- /dev/null +++ b/ddcommon/src/entity_id/unix/cgroup_inode.rs @@ -0,0 +1,143 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! This module provides functions to fetch cgroup node path and fetching it's inode +use super::CgroupFileParsingError; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::os::unix::fs::MetadataExt; +use std::path::{Path, PathBuf}; +use std::{fs, io}; + +// Those two variables are unused in tests +#[cfg(not(test))] +// From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 +// Currently, host namespace inode number are hardcoded, which can be used to detect +// if we're running in host namespace or not (does not work when running in DinD) +const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; + +#[cfg(not(test))] +const DEFAULT_CGROUP_NS_PATH: &str = "/proc/self/ns/cgroup"; + +/// Returns the inode of file at `path` +fn get_inode(path: &Path) -> io::Result { + let meta = fs::metadata(path)?; + Ok(meta.ino()) +} + +/// Returns the cgroup mount path associated with `base_controller` or the default one for +/// cgroupV2 +fn get_cgroup_node_path( + base_controller: &str, + cgroup_path: &Path, + cgroup_mount_path: &Path, +) -> Result { + let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; + let reader = BufReader::new(file); + + let mut node_path: Option = None; + + for (index, line) in reader.lines().enumerate() { + let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; + let cgroup_entry: Vec<&str> = line_content.split(':').collect(); + if cgroup_entry.len() != 3 { + return 
Err(CgroupFileParsingError::InvalidFormat); + } + let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); + // Only keep empty controller if it is the first line as cgroupV2 uses only one line + if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { + let matched_operator = if controllers.contains(&base_controller) { + base_controller + } else { + "" + }; + + let mut path = cgroup_mount_path.join(matched_operator); + path.push(cgroup_entry[2].strip_prefix('/').unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative + node_path = Some(path); + + // if we are using cgroupV1 we can stop looking for the controller + if index != 0 { + break; + } + } + } + node_path.ok_or(CgroupFileParsingError::CgroupNotFound) +} + +#[cfg(not(test))] +/// Checks if the agent is running in the host cgroup namespace. +/// This check is disabled when testing +fn is_host_cgroup_namespace() -> Result<(), ()> { + let cgroup_namespace_inode = get_inode(Path::new(DEFAULT_CGROUP_NS_PATH)).map_err(|_| ())?; + if cgroup_namespace_inode == HOST_CGROUP_NAMESPACE_INODE { + return Err(()); + } + Ok(()) +} + +#[cfg(test)] +/// Mock version used in tests +fn is_host_cgroup_namespace() -> Result<(), ()> { + Ok(()) +} + +/// Returns the `cgroup_inode` if available, otherwise `None` +pub fn get_cgroup_inode( + base_controller: &str, + cgroup_path: &Path, + cgroup_mount_path: &Path, +) -> Option { + // If we're running in the host cgroup namespace, do not get the inode. + // This would indicate that we're not in a container and the inode we'd + // return is not related to a container. 
+ is_host_cgroup_namespace().ok()?; + let cgroup_mount_path = + get_cgroup_node_path(base_controller, cgroup_path, cgroup_mount_path).ok()?; + Some(get_inode(&cgroup_mount_path).ok()?.to_string()) +} + +#[cfg(test)] +mod tests { + use super::super::CGROUP_V1_BASE_CONTROLLER; + use super::*; + use maplit::hashmap; + + #[test] + fn test_cgroup_node_path_parsing() { + let test_root_dir: &Path = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let test_files = hashmap! { + // parsing standard cgroupV2 file + "cgroup.v2" => Ok("/sys/fs/cgroup"), + // parsing cgroupV2 file with custom path + "cgroup.v2_custom_path" => Ok("/sys/fs/cgroup/custom/path"), + // a cgroupv1 container cgroup file returns the memory controller path + "cgroup.docker" => Ok("/sys/fs/cgroup/memory/docker/9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + // a non-container Linux cgroup file returns the memory controller path + "cgroup.linux" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file with an entry using 0 as a hierarchy id should not be detected as V2 + "cgroup.v1_with_id_0" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file using multiple controllers in the same entry returns the correct path + "cgroup.multiple_controllers" => Ok("/sys/fs/cgroup/memory/user.slice/user-0.slice/session-14.scope"), + // a cgroupV1 file missing the memory controller should return an error + "cgroup.no_memory" => Err(CgroupFileParsingError::CgroupNotFound), + // missing cgroup file should return a CannotOpenFile Error + "path/to/cgroup.missing" => Err(CgroupFileParsingError::CannotOpenFile), + // valid container ID with invalid line pattern makes an empty string + "cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::InvalidFormat), + }; + + for (&filename, expected_result) in test_files.iter() { + assert_eq!( + get_cgroup_node_path( + CGROUP_V1_BASE_CONTROLLER, + 
&test_root_dir.join(filename), + Path::new("/sys/fs/cgroup") + ), + expected_result.clone().map(PathBuf::from), + "testing file parsing for cgroup node path with file: {filename}" + ); + } + } +} diff --git a/ddcommon/src/entity_id/unix/container_id.rs b/ddcommon/src/entity_id/unix/container_id.rs new file mode 100644 index 000000000..374e48ca3 --- /dev/null +++ b/ddcommon/src/entity_id/unix/container_id.rs @@ -0,0 +1,152 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! This module provides functions to parse the container id from the cgroup file +use super::CgroupFileParsingError; +use lazy_static::lazy_static; +use regex::Regex; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +const UUID_SOURCE: &str = + r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; +const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}"; +const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+"; + +lazy_static! { + static ref LINE_REGEX: Regex = Regex::new(r"^\d+:[^:]*:(.+)$").unwrap(); + pub static ref CONTAINER_REGEX: Regex = Regex::new(&format!( + r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$" + )) + .unwrap(); +} + +fn parse_line(line: &str) -> Option<&str> { + // unwrap is OK since if regex matches then the groups must exist + LINE_REGEX + .captures(line) + .and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str())) + .map(|captures| captures.get(1).unwrap().as_str()) +} + +/// Extract container id contained in the cgroup file located at `cgroup_path` +pub fn extract_container_id(cgroup_path: &Path) -> Result { + let file = File::open(cgroup_path).map_err(|_| CgroupFileParsingError::CannotOpenFile)?; + let reader = BufReader::new(file); + + for line in reader.lines() { + if let Some(container_id) = + parse_line(&line.map_err(|_| CgroupFileParsingError::InvalidFormat)?) 
+ { + return Ok(String::from(container_id)); + } + } + + Err(CgroupFileParsingError::ContainerIdNotFound) +} + +#[cfg(test)] +mod tests { + use super::*; + use maplit::hashmap; + + #[test] + fn test_container_id_line_parsing() { + let test_lines = hashmap! { + "" => None, + "other_line" => None, + "10:hugetlb:/kubepods/burstable/podfd52ef25-a87d-11e9-9423-0800271a638e/8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa" + => Some("8c046cb0b72cd4c99f51b5591cd5b095967f58ee003710a45280c28ee1a9c7fa"), + "11:devices:/kubepods.slice/kubepods-pod97f1ae73_7ad9_11ec_b4a7_9a35488b4fab.slice/3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab" + => Some("3291bfddf3f3f8d87cb0cd1245fe9c45b2e1e5a9b6fe3de1bddf041aedaecbab"), + "11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da" + => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), + "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376" + => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), + "1:name=systemd:/uuid/34dc0b5e-626f-2c5c-4c51-70e34b10e765" + => Some("34dc0b5e-626f-2c5c-4c51-70e34b10e765"), + "1:name=systemd:/ecs/34dc0b5e626f2c5c4c5170e34b10e765-1234567890" + => Some("34dc0b5e626f2c5c4c5170e34b10e765-1234567890"), + "1:name=systemd:/docker/34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376.scope" + => Some("34dc0b5e626f2c5c4c5170e34b10e7654ce36f0fcd532739f4445baabea03376"), + // k8s with additional characters before ID + "1:name=systemd:/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d3da189_6407_48e3_9ab6_78188d75e609.slice/docker-7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199.scope" + => Some("7b8952daecf4c0e44bbcefe1b5c5ebc7b4839d4eefeccefe694709d3809b6199"), + // extra spaces + "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860 " + => 
Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), + // one char too short + "13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f86986" + => None, + // invalid hex + "13:name=systemd:/docker/3726184226f5d3147g25fdeab5b60097e378e8a720503a5e19ecfdf29f869860" + => None, + }; + for (line, &expected_result) in test_lines.iter() { + assert_eq!( + parse_line(line), + expected_result, + "testing line parsing for container id with line: {line}" + ); + } + } + + #[test] + fn test_container_id_file_parsing() { + let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let test_files = hashmap! { + // parse a Docker container ID" + "cgroup.docker" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + // parse a Kubernetes container ID + "cgroup.kubernetes" => Some("3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1"), + // parse an ECS container ID + "cgroup.ecs" => Some("38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce"), + // parse a Fargate container ID + "cgroup.fargate" => Some("432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da"), + // parse a Fargate 1.4+ container ID + "cgroup.fargate.1.4" => Some("8cd79a803caf4d2aa945152e934a5c00-1053176469"), + + // Whitespace around the matching ID is permitted so long as it is matched within a valid cgroup line. + // parse a container ID with leading and trailing whitespace + "cgroup.whitespace" => Some("3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860"), + + // a non-container Linux cgroup file makes an empty string + "cgroup.linux" => None, + + // missing cgroup file should return None + "/path/to/cgroup.missing" => None, + + /* To be consistent with other tracers, unrecognized services that match the + * generic container ID regex patterns are considered valid. 
+ */ + //parse unrecognized container ID + "cgroup.unrecognized" => Some("9d5b23edb1ba181e8910389a99906598d69ac9a0ead109ee55730cc416d95f7f"), + + // error edge cases when parsing container ID + "cgroup.edge_cases" => None, + + // an empty cgroup file makes an empty string + "" => None, + + // valid container ID with invalid line pattern makes an empty string + "cgroup.invalid_line_container_id" => None, + + // valid task ID with invalid line pattern makes an empty string + "cgroup.invalid_line_task_id" => None, + + // To be consistent with other tracers we only match lower case hex + // uppercase container IDs return an empty string + "cgroup.upper" => None, + }; + + for (&filename, &expected_result) in test_files.iter() { + assert_eq!( + extract_container_id(&test_root_dir.join(filename)).ok(), + expected_result.map(String::from), + "testing file parsing for container id with file: {filename}" + ); + } + } +} diff --git a/ddcommon/src/lib.rs b/ddcommon/src/lib.rs index 6b0c8fe69..2a2936fd8 100644 --- a/ddcommon/src/lib.rs +++ b/ddcommon/src/lib.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; pub mod azure_app_services; pub mod connector; -pub mod container_id; +pub mod entity_id; #[macro_use] pub mod cstr; pub mod config; @@ -137,12 +137,12 @@ impl Endpoint { } // Add the Container Id header if available - if let Some(container_id) = container_id::get_container_id() { + if let Some(container_id) = entity_id::get_container_id() { builder = builder.header(header::DATADOG_CONTAINER_ID, container_id); } // Add the Entity Id header if available - if let Some(entity_id) = container_id::get_entity_id() { + if let Some(entity_id) = entity_id::get_entity_id() { builder = builder.header(header::DATADOG_ENTITY_ID, entity_id); } diff --git a/ddtelemetry/src/lib.rs b/ddtelemetry/src/lib.rs index acfaa1b56..65ff476f9 100644 --- a/ddtelemetry/src/lib.rs +++ b/ddtelemetry/src/lib.rs @@ -4,7 +4,7 @@ #![allow(clippy::mutex_atomic)] 
#![allow(clippy::nonminimal_bool)] -use ddcommon::container_id; +use ddcommon::entity_id; pub mod config; pub mod data; @@ -15,7 +15,7 @@ pub mod worker; pub fn build_host() -> data::Host { data::Host { hostname: info::os::real_hostname().unwrap_or_else(|_| String::from("unknown_hostname")), - container_id: container_id::get_container_id().map(|f| f.to_string()), + container_id: entity_id::get_container_id().map(|f| f.to_string()), os: Some(String::from(info::os::os_name())), os_version: info::os::os_version().ok(), kernel_name: None, From a0816122bf622a3b93109cb1f93346ccdf8549a8 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Thu, 30 May 2024 14:35:20 +0200 Subject: [PATCH 10/13] Allow mocking by caller --- ddcommon/src/entity_id/mod.rs | 53 +++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/ddcommon/src/entity_id/mod.rs b/ddcommon/src/entity_id/mod.rs index 341504e57..2338665d0 100644 --- a/ddcommon/src/entity_id/mod.rs +++ b/ddcommon/src/entity_id/mod.rs @@ -50,14 +50,18 @@ //! ``` #[cfg(not(unix))] -pub use fallback::{get_container_id, get_entity_id}; +pub use fallback::{get_container_id, get_entity_id, set_cgroup_file, set_cgroup_mount_path}; #[cfg(unix)] -pub use unix::{get_container_id, get_entity_id}; +pub use unix::{get_container_id, get_entity_id, set_cgroup_file, set_cgroup_mount_path}; /// Fallback module used for non-unix systems #[cfg(not(unix))] mod fallback { + pub fn set_cgroup_file() {} + + pub fn set_cgroup_mount_path() {} + pub fn get_container_id() -> Option<&'static str> { None } @@ -84,6 +88,11 @@ mod unix { /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. 
const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; + /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values + static mut TESTING_CGROUP_PATH: Option = None; + /// stores overridable cgroup mount path + static mut TESTING_CGROUP_MOUNT_PATH: Option = None; + #[derive(Debug, Clone, PartialEq)] pub enum CgroupFileParsingError { ContainerIdNotFound, @@ -121,24 +130,56 @@ mod unix { ) } + fn get_cgroup_path() -> &'static str { + // Safety: we assume set_cgroup_file is not called when it shouldn't + unsafe { + TESTING_CGROUP_PATH + .as_deref() + .unwrap_or(DEFAULT_CGROUP_PATH) + } + } + + fn get_cgroup_mount_path() -> &'static str { + // Safety: we assume set_cgroup_file is not called when it shouldn't + unsafe { + TESTING_CGROUP_MOUNT_PATH + .as_deref() + .unwrap_or(DEFAULT_CGROUP_MOUNT_PATH) + } + } + + /// Set the path to cgroup file to mock it during tests + /// # Safety + /// Must not be called in multi-threaded contexts + pub unsafe fn set_cgroup_file(file: String) { + TESTING_CGROUP_PATH = Some(file) + } + + /// Set cgroup mount path to mock during tests + /// # Safety + /// Must not be called in multi-threaded contexts + pub unsafe fn set_cgroup_mount_path(path: String) { + TESTING_CGROUP_MOUNT_PATH = Some(path) + } + /// Returns the `container_id` if available in the cgroup file, otherwise returns `None` pub fn get_container_id() -> Option<&'static str> { // cache container id in a static to avoid recomputing it at each call - lazy_static! { static ref CONTAINER_ID: Option = - container_id::extract_container_id(Path::new(DEFAULT_CGROUP_PATH)).ok(); + container_id::extract_container_id(Path::new(get_cgroup_path())).ok(); } CONTAINER_ID.as_deref() } /// Returns the `entity_id` if available, either `cid-` or `in-` pub fn get_entity_id() -> Option<&'static str> { + // cache entity id in a static to avoid recomputing it at each call lazy_static! 
{ static ref ENTITY_ID: Option = compute_entity_id( CGROUP_V1_BASE_CONTROLLER, - Path::new(DEFAULT_CGROUP_PATH), - Path::new(DEFAULT_CGROUP_MOUNT_PATH), + Path::new(get_cgroup_path()), + Path::new(get_cgroup_mount_path()), ); } ENTITY_ID.as_deref() From 7b5d5045121d174c0957f3afa676f255790e2c7a Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 31 May 2024 14:22:53 +0200 Subject: [PATCH 11/13] Refactor unix and fallback module --- ddcommon/src/entity_id/fallback.rs | 14 +++ ddcommon/src/entity_id/mod.rs | 187 +---------------------------- ddcommon/src/entity_id/unix/mod.rs | 171 ++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 184 deletions(-) create mode 100644 ddcommon/src/entity_id/fallback.rs create mode 100644 ddcommon/src/entity_id/unix/mod.rs diff --git a/ddcommon/src/entity_id/fallback.rs b/ddcommon/src/entity_id/fallback.rs new file mode 100644 index 000000000..bfa38258c --- /dev/null +++ b/ddcommon/src/entity_id/fallback.rs @@ -0,0 +1,14 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +pub fn set_cgroup_file() {} + +pub fn set_cgroup_mount_path() {} + +pub fn get_container_id() -> Option<&'static str> { + None +} + +pub fn get_entity_id() -> Option<&'static str> { + None +} diff --git a/ddcommon/src/entity_id/mod.rs b/ddcommon/src/entity_id/mod.rs index 2338665d0..e108765ee 100644 --- a/ddcommon/src/entity_id/mod.rs +++ b/ddcommon/src/entity_id/mod.rs @@ -5,7 +5,7 @@ //! //! The container id can be extracted from `/proc/self/group` //! -//! The entity id is either: +//! The entity id is one of: //! - `cid:` if available //! - `in:` if container id is not available (e.g. when using cgroupV2) //! 
@@ -57,189 +57,8 @@ pub use unix::{get_container_id, get_entity_id, set_cgroup_file, set_cgroup_moun /// Fallback module used for non-unix systems #[cfg(not(unix))] -mod fallback { - pub fn set_cgroup_file() {} - - pub fn set_cgroup_mount_path() {} - - pub fn get_container_id() -> Option<&'static str> { - None - } - - pub fn get_entity_id() -> Option<&'static str> { - None - } -} +mod fallback; /// Unix specific module allowing the use of unix specific functions #[cfg(unix)] -mod unix { - use lazy_static::lazy_static; - use std::error; - use std::fmt; - use std::path::Path; - - mod cgroup_inode; - mod container_id; - - const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; - const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; - - /// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. - const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; - - /// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values - static mut TESTING_CGROUP_PATH: Option = None; - /// stores overridable cgroup mount path - static mut TESTING_CGROUP_MOUNT_PATH: Option = None; - - #[derive(Debug, Clone, PartialEq)] - pub enum CgroupFileParsingError { - ContainerIdNotFound, - CgroupNotFound, - CannotOpenFile, - InvalidFormat, - } - - impl fmt::Display for CgroupFileParsingError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), - CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), - CgroupFileParsingError::CannotOpenFile => { - write!(f, "Error while opening cgroup file") - } - CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), - } - } - } - - impl error::Error for CgroupFileParsingError {} - - fn compute_entity_id( - base_controller: &str, - cgroup_path: &Path, - cgroup_mount_path: &Path, - ) -> Option { - container_id::extract_container_id(cgroup_path) - .ok() - 
.map(|container_id| format!("cid-{container_id}")) - .or( - cgroup_inode::get_cgroup_inode(base_controller, cgroup_path, cgroup_mount_path) - .map(|inode| format!("in-{inode}")), - ) - } - - fn get_cgroup_path() -> &'static str { - // Safety: we assume set_cgroup_file is not called when it shouldn't - unsafe { - TESTING_CGROUP_PATH - .as_deref() - .unwrap_or(DEFAULT_CGROUP_PATH) - } - } - - fn get_cgroup_mount_path() -> &'static str { - // Safety: we assume set_cgroup_file is not called when it shouldn't - unsafe { - TESTING_CGROUP_MOUNT_PATH - .as_deref() - .unwrap_or(DEFAULT_CGROUP_MOUNT_PATH) - } - } - - /// Set the path to cgroup file to mock it during tests - /// # Safety - /// Must not be called in multi-threaded contexts - pub unsafe fn set_cgroup_file(file: String) { - TESTING_CGROUP_PATH = Some(file) - } - - /// Set cgroup mount path to mock during tests - /// # Safety - /// Must not be called in multi-threaded contexts - pub unsafe fn set_cgroup_mount_path(path: String) { - TESTING_CGROUP_MOUNT_PATH = Some(path) - } - - /// Returns the `container_id` if available in the cgroup file, otherwise returns `None` - pub fn get_container_id() -> Option<&'static str> { - // cache container id in a static to avoid recomputing it at each call - lazy_static! { - static ref CONTAINER_ID: Option = - container_id::extract_container_id(Path::new(get_cgroup_path())).ok(); - } - CONTAINER_ID.as_deref() - } - - /// Returns the `entity_id` if available, either `cid-` or `in-` - pub fn get_entity_id() -> Option<&'static str> { - // cache entity id in a static to avoid recomputing it at each call - lazy_static! { - static ref ENTITY_ID: Option = compute_entity_id( - CGROUP_V1_BASE_CONTROLLER, - Path::new(get_cgroup_path()), - Path::new(get_cgroup_mount_path()), - ); - } - ENTITY_ID.as_deref() - } - - #[cfg(test)] - mod tests { - use super::*; - use regex::Regex; - - lazy_static! 
{ - static ref IN_REGEX: Regex = Regex::new(r"in-\d+").unwrap(); - static ref CID_REGEX: Regex = - Regex::new(&format!(r"cid-{}", container_id::CONTAINER_REGEX.as_str())).unwrap(); - } - - /// The following test can only be run in isolation because of caching behaviour introduced - /// by lazy_static - fn test_entity_id(filename: &str, expected_result: Option<&Regex>) { - let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); - - let entity_id = compute_entity_id( - CGROUP_V1_BASE_CONTROLLER, - test_root_dir.join(filename).as_path(), - test_root_dir.join("cgroup").as_path(), - ); - - if let Some(regex) = expected_result { - assert!( - regex.is_match(entity_id.as_deref().unwrap()), - "testing get_entity_id with file {}: {} is not matching the expected regex", - filename, - entity_id.as_deref().unwrap_or("None") - ); - } else { - assert_eq!( - None, entity_id, - "testing get_entity_id with file {filename}" - ); - } - } - - #[test] - fn test_entity_id_for_v2() { - test_entity_id("cgroup.v2", Some(&IN_REGEX)) - } - - #[test] - fn test_entity_id_for_v1() { - test_entity_id("cgroup.linux", Some(&IN_REGEX)) - } - - #[test] - fn test_entity_id_for_container_id() { - test_entity_id("cgroup.docker", Some(&CID_REGEX)) - } - - #[test] - fn test_entity_id_for_no_id() { - test_entity_id("cgroup.no_memory", None) - } - } -} +mod unix; diff --git a/ddcommon/src/entity_id/unix/mod.rs b/ddcommon/src/entity_id/unix/mod.rs new file mode 100644 index 000000000..682ee8839 --- /dev/null +++ b/ddcommon/src/entity_id/unix/mod.rs @@ -0,0 +1,171 @@ +// Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use lazy_static::lazy_static; +use std::error; +use std::fmt; +use std::path::Path; + +mod cgroup_inode; +mod container_id; + +const DEFAULT_CGROUP_PATH: &str = "/proc/self/cgroup"; +const DEFAULT_CGROUP_MOUNT_PATH: &str = "/sys/fs/cgroup"; + +/// the base controller used to identify the cgroup v1 mount point in the cgroupMounts map. +const CGROUP_V1_BASE_CONTROLLER: &str = "memory"; + +/// stores overridable cgroup path - used in end-to-end testing to "stub" cgroup values +static mut TESTING_CGROUP_PATH: Option = None; +/// stores overridable cgroup mount path +static mut TESTING_CGROUP_MOUNT_PATH: Option = None; + +#[derive(Debug, Clone, PartialEq)] +pub enum CgroupFileParsingError { + ContainerIdNotFound, + CgroupNotFound, + CannotOpenFile, + InvalidFormat, +} + +impl fmt::Display for CgroupFileParsingError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CgroupFileParsingError::ContainerIdNotFound => write!(f, "Container id not found"), + CgroupFileParsingError::CgroupNotFound => write!(f, "Cgroup not found"), + CgroupFileParsingError::CannotOpenFile => { + write!(f, "Error while opening cgroup file") + } + CgroupFileParsingError::InvalidFormat => write!(f, "Invalid format in cgroup file"), + } + } +} + +impl error::Error for CgroupFileParsingError {} + +fn compute_entity_id( + base_controller: &str, + cgroup_path: &Path, + cgroup_mount_path: &Path, +) -> Option { + container_id::extract_container_id(cgroup_path) + .ok() + .map(|container_id| format!("cid-{container_id}")) + .or( + cgroup_inode::get_cgroup_inode(base_controller, cgroup_path, cgroup_mount_path) + .map(|inode| format!("in-{inode}")), + ) +} + +fn get_cgroup_path() -> &'static str { + // Safety: we assume set_cgroup_file is not called when it shouldn't + unsafe { + TESTING_CGROUP_PATH + .as_deref() + .unwrap_or(DEFAULT_CGROUP_PATH) + } +} + +fn get_cgroup_mount_path() -> &'static str { + // Safety: we 
assume set_cgroup_file is not called when it shouldn't + unsafe { + TESTING_CGROUP_MOUNT_PATH + .as_deref() + .unwrap_or(DEFAULT_CGROUP_MOUNT_PATH) + } +} + +/// Set the path to cgroup file to mock it during tests +/// # Safety +/// Must not be called in multi-threaded contexts +pub unsafe fn set_cgroup_file(file: String) { + TESTING_CGROUP_PATH = Some(file) +} + +/// Set cgroup mount path to mock during tests +/// # Safety +/// Must not be called in multi-threaded contexts +pub unsafe fn set_cgroup_mount_path(path: String) { + TESTING_CGROUP_MOUNT_PATH = Some(path) +} + +/// Returns the `container_id` if available in the cgroup file, otherwise returns `None` +pub fn get_container_id() -> Option<&'static str> { + // cache container id in a static to avoid recomputing it at each call + lazy_static! { + static ref CONTAINER_ID: Option = + container_id::extract_container_id(Path::new(get_cgroup_path())).ok(); + } + CONTAINER_ID.as_deref() +} + +/// Returns the `entity_id` if available, either `cid-` or `in-` +pub fn get_entity_id() -> Option<&'static str> { + // cache entity id in a static to avoid recomputing it at each call + lazy_static! { + static ref ENTITY_ID: Option = compute_entity_id( + CGROUP_V1_BASE_CONTROLLER, + Path::new(get_cgroup_path()), + Path::new(get_cgroup_mount_path()), + ); + } + ENTITY_ID.as_deref() +} + +#[cfg(test)] +mod tests { + use super::*; + use regex::Regex; + + lazy_static! 
{ + static ref IN_REGEX: Regex = Regex::new(r"in-\d+").unwrap(); + static ref CID_REGEX: Regex = + Regex::new(&format!(r"cid-{}", container_id::CONTAINER_REGEX.as_str())).unwrap(); + } + + /// The following test can only be run in isolation because of caching behaviour introduced + /// by lazy_static + fn test_entity_id(filename: &str, expected_result: Option<&Regex>) { + let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")); + + let entity_id = compute_entity_id( + CGROUP_V1_BASE_CONTROLLER, + test_root_dir.join(filename).as_path(), + test_root_dir.join("cgroup").as_path(), + ); + + if let Some(regex) = expected_result { + assert!( + regex.is_match(entity_id.as_deref().unwrap()), + "testing get_entity_id with file {}: {} is not matching the expected regex", + filename, + entity_id.as_deref().unwrap_or("None") + ); + } else { + assert_eq!( + None, entity_id, + "testing get_entity_id with file {filename}" + ); + } + } + + #[test] + fn test_entity_id_for_v2() { + test_entity_id("cgroup.v2", Some(&IN_REGEX)) + } + + #[test] + fn test_entity_id_for_v1() { + test_entity_id("cgroup.linux", Some(&IN_REGEX)) + } + + #[test] + fn test_entity_id_for_container_id() { + test_entity_id("cgroup.docker", Some(&CID_REGEX)) + } + + #[test] + fn test_entity_id_for_no_id() { + test_entity_id("cgroup.no_memory", None) + } +} From 05c421e13d552d0dd36b727f40e1330372373680 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Fri, 31 May 2024 14:47:18 +0200 Subject: [PATCH 12/13] Remove fail on invalid cgroup entry --- ddcommon/src/entity_id/unix/cgroup_inode.rs | 22 +++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/ddcommon/src/entity_id/unix/cgroup_inode.rs b/ddcommon/src/entity_id/unix/cgroup_inode.rs index e23e31359..2fe011598 100644 --- a/ddcommon/src/entity_id/unix/cgroup_inode.rs +++ b/ddcommon/src/entity_id/unix/cgroup_inode.rs @@ -13,7 +13,7 @@ use std::{fs, io}; #[cfg(not(test))] // From 
https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 // Currently, host namespace inode number are hardcoded, which can be used to detect -// if we're running in host namespace or not (does not work when running in DinD) +// if we're running in host namespace or not (does not work when running in Docker in Docker) const HOST_CGROUP_NAMESPACE_INODE: u64 = 0xEFFFFFFB; #[cfg(not(test))] @@ -25,10 +25,10 @@ fn get_inode(path: &Path) -> io::Result { Ok(meta.ino()) } -/// Returns the cgroup mount path associated with `base_controller` or the default one for +/// Returns the cgroup mount path associated with `cgroup_v1_base_controller` or the default one for /// cgroupV2 fn get_cgroup_node_path( - base_controller: &str, + cgroup_v1_base_controller: &str, cgroup_path: &Path, cgroup_mount_path: &Path, ) -> Result { @@ -41,13 +41,15 @@ fn get_cgroup_node_path( let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; let cgroup_entry: Vec<&str> = line_content.split(':').collect(); if cgroup_entry.len() != 3 { - return Err(CgroupFileParsingError::InvalidFormat); + continue; } let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); // Only keep empty controller if it is the first line as cgroupV2 uses only one line - if controllers.contains(&base_controller) || (controllers.contains(&"") && index == 0) { - let matched_operator = if controllers.contains(&base_controller) { - base_controller + if controllers.contains(&cgroup_v1_base_controller) + || (controllers.contains(&"") && index == 0) + { + let matched_operator = if controllers.contains(&cgroup_v1_base_controller) { + cgroup_v1_base_controller } else { "" }; @@ -84,7 +86,7 @@ fn is_host_cgroup_namespace() -> Result<(), ()> { /// Returns the `cgroup_inode` if available, otherwise `None` pub fn get_cgroup_inode( - base_controller: &str, + cgroup_v1_base_controller: &str, cgroup_path: &Path, cgroup_mount_path: &Path, ) -> Option { @@ 
-93,7 +95,7 @@ pub fn get_cgroup_inode( // return is not related to a container. is_host_cgroup_namespace().ok()?; let cgroup_mount_path = - get_cgroup_node_path(base_controller, cgroup_path, cgroup_mount_path).ok()?; + get_cgroup_node_path(cgroup_v1_base_controller, cgroup_path, cgroup_mount_path).ok()?; Some(get_inode(&cgroup_mount_path).ok()?.to_string()) } @@ -125,7 +127,7 @@ mod tests { // missing cgroup file should return a CannotOpenFile Error "path/to/cgroup.missing" => Err(CgroupFileParsingError::CannotOpenFile), // valid container ID with invalid line pattern makes an empty string - "cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::InvalidFormat), + "cgroup.invalid_line_container_id" => Err(CgroupFileParsingError::CgroupNotFound), }; for (&filename, expected_result) in test_files.iter() { From 0868c5b85cb720df016322718b34a6aa35d6ef94 Mon Sep 17 00:00:00 2001 From: Vianney Ruhlmann Date: Mon, 3 Jun 2024 11:32:29 +0200 Subject: [PATCH 13/13] Use empty controller as default for cgroupV1 --- ddcommon/src/entity_id/unix/cgroup_inode.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ddcommon/src/entity_id/unix/cgroup_inode.rs b/ddcommon/src/entity_id/unix/cgroup_inode.rs index 2fe011598..0e075b8b8 100644 --- a/ddcommon/src/entity_id/unix/cgroup_inode.rs +++ b/ddcommon/src/entity_id/unix/cgroup_inode.rs @@ -37,7 +37,7 @@ fn get_cgroup_node_path( let mut node_path: Option = None; - for (index, line) in reader.lines().enumerate() { + for line in reader.lines() { let line_content = &line.map_err(|_| CgroupFileParsingError::InvalidFormat)?; let cgroup_entry: Vec<&str> = line_content.split(':').collect(); if cgroup_entry.len() != 3 { @@ -45,9 +45,7 @@ fn get_cgroup_node_path( } let controllers: Vec<&str> = cgroup_entry[1].split(',').collect(); // Only keep empty controller if it is the first line as cgroupV2 uses only one line - if controllers.contains(&cgroup_v1_base_controller) - || (controllers.contains(&"") && index 
== 0) - { + if controllers.contains(&cgroup_v1_base_controller) || controllers.contains(&"") { let matched_operator = if controllers.contains(&cgroup_v1_base_controller) { cgroup_v1_base_controller } else { @@ -58,8 +56,9 @@ fn get_cgroup_node_path( path.push(cgroup_entry[2].strip_prefix('/').unwrap_or(cgroup_entry[2])); // Remove first / as the path is relative node_path = Some(path); - // if we are using cgroupV1 we can stop looking for the controller - if index != 0 { + // if we matched the V1 base controller we can return otherwise we continue until we + // find it or default to the empty controller name + if matched_operator == cgroup_v1_base_controller { break; } }