From 51d99404a18bfd248bc61b873417dc60be09b4f3 Mon Sep 17 00:00:00 2001 From: desbma-s1n Date: Wed, 22 May 2024 18:35:44 +0200 Subject: [PATCH] feat: add PEG based Pest parser feat: initial pest grammar & code refactor: rename SyscallArg to Expression refactor: IntegerExpression & BufferExpression types feat: buffers feat: macros feat: array fix: update/fix legacy regex parser refactor: rename expressions in grammar feat: simplify grammar, grab more metadata feat: multiplication feat: improve error handling feat: truncated feat: unfinished/resumed syscalls fix: update/fix regex parser feat: more flexible log level feat: more tests & fixes feat: in/out arguments feat: improve comment handling feat: named arguments refactor: move regex parser specific bench feat: grab more metadata feat: bit shift feat: macro dest addr --- Cargo.lock | 150 ++++ Cargo.toml | 11 +- src/main.rs | 8 +- src/strace/mod.rs | 63 +- src/strace/parser.rs | 1746 ------------------------------------ src/strace/parser/mod.rs | 1714 +++++++++++++++++++++++++++++++++++ src/strace/parser/peg.pest | 143 +++ src/strace/parser/peg.rs | 406 +++++++++ src/strace/parser/regex.rs | 477 ++++++++++ src/summarize.rs | 127 +-- 10 files changed, 3009 insertions(+), 1836 deletions(-) delete mode 100644 src/strace/parser.rs create mode 100644 src/strace/parser/mod.rs create mode 100644 src/strace/parser/peg.pest create mode 100644 src/strace/parser/peg.rs create mode 100644 src/strace/parser/regex.rs diff --git a/Cargo.lock b/Cargo.lock index 7e9bde1..5c04150 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -135,6 +135,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] 
+ [[package]] name = "bstr" version = "1.9.1" @@ -214,6 +223,25 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "diff" version = "0.1.13" @@ -226,6 +254,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -254,6 +292,16 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -351,6 +399,57 @@ dependencies = [ "memchr", ] +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pest" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -516,6 +615,17 @@ dependencies = [ "syn", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shh" version = "2024.4.5" @@ -529,6 +639,8 @@ dependencies = [ "lazy_static", "log", "nix", + "pest", + "pest_derive", "predicates", "pretty_assertions", "rand", @@ -627,6 +739,38 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -639,6 +783,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 2902892..3d72de0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,8 @@ itertools = { version = "0.11.0", default-features = false, features = ["use_std lazy_static = { version = "1.4.0", default-features = false } log = { version = "0.4.19", default-features = false, features = ["max_level_trace", "release_max_level_info"] } nix = { version = "0.26.2", default-features = false, features = ["fs"] } +pest = { version = "2.7.10", default-features = false, features = ["std", "memchr"], optional = true } +pest_derive = { version = "2.7.10", default-features = false, features = ["std", "grammar-extras"], optional = true} rand = { version = "0.8.5", default-features = false, features = ["std", "std_rng"] } regex = { version = "1.9.1", default-features = false, features = ["std", "perf"] } serde = { version = "1.0.193", default-features = false, features = ["std", 
"derive"] } @@ -35,10 +37,11 @@ predicates = { version = "3.0.3", default-features = false, features = ["color"] pretty_assertions = { version = "1.4.0", default-features = false, features = ["std"] } [features] -# for benchmarks only -nightly = [] -# for tests only -as-root = [] +default = ["parser-peg"] +as-root = [] # for tests only +nightly = [] # for benchmarks only +parser-peg = ["dep:pest", "dep:pest_derive"] +parser-regex = [] [lints.rust] missing_docs = "warn" diff --git a/src/main.rs b/src/main.rs index b4739b7..cfc60bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,8 +35,14 @@ fn sd_options( fn main() -> anyhow::Result<()> { // Init logger simple_logger::SimpleLogger::new() + .with_level(if cfg!(debug_assertions) { + log::LevelFilter::Debug + } else { + log::LevelFilter::Info + }) + .env() .init() - .context("Failed to init logger")?; + .context("Failed to setup logger")?; // Get versions let sd_version = systemd::SystemdVersion::local_system()?; diff --git a/src/strace/mod.rs b/src/strace/mod.rs index 2f774a3..3319626 100644 --- a/src/strace/mod.rs +++ b/src/strace/mod.rs @@ -16,7 +16,7 @@ pub struct Syscall { pub pid: u32, pub rel_ts: f64, pub name: String, - pub args: Vec, + pub args: Vec, pub ret_val: SyscallRetVal, } @@ -27,59 +27,70 @@ pub enum BufferType { } #[derive(Debug, Clone, PartialEq)] -pub enum SyscallArg { - Buffer { - value: Vec, - type_: BufferType, - }, - Integer { - value: IntegerExpression, - metadata: Option>, +pub struct IntegerExpression { + pub value: IntegerExpressionValue, + pub metadata: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BufferExpression { + pub value: Vec, + pub type_: BufferType, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Expression { + Buffer(BufferExpression), + Integer(IntegerExpression), + Struct(HashMap), + // The strace syntax can be ambiguous between array and set (ie sigset_t in sigprocmask), + // so store both in this, and let the summary interpret + Collection { + complement: 
bool, + values: Vec, }, - Struct(HashMap), - Array(Vec), Macro { name: String, - args: Vec, + args: Vec, }, + // Only used for strace pseudo macro invocations, see `test_macro_addr_arg` for an example + DestinationAddress(String), } -impl SyscallArg { +impl Expression { pub fn metadata(&self) -> Option<&[u8]> { match self { - Self::Integer { metadata, .. } => metadata.as_deref(), + Self::Integer(IntegerExpression { metadata, .. }) => metadata.as_deref(), _ => None, } } } #[derive(Debug, Clone, PartialEq)] -pub enum IntegerExpression { - BinaryNot(Box), - BinaryOr(Vec), - Multiplication(Vec), +pub enum IntegerExpressionValue { + BinaryOr(Vec), + Multiplication(Vec), LeftBitShift { - bits: Box, - shift: Box, + bits: Box, + shift: Box, }, NamedConst(String), Literal(i128), // allows holding both signed and unsigned 64 bit integers } -impl IntegerExpression { +impl IntegerExpressionValue { pub fn is_flag_set(&self, flag: &str) -> bool { match self { - IntegerExpression::NamedConst(v) => flag == v, - IntegerExpression::BinaryOr(ces) => ces.iter().any(|ce| ce.is_flag_set(flag)), - IntegerExpression::BinaryNot(ce) => !ce.is_flag_set(flag), + IntegerExpressionValue::NamedConst(v) => flag == v, + IntegerExpressionValue::BinaryOr(ces) => ces.iter().any(|ce| ce.is_flag_set(flag)), _ => false, // if it was a flag field, strace would have decoded it with named consts } } pub fn flags(&self) -> Vec { match self { - IntegerExpression::NamedConst(v) => vec![v.clone()], - IntegerExpression::BinaryOr(vs) => vs.iter().flat_map(|v| v.flags()).collect(), + IntegerExpressionValue::NamedConst(v) => vec![v.clone()], + IntegerExpressionValue::BinaryOr(vs) => vs.iter().flat_map(|v| v.flags()).collect(), _ => vec![], } } diff --git a/src/strace/parser.rs b/src/strace/parser.rs deleted file mode 100644 index 935bb7d..0000000 --- a/src/strace/parser.rs +++ /dev/null @@ -1,1746 +0,0 @@ -//! 
Strace output parser - -use std::collections::HashMap; -use std::io::BufRead; -use std::str; - -use lazy_static::lazy_static; - -use crate::strace::{BufferType, IntegerExpression, Syscall, SyscallArg, SyscallRetVal}; - -pub struct LogParser { - reader: Box, - buf: String, - unfinished_syscalls: Vec, -} - -impl LogParser { - pub fn new(reader: Box) -> anyhow::Result { - Ok(Self { - reader, - buf: String::new(), - unfinished_syscalls: Vec::new(), - }) - } -} - -#[derive(Debug, PartialEq)] -enum ParseResult { - /// This line was ignored - /// (strace sometimes outputs complete garbage like '1008333 0.000045 ???( ') - IgnoredLine, - /// This line describes an unfinished syscall - UnfinishedSyscall(Syscall), - /// This line describes a previously unfinished syscall that is now finished - FinishedSyscall { - sc: Syscall, - unfinished_index: usize, - }, - /// This line describes a complete syscall - Syscall(Syscall), -} - -// See also: -// - https://github.com/rbtcollins/strace-parse.rs/blob/master/src/lib.rs for a nom based parsing approach -// - https://github.com/wookietreiber/strace-analyzer/blob/master/src/analysis.rs for a "1 regex per syscall" approach - -lazy_static! { - static ref LINE_REGEX: regex::Regex = regex::RegexBuilder::new( - r" -^ -(?[0-9]+)\ + -(?[0-9]+\.[0-9]+)\ + -( - ( - (?[a-z0-9_]+) - \( - (?.+)? - ) - | - ( - <\.{3}\ - (?[a-z0-9_]+) - \ resumed>\ - ) -) -( - ( - - \) - \ +=\ - ( - ( - 0x - (?[a-f0-9]+) - ) - | - ( - (?[-0-9]+) - ( - < - (?[^>]+) - > - ( - # (deleted) - \( - [^\)]+ - \) - )? - )? - ) - ) - ( - (\ E[A-Z]+\ \(.*\)) # errno - | - (\ \(.*\)) # interpretation like 'Timeout' - )? - ) - | - (?\ ) -) -$ -" - ) - .ignore_whitespace(true) - .build() - .unwrap(); - static ref ARG_REGEX: regex::Regex = regex::RegexBuilder::new( - r#" -( - ( - (? - [a-zA-Z0-9_]+ - \( - [^\)]+ - \) - ) - ) - | - ( - (? - [0-9x]+ - ( - \* - [0-9x]+ - )+ - ) - ) - | - ( - (?[-0-9]+) - ( - < - (?[^>]+) - > - ( - # (deleted) - \( - [^\)]+ - \) - )? - )? 
- (\ \/\*\ [A-Za-z0-9_\-\ \+\.\:\?]+\ \*\/)? - ) - | - ( - 0x - (?[a-f0-9]+) - (\ \/\*\ [A-Za-z0-9_\-\ \+\.\:\?]+ \*\/)? - ) - | - ( - \[ - (?[^\]]+) - \] - ) - | - ( - (?[A-Z_|~\[\]\ 0-9<]+) - ( - < - (?[^>]+) - > - )? - ) - | - ( - \{ - (? - ( - [a-z0-9_]+= - ( - ([^\{]+) - | - (\{[^\{]*\}) - ) - ,\ - )* - ( - ( - [a-z0-9_]+= - ( - ([^\{]+) - | - (\{[^\{]*\}) - ) - ) - | - \.{3} - )? - ) - \} - ) - | - ( - (?@)? - " - (?[^"]*) - " - ) -) -( - (,\ ) - | - [\}\]] - | - $ -) -"# - ) - .ignore_whitespace(true) - .build() - .unwrap(); - static ref BYTE_REGEX: regex::bytes::Regex = - regex::bytes::Regex::new(r"\\x[0-9a-f]{2}").unwrap(); -} - -fn parse_buffer(s: &str) -> anyhow::Result> { - // Parse and replace '\x12' escaped bytes - let buf = BYTE_REGEX - .replace_all(s.as_bytes(), |cap: ®ex::bytes::Captures| { - let byte_match = cap.get(0).unwrap().as_bytes(); - let byte = u8::from_str_radix(str::from_utf8(&byte_match[2..]).unwrap(), 16).unwrap(); - vec![byte] - }) - .into_owned(); - Ok(buf) -} - -fn parse_argument(caps: ®ex::Captures) -> anyhow::Result { - if let Some(int) = caps.name("int") { - let metadata = caps - .name("int_metadata") - .map(|m| parse_buffer(m.as_str())) - .map_or(Ok(None), |v| v.map(Some))?; - Ok(SyscallArg::Integer { - value: IntegerExpression::Literal(int.as_str().parse()?), - metadata, - }) - } else if let Some(hex) = caps.name("int_hex") { - Ok(SyscallArg::Integer { - value: IntegerExpression::Literal(i128::from_str_radix(hex.as_str(), 16)?), - metadata: None, - }) - } else if let Some(const_) = caps.name("const_expr") { - // If you read this and are scared by the incomplete expression grammar parsing, lack of generic recursion, etc.: - // don't be, what strace outputs is actually limited to a few simple cases (or'ed flags, const, mask...) 
- let const_str = const_.as_str(); - if const_str.starts_with('~') { - assert!(!const_str.contains('|')); - assert_eq!(const_str.chars().nth(1), Some('[')); - assert_eq!(const_str.chars().last(), Some(']')); - let name = const_str[2..const_str.len() - 1] - .rsplit(' ') - .next() - .unwrap() - .to_owned(); - Ok(SyscallArg::Integer { - value: IntegerExpression::BinaryNot(Box::new(IntegerExpression::NamedConst(name))), - metadata: None, - }) - } else { - let tokens = const_str.split('|').collect::>(); - if tokens.len() == 1 { - let metadata = caps - .name("const_expr_metadata") - .map(|m| parse_buffer(m.as_str())) - .map_or(Ok(None), |v| v.map(Some))?; - Ok(SyscallArg::Integer { - value: IntegerExpression::NamedConst(tokens[0].to_owned()), - metadata, - }) - } else { - let int_tokens = tokens - .into_iter() - .map(|t| { - if let Some(one_shift) = t.strip_prefix("1<<") { - IntegerExpression::LeftBitShift { - bits: Box::new(IntegerExpression::Literal(1)), - shift: Box::new(IntegerExpression::NamedConst( - one_shift.to_owned(), - )), - } - } else { - IntegerExpression::NamedConst(t.to_owned()) - } - }) - .collect(); - Ok(SyscallArg::Integer { - value: IntegerExpression::BinaryOr(int_tokens), - metadata: None, - }) - } - } - } else if let Some(struct_) = caps.name("struct") { - let mut members = HashMap::new(); - let mut struct_ = struct_.as_str().to_owned(); - while !struct_.is_empty() { - // dbg!(&struct_); - if struct_ == "..." { - // This should not append with our strace options, but still does, strace bug? 
- log::warn!("Truncated structure in strace output"); - break; - } - let (k, v) = struct_ - .split_once('=') - .ok_or_else(|| anyhow::anyhow!("Unable to extract struct member name"))?; - // dbg!(&k); - // dbg!(&v); - let caps = ARG_REGEX - .captures(v) - .ok_or_else(|| anyhow::anyhow!("Unable to parse struct member value"))?; - let v = parse_argument(&caps)?; - // dbg!(&v); - members.insert(k.to_owned(), v); - #[allow(clippy::assigning_clones)] - { - struct_ = - struct_[k.len() + 1 + caps.get(0).unwrap().len()..struct_.len()].to_owned(); - } - } - Ok(SyscallArg::Struct(members)) - } else if let Some(array) = caps.name("array") { - let members = ARG_REGEX - .captures_iter(array.as_str()) - .map(|a| parse_argument(&a)) - .collect::>()?; - Ok(SyscallArg::Array(members)) - } else if let Some(buf) = caps.name("buf") { - let buf = parse_buffer(buf.as_str())?; - let type_ = if caps.name("buf_abstract_path").is_some() { - BufferType::AbstractPath - } else { - BufferType::Unknown - }; - Ok(SyscallArg::Buffer { value: buf, type_ }) - } else if let Some(macro_) = caps.name("macro") { - let (name, args) = macro_.as_str().split_once('(').unwrap(); - let args = args[..args.len() - 1].to_owned(); - let args = ARG_REGEX - .captures_iter(&args) - .map(|a| parse_argument(&a)) - .collect::>()?; - Ok(SyscallArg::Macro { - name: name.to_owned(), - args, - }) - } else if let Some(multiplication) = caps.name("multiplication") { - let args = multiplication - .as_str() - .split('*') - .map(|a| -> anyhow::Result { - let arg = ARG_REGEX - .captures(a) - .ok_or_else(|| anyhow::anyhow!("Unexpected multiplication argument {a:?}"))?; - match parse_argument(&arg)? { - SyscallArg::Integer { value, .. 
} => Ok(value), - _ => Err(anyhow::anyhow!("Unexpected multiplication argument {a:?}")), - } - }) - .collect::>()?; - Ok(SyscallArg::Integer { - value: IntegerExpression::Multiplication(args), - metadata: None, - }) - } else { - unreachable!("Argument has no group match") - } -} - -fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result { - match LINE_REGEX.captures(line) { - Some(caps) => { - let pid = caps - .name("pid") - .unwrap() - .as_str() - .parse() - .map_err(|e| anyhow::Error::new(e).context("Failed to parse pid"))?; - - let rel_ts = caps - .name("rel_ts") - .unwrap() - .as_str() - .parse() - .map_err(|e| anyhow::Error::new(e).context("Failed to parse timestamp"))?; - - if let Some(name) = caps.name("name") { - let name = name.as_str().to_owned(); - - let args = if let Some(arguments) = caps.name("arguments") { - ARG_REGEX - .captures_iter(arguments.as_str()) - .map(|a| parse_argument(&a)) - .collect::>()? - } else { - Vec::new() - }; - - let ret_val = if let Some(ret_val_int) = caps.name("ret_val_int") { - let s = ret_val_int.as_str(); - s.parse().map_err(|e| { - anyhow::Error::new(e) - .context(format!("Failed to parse integer return value: {s:?}")) - })? - } else if let Some(ret_val_hex) = caps.name("ret_val_hex") { - let s = ret_val_hex.as_str(); - SyscallRetVal::from_str_radix(s, 16).map_err(|e| { - anyhow::Error::new(e) - .context(format!("Failed to parse hexadecimal return value: {s:?}")) - })? 
- } else if caps.name("unfinished").is_some() { - return Ok(ParseResult::UnfinishedSyscall(Syscall { - pid, - rel_ts, - name, - args, - ret_val: SyscallRetVal::MAX, // Set dummy value we will replace - })); - } else { - unreachable!(); - }; - - let sc = Syscall { - pid, - rel_ts, - name, - args, - ret_val, - }; - Ok(ParseResult::Syscall(sc)) - } else if let Some(name_resumed) = caps.name("name_resumed").map(|c| c.as_str()) { - let ret_val = if let Some(ret_val_int) = caps.name("ret_val_int") { - let s = ret_val_int.as_str(); - s.parse().map_err(|e| { - anyhow::Error::new(e) - .context(format!("Failed to parse integer return value: {s:?}")) - })? - } else if let Some(ret_val_hex) = caps.name("ret_val_hex") { - let s = ret_val_hex.as_str(); - SyscallRetVal::from_str_radix(s, 16).map_err(|e| { - anyhow::Error::new(e) - .context(format!("Failed to parse hexadecimal return value: {s:?}")) - })? - } else { - unreachable!(); - }; - - let (unfinished_index, unfinished_sc) = unfinished_syscalls - .iter() - .enumerate() - .find(|(_i, sc)| (sc.name == name_resumed) && (sc.pid == pid)) - .ok_or_else(|| anyhow::anyhow!("Unabled to find first part of syscall"))?; - let sc = Syscall { - // Update return val and timestamp (to get return time instead of call time) - ret_val, - rel_ts, - ..unfinished_sc.clone() - }; - Ok(ParseResult::FinishedSyscall { - sc, - unfinished_index, - }) - } else { - unreachable!(); - } - } - None => Ok(ParseResult::IgnoredLine), - } -} - -impl Iterator for LogParser { - type Item = anyhow::Result; - - /// Parse strace output lines and yield syscalls - /// Ignore invalid lines, but bubble up errors if the regex matches and we fail subsequent parsing - fn next(&mut self) -> Option { - let sc = loop { - self.buf.clear(); - let line = match self.reader.read_line(&mut self.buf) { - Ok(0) => return None, // EOF - Ok(_) => self.buf.trim_end(), - Err(e) => return Some(Err(anyhow::Error::new(e).context("Failed to read line"))), - }; - - if line.ends_with(" +++") 
|| line.ends_with(" ---") { - // Process exited, or signal received, not a syscall - continue; - } - - match parse_line(line, &self.unfinished_syscalls) { - Ok(ParseResult::Syscall(sc)) => { - log::trace!("Parsed line: {line:?}"); - break sc; - } - Ok(ParseResult::UnfinishedSyscall(sc)) => { - self.unfinished_syscalls.push(sc); - continue; - } - Ok(ParseResult::FinishedSyscall { - sc, - unfinished_index, - }) => { - self.unfinished_syscalls.swap_remove(unfinished_index); // I fucking love Rust <3 - break sc; - } - Ok(ParseResult::IgnoredLine) => { - log::warn!("Ignored line: {line:?}"); - continue; - } - Err(e) => { - // Unfortunately, some versions of strace output inconsistent line format, - // so we have to ignore some parsing errors - // TODO probe strace version and warn if too old? - // log::error!("Failed to parse line: {line:?}"); - // return Some(Err(e)); - log::warn!("Failed to parse line ({e}): {line:?}"); - continue; - } - }; - }; - Some(Ok(sc)) - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn test_mmap() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "382944 0.000054 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f52a332e000", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 382944, - rel_ts: 0.000054, - name: "mmap".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(8192), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("PROT_READ".to_owned()), - IntegerExpression::NamedConst("PROT_WRITE".to_owned()), - ]), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("MAP_PRIVATE".to_owned()), - 
IntegerExpression::NamedConst("MAP_ANONYMOUS".to_owned()), - ]), - metadata:None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(-1), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - - ], - ret_val: 0x7f52a332e000 - }) - ); - - assert_eq!( - parse_line( - "601646 0.000011 mmap(0x7f2fce8dc000, 1396736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f2fce8dc000", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 601646, - rel_ts: 0.000011, - name: "mmap".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(0x7f2fce8dc000), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(1396736), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("PROT_READ".to_owned()), - IntegerExpression::NamedConst("PROT_EXEC".to_owned()), - ]), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("MAP_PRIVATE".to_owned()), - IntegerExpression::NamedConst("MAP_FIXED".to_owned()), - IntegerExpression::NamedConst("MAP_DENYWRITE".to_owned()), - ]), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(3), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0x26000), - metadata: None - }, - ], - ret_val: 0x7f2fce8dc000 - }) - ); - } - - #[test] - fn test_access() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "382944 0.000036 access(\"/etc/ld.so.preload\", R_OK) = -1 ENOENT (No such file or directory)", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 382944, - rel_ts: 0.000036, - name: "access".to_owned(), - args: vec![ - SyscallArg::Buffer { - value: "/etc/ld.so.preload".as_bytes().to_vec(), - type_: BufferType::Unknown - }, - SyscallArg::Integer { - value: 
IntegerExpression::NamedConst("R_OK".to_owned()), - metadata: None, - }, - ], - ret_val: -1 - }) - ); - } - - #[test] - fn test_rt_sigaction() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "720313 0.000064 rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7f6da716c510}, NULL, 8) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 720313, - rel_ts: 0.000064, - name: "rt_sigaction".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("SIGTERM".to_owned()), - metadata: None, - }, - SyscallArg::Struct(HashMap::from([ - ( - "sa_handler".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("SIG_DFL".to_owned()), - metadata: None, - }, - ), - ( - "sa_mask".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::BinaryNot(Box::new(IntegerExpression::NamedConst("RT_1".to_owned()))), - metadata: None, - }, - ), - ( - "sa_flags".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("SA_RESTORER".to_owned()), - metadata: None, - }, - ), - ( - "sa_restorer".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0x7f6da716c510), - metadata: None - }, - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(8), - metadata: None - }, - ], - ret_val: 0 - }) - ); - } - - #[test] - fn test_rt_sigprocmask() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line("440663 0.002174 rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP RTMIN RT_1], 8) = 0", &[]).unwrap(), - ParseResult::Syscall(Syscall {pid: 440663, - rel_ts: 0.002174, - name: "rt_sigprocmask".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst( - "SIG_SETMASK".to_owned(), - ), - metadata: None, - }, - SyscallArg::Integer { - value: 
IntegerExpression::NamedConst( - "[]".to_owned(), - ), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::BinaryNot( - Box::new(IntegerExpression::NamedConst( - "RT_1".to_owned(), - )), - ), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal( - 8, - ), - metadata: None, - }, - ], - ret_val: 0, - }) - ); - } - - #[test] - fn test_newfstatat() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "772627 0.000010 newfstatat(AT_FDCWD, \"/a/path\", {st_dev=makedev(0xfd, 0x1), st_ino=26427782, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=1000, st_gid=1000, st_blksize=4096, st_blocks=112, st_size=53248, st_atime=1689948680 /* 2023-07-21T16:11:20.028467954+0200 */, st_atime_nsec=28467954, st_mtime=1692975712 /* 2023-08-25T17:01:52.252908565+0200 */, st_mtime_nsec=252908565, st_ctime=1692975712 /* 2023-08-25T17:01:52.252908565+0200 */, st_ctime_nsec=252908565}, 0) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 772627, - rel_ts: 0.000010, - name: "newfstatat".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AT_FDCWD".to_owned()), - metadata: None, - }, - SyscallArg::Buffer { - value: "/a/path".as_bytes().to_vec(), - type_: BufferType::Unknown - }, - SyscallArg::Struct(HashMap::from([ - ( - "st_dev".to_owned(), - SyscallArg::Macro { - name: "makedev".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(0xfd), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(1), - metadata: None, - }, - ], - }, - ), - ( - "st_ino".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(26427782), - metadata: None - }, - ), - ( - "st_mode".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("S_IFDIR".to_owned()), - IntegerExpression::NamedConst("0755".to_owned()) - ]), - metadata: None, - }, - ), - ( - 
"st_nlink".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(2), - metadata: None - }, - ), - ( - "st_uid".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1000), - metadata: None - }, - ), - ( - "st_gid".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1000), - metadata: None - }, - ), - ( - "st_blksize".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4096), - metadata: None - }, - ), - ( - "st_blocks".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(112), - metadata: None - }, - ), - ( - "st_size".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(53248), - metadata: None - }, - ), - ( - "st_atime".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1689948680), - metadata: None - }, - ), - ( - "st_atime_nsec".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(28467954), - metadata: None - }, - ), - ( - "st_mtime".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1692975712), - metadata: None - }, - ), - ( - "st_mtime_nsec".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(252908565), - metadata: None - }, - ), - ( - "st_ctime".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1692975712), - metadata: None - }, - ), - ( - "st_ctime_nsec".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(252908565), - metadata: None - }, - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ], - ret_val: 0 - }) - ); - } - - #[test] - fn test_getrandom() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "815537 0.000017 getrandom(\"\\x42\\x18\\x81\\x90\\x40\\x63\\x1a\\x2c\", 8, GRND_NONBLOCK) = 8", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 815537, - rel_ts: 0.000017, - name: "getrandom".to_owned(), - args: vec![ - 
SyscallArg::Buffer { - value: vec![0x42, 0x18, 0x81, 0x90, 0x40, 0x63, 0x1a, 0x2c], - type_: BufferType::Unknown - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(8), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("GRND_NONBLOCK".to_owned()), - metadata: None, - }, - ], - ret_val: 8 - }) - ); - } - - #[test] - fn test_fstatfs() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "244841 0.000033 fstatfs(6, {f_type=EXT2_SUPER_MAGIC, f_bsize=4096, f_blocks=231830864, f_bfree=38594207, f_bavail=26799417, f_files=58957824, f_ffree=54942232, f_fsid={val=[0x511787a8, 0x92a74a52]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_NOATIME}) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 244841, - rel_ts: 0.000033, - name: "fstatfs".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(6), - metadata: None - }, - SyscallArg::Struct(HashMap::from([ - ( - "f_type".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("EXT2_SUPER_MAGIC".to_owned()), - metadata: None, - }, - ), - ( - "f_bsize".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4096), - metadata: None - }, - ), - ( - "f_blocks".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(231830864), - metadata: None - }, - ), - ( - "f_bfree".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(38594207), - metadata: None - }, - ), - ( - "f_bavail".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(26799417), - metadata: None - }, - ), - ( - "f_files".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(58957824), - metadata: None - }, - ), - ( - "f_ffree".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(54942232), - metadata: None - }, - ), - ( - "f_fsid".to_owned(), - SyscallArg::Struct(HashMap::from([ - ( - "val".to_owned(), - 
SyscallArg::Array(vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(1360496552), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(2460437074), - metadata: None - }, - ]) - ) - ])) - ), - ( - "f_namelen".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(255), - metadata: None - }, - ), - ( - "f_frsize".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4096), - metadata: None - }, - ), - ( - "f_flags".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("ST_VALID".to_owned()), - IntegerExpression::NamedConst("ST_NOATIME".to_owned()) - ]), - metadata: None, - }, - ), - ])) - ], - ret_val: 0 - }) - ); - - assert_eq!( - parse_line( - "895683 0.000028 fstatfs(3, {f_type=PROC_SUPER_MAGIC, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={val=[0, 0]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_NOSUID|ST_NODEV|ST_NOEXEC|ST_RELATIME}) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 895683, - rel_ts: 0.000028, - name: "fstatfs".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(3), - metadata: None - }, - SyscallArg::Struct(HashMap::from([ - ( - "f_type".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("PROC_SUPER_MAGIC".to_owned()), - metadata: None, - }, - ), - ( - "f_bsize".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4096), - metadata: None - }, - ), - ( - "f_blocks".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ), - ( - "f_bfree".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ), - ( - "f_bavail".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ), - ( - "f_files".to_owned(), - SyscallArg::Integer { - value: 
IntegerExpression::Literal(0), - metadata: None - }, - ), - ( - "f_ffree".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ), - ( - "f_fsid".to_owned(), - SyscallArg::Struct(HashMap::from([ - ( - "val".to_owned(), - SyscallArg::Array(vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None - }, - ]) - ) - ])) - ), - ( - "f_namelen".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(255), - metadata: None - }, - ), - ( - "f_frsize".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4096), - metadata: None - }, - ), - ( - "f_flags".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("ST_VALID".to_owned()), - IntegerExpression::NamedConst("ST_NOSUID".to_owned()), - IntegerExpression::NamedConst("ST_NODEV".to_owned()), - IntegerExpression::NamedConst("ST_NOEXEC".to_owned()), - IntegerExpression::NamedConst("ST_RELATIME".to_owned()) - ]), - metadata: None, - }, - ), - ])) - ], - ret_val: 0 - }) - ); - } - - #[test] - fn test_open_relative() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "998518 0.000033 openat(AT_FDCWD<\\x2f\\x68\\x6f\\x6d\\x65\\x2f\\x6d\\x64\\x65\\x2f\\x73\\x72\\x63\\x2f\\x73\\x68\\x68>, \"\\x2e\\x2e\", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3<\\x2f\\x68\\x6f\\x6d\\x65\\x2f\\x6d\\x64\\x65\\x2f\\x73\\x72\\x63>", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 998518, - rel_ts: 0.000033, - name: "openat".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AT_FDCWD".to_owned()), - metadata: Some(vec![0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x6d, 0x64, 0x65, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x73, 0x68, 0x68]), - }, - SyscallArg::Buffer { - value: vec![0x2e, 0x2e], - type_: BufferType::Unknown, - }, - 
SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("O_RDONLY".to_owned()), - IntegerExpression::NamedConst("O_NONBLOCK".to_owned()), - IntegerExpression::NamedConst("O_CLOEXEC".to_owned()), - IntegerExpression::NamedConst("O_DIRECTORY".to_owned()) - ]), - metadata: None, - }, - ], - ret_val: 3 - }) - ); - } - - #[test] - fn test_truncated() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "28707 0.000194 sendto(15<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x35\\x34\\x31\\x38\\x32\\x31\\x33\\x5d>, [{nlmsg_len=20, nlmsg_type=RTM_GETADDR, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1694010548, nlmsg_pid=0}, {ifa_family=AF_UNSPEC, ...}], 20, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 20", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 28707, - rel_ts: 0.000194, - name: "sendto".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(15), - metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 53, 52, 49, 56, 50, 49, 51, 93]) - }, - SyscallArg::Array(vec![ - SyscallArg::Struct(HashMap::from([ - ( - "nlmsg_len".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(20), - metadata: None, - }, - ), - ( - "nlmsg_type".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("RTM_GETADDR".to_owned()), - metadata: None, - }, - ), - ( - "nlmsg_flags".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::BinaryOr(vec![ - IntegerExpression::NamedConst("NLM_F_REQUEST".to_owned()), - IntegerExpression::NamedConst("NLM_F_DUMP".to_owned()), - ]), - metadata: None, - }, - ), - ( - "nlmsg_seq".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1694010548), - metadata: None, - }, - ), - ( - "nlmsg_pid".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - ), - ])), - SyscallArg::Struct(HashMap::from([ - ( - 
"ifa_family".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AF_UNSPEC".to_owned()), - metadata: None, - }, - ), - ])), - ]), - SyscallArg::Integer { - value: IntegerExpression::Literal(20), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - SyscallArg::Struct(HashMap::from([ - ( - "sa_family".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AF_NETLINK".to_owned()), - metadata: None, - }, - ), - ( - "nl_pid".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - ), - ( - "nl_groups".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::Literal(12), - metadata: None, - }, - ], - ret_val: 20 - }) - ); - } - - #[test] - fn test_invalid() { - let _ = simple_logger::SimpleLogger::new().init(); - - // Bogus output ('{{', note the missing field name) that strace 5.10 can generate - let err = - parse_line( - "57652 0.000071 sendto(19<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x38\\x34\\x38\\x36\\x39\\x32\\x5d>, {{len=20, type=0x16 /* NLMSG_??? 
*/, flags=NLM_F_REQUEST|0x300, seq=1697715709, pid=0}, \"\\x00\\x00\\x00\\x00\"}, 20, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 20", - &[] - ).unwrap_err(); - assert_eq!(&err.to_string(), "Unable to extract struct member name"); - } - - #[test] - fn test_bind() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "688129 0.000023 bind(4<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x34\\x31\\x38\\x34\\x35\\x32\\x32\\x5d>, {sa_family=AF_UNIX, sun_path=@\"\\x62\\x31\\x39\\x33\\x64\\x30\\x62\\x30\\x63\\x63\\x64\\x37\\x30\\x35\\x66\\x39\\x2f\\x62\\x75\\x73\\x2f\\x73\\x79\\x73\\x74\\x65\\x6d\\x63\\x74\\x6c\\x2f\"}, 34) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 688129, - rel_ts: 0.000023, - name: "bind".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: Some("socket:[4184522]".as_bytes().to_vec()) - }, - SyscallArg::Struct(HashMap::from([ - ( - "sa_family".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AF_UNIX".to_owned()), - metadata: None, - }, - ), - ( - "sun_path".to_owned(), - SyscallArg::Buffer { - value: "b193d0b0ccd705f9/bus/systemctl/".as_bytes().to_vec(), - type_: BufferType::AbstractPath, - }, - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::Literal(34), - metadata: None, - }, - ], - ret_val: 0 - }) - ); - - assert_eq!( - parse_line( - "132360 0.000022 bind(6<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x38\\x31\\x35\\x36\\x39\\x33\\x5d>, {sa_family=AF_INET, sin_port=htons(8025), sin_addr=inet_addr(\"\\x31\\x32\\x37\\x2e\\x30\\x2e\\x30\\x2e\\x31\")}, 16) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 132360, - rel_ts: 0.000022, - name: "bind".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(6), - metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 56, 49, 53, 54, 57, 51, 93]), - }, - SyscallArg::Struct(HashMap::from([ - ( - 
"sa_family".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AF_INET".to_owned()), - metadata: None, - }, - ), - ( - "sin_port".to_owned(), - SyscallArg::Macro { - name: "htons".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(8025), - metadata: None, - }, - ], - } - ), - ( - "sin_addr".to_owned(), - SyscallArg::Macro { - name: "inet_addr".to_owned(), - args: vec![ - SyscallArg::Buffer { - value: vec![49, 50, 55, 46, 48, 46, 48, 46, 49], - type_: BufferType::Unknown, - }, - ], - } - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::Literal(16), - metadata: None, - }, - ], - ret_val: 0 - }) - ); - } - - #[test] - fn test_multiplication() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "85195 0.000038 prlimit64(0, RLIMIT_NOFILE, {rlim_cur=512*1024, rlim_max=512*1024}, NULL) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 85195, - rel_ts: 0.000038, - name: "prlimit64".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("RLIMIT_NOFILE".to_owned()), - metadata: None, - }, - SyscallArg::Struct(HashMap::from([ - ( - "rlim_cur".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Multiplication(vec![ - IntegerExpression::Literal(512), - IntegerExpression::Literal(1024), - ]), - metadata: None, - }, - ), - ( - "rlim_max".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Multiplication(vec![ - IntegerExpression::Literal(512), - IntegerExpression::Literal(1024), - ]), - metadata: None, - }, - ), - ])), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - ], - ret_val: 0 - }) - ); - } - - #[test] - fn test_epoll() { - let _ = simple_logger::SimpleLogger::new().init(); - - assert_eq!( - parse_line( - "114586 0.000075 
epoll_ctl(3<\\x61\\x6e\\x6f\\x6e\\x5f\\x69\\x6e\\x6f\\x64\\x65\\x3a\\x5b\\x65\\x76\\x65\\x6e\\x74\\x70\\x6f\\x6c\\x6c\\x5d>, EPOLL_CTL_ADD, 4<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x37\\x33\\x31\\x35\\x39\\x38\\x5d>, {events=EPOLLIN, data={u32=4, u64=4}}) = 0", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 114586, - rel_ts: 0.000075, - name: "epoll_ctl".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(3), - metadata: Some(vec![97, 110, 111, 110, 95, 105, 110, 111, 100, 101, 58, 91, 101, 118, 101, 110, 116, 112, 111, 108, 108, 93]), - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("EPOLL_CTL_ADD".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 55, 51, 49, 53, 57, 56, 93]), - }, - SyscallArg::Struct(HashMap::from([ - ( - "events".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("EPOLLIN".to_owned()), - metadata: None, - }, - ), - ( - "data".to_owned(), - SyscallArg::Struct(HashMap::from([ - ( - "u32".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: None, - } - ), - ( - "u64".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: None, - } - ), - ])) - ), - ])), - ], - ret_val: 0 - }) - ); - - assert_eq!( - parse_line( - "3487 0.000130 epoll_pwait(4<\\x61\\x6e\\x6f\\x6e\\x5f\\x69\\x6e\\x6f\\x64\\x65\\x3a\\x5b\\x65\\x76\\x65\\x6e\\x74\\x70\\x6f\\x6c\\x6c\\x5d>, [{events=EPOLLOUT, data={u32=833093633, u64=9163493471957811201}}, {events=EPOLLOUT, data={u32=800587777, u64=9163493471925305345}}], 128, 0, NULL, 0) = 2", - &[] - ).unwrap(), - ParseResult::Syscall(Syscall { - pid: 3487, - rel_ts: 0.000130, - name: "epoll_pwait".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: Some(vec![0x61, 0x6e, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x6f, 0x64, 0x65, 
0x3a, 0x5b, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x70, 0x6f, 0x6c, 0x6c, 0x5d]), - }, - SyscallArg::Array(vec![ - SyscallArg::Struct(HashMap::from([ - ( - "events".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("EPOLLOUT".to_owned()), - metadata: None, - }, - ), - ( - "data".to_owned(), - SyscallArg::Struct(HashMap::from([ - ( - "u32".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(833093633), - metadata: None, - } - ), - ( - "u64".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(9163493471957811201), - metadata: None, - } - ), - ])) - ), - ])), - SyscallArg::Struct(HashMap::from([ - ( - "events".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("EPOLLOUT".to_owned()), - metadata: None, - }, - ), - ( - "data".to_owned(), - SyscallArg::Struct(HashMap::from([ - ( - "u32".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(800587777), - metadata: None, - } - ), - ( - "u64".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(9163493471925305345), - metadata: None, - } - ), - ])) - ), - ])), - ]), - SyscallArg::Integer { - value: IntegerExpression::Literal(128), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::Literal(0), - metadata: None, - }, - ], - ret_val: 2 - }) - ); - } - - #[test] - fn test_interleave() { - let _ = simple_logger::SimpleLogger::new().init(); - - let lines = Cursor::new( - "1 0.000001 select(4, [3], NULL, NULL, NULL -2 0.000002 clock_gettime(CLOCK_REALTIME, {tv_sec=1130322148, tv_nsec=3977000}) = 0 -1 0.000003 <... 
select resumed> ) = 1 (in [3])" - .as_bytes() - .to_vec(), - ); - let parser = LogParser::new(Box::new(lines)).unwrap(); - let syscalls: Vec = parser.into_iter().collect::>().unwrap(); - - assert_eq!( - syscalls, - vec![ - Syscall { - pid: 2, - rel_ts: 0.000002, - name: "clock_gettime".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("CLOCK_REALTIME".to_owned()), - metadata: None, - }, - SyscallArg::Struct(HashMap::from([ - ( - "tv_sec".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(1130322148), - metadata: None, - }, - ), - ( - "tv_nsec".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::Literal(3977000), - metadata: None, - }, - ), - ])), - ], - ret_val: 0 - }, - Syscall { - pid: 1, - rel_ts: 0.000003, - name: "select".to_owned(), - args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(4), - metadata: None, - }, - SyscallArg::Array(vec![SyscallArg::Integer { - value: IntegerExpression::Literal(3), - metadata: None, - },]), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("NULL".to_owned()), - metadata: None, - }, - ], - ret_val: 1 - } - ] - ); - } -} - -#[cfg(all(feature = "nightly", test))] -mod benchs { - extern crate test; - - use super::*; - - use std::iter; - - use test::Bencher; - - #[bench] - fn bench_parse_buffer(b: &mut Bencher) { - let s = format!( - "\"{}\"", - iter::repeat_with(|| format!("\\x{:02x}", fastrand::u8(..))) - .take(512) - .collect::>() - .join("") - ); - - b.iter(|| { - parse_buffer(&s).unwrap(); - }); - } -} diff --git a/src/strace/parser/mod.rs b/src/strace/parser/mod.rs new file mode 100644 index 0000000..ad1ec89 --- /dev/null +++ b/src/strace/parser/mod.rs @@ -0,0 +1,1714 @@ +//! 
Strace output parser + +use std::io::BufRead; + +use crate::strace::Syscall; + +#[cfg(feature = "parser-peg")] +mod peg; +#[cfg(feature = "parser-regex")] +mod regex; + +#[cfg(feature = "parser-peg")] +use peg::parse_line; +#[cfg(feature = "parser-regex")] +use regex::parse_line; + +pub struct LogParser { + reader: Box, + buf: String, + unfinished_syscalls: Vec, +} + +impl LogParser { + pub fn new(reader: Box) -> anyhow::Result { + Ok(Self { + reader, + buf: String::new(), + unfinished_syscalls: Vec::new(), + }) + } +} + +#[derive(Debug, PartialEq)] +enum ParseResult { + /// This line was ignored + /// (strace sometimes outputs complete garbage like '1008333 0.000045 ???( ') + IgnoredLine, + /// This line describes an unfinished syscall + UnfinishedSyscall(Syscall), + /// This line describes a previously unfinished syscall that is now finished + FinishedSyscall { + sc: Syscall, + unfinished_index: usize, + }, + /// This line describes a complete syscall + Syscall(Syscall), +} + +impl Iterator for LogParser { + type Item = anyhow::Result; + + /// Parse strace output lines and yield syscalls + /// Ignore invalid lines, but bubble up errors if the parsing matches and we fail subsequent parsing + fn next(&mut self) -> Option { + let sc = loop { + self.buf.clear(); + let line = match self.reader.read_line(&mut self.buf) { + Ok(0) => return None, // EOF + Ok(_) => self.buf.trim_end(), + Err(e) => return Some(Err(anyhow::Error::new(e).context("Failed to read line"))), + }; + + if line.ends_with(" +++") || line.ends_with(" ---") { + // Process exited, or signal received, not a syscall + continue; + } + + match parse_line(line, &self.unfinished_syscalls) { + Ok(ParseResult::Syscall(sc)) => { + log::trace!("Parsed line: {line:?}"); + break sc; + } + Ok(ParseResult::UnfinishedSyscall(sc)) => { + self.unfinished_syscalls.push(sc); + continue; + } + Ok(ParseResult::FinishedSyscall { + sc, + unfinished_index, + }) => { + self.unfinished_syscalls.swap_remove(unfinished_index); // 
I fucking love Rust <3 + break sc; + } + Ok(ParseResult::IgnoredLine) => { + log::warn!("Ignored line: {line:?}"); + continue; + } + Err(e) => { + // Unfortunately, some versions of strace output inconsistent line format, + // so we have to ignore some parsing errors + // TODO probe strace version and warn if too old? + // log::error!("Failed to parse line: {line:?}"); + // return Some(Err(e)); + log::warn!("Failed to parse line ({e}): {line:?}"); + continue; + } + }; + }; + Some(Ok(sc)) + } +} + +#[cfg(test)] +mod tests { + use std::{collections::HashMap, io::Cursor}; + + use pretty_assertions::assert_eq; + + use crate::strace::{ + BufferExpression, BufferType, Expression, IntegerExpression, IntegerExpressionValue, + }; + + use super::*; + + #[test] + fn test_mmap() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "382944 0.000054 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f52a332e000", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 382944, + rel_ts: 0.000054, + name: "mmap".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(8192), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("PROT_READ".to_owned()), + IntegerExpressionValue::NamedConst("PROT_WRITE".to_owned()), + ]), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("MAP_PRIVATE".to_owned()), + IntegerExpressionValue::NamedConst("MAP_ANONYMOUS".to_owned()), + ]), + metadata:None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(-1), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: 
IntegerExpressionValue::Literal(0), + metadata: None + }), + + ], + ret_val: 0x7f52a332e000 + }) + ); + + assert_eq!( + parse_line( + "601646 0.000011 mmap(0x7f2fce8dc000, 1396736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f2fce8dc000", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 601646, + rel_ts: 0.000011, + name: "mmap".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f2fce8dc000), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1396736), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("PROT_READ".to_owned()), + IntegerExpressionValue::NamedConst("PROT_EXEC".to_owned()), + ]), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("MAP_PRIVATE".to_owned()), + IntegerExpressionValue::NamedConst("MAP_FIXED".to_owned()), + IntegerExpressionValue::NamedConst("MAP_DENYWRITE".to_owned()), + ]), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x26000), + metadata: None + }), + ], + ret_val: 0x7f2fce8dc000 + }) + ); + } + + #[test] + fn test_access() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "382944 0.000036 access(\"/etc/ld.so.preload\", R_OK) = -1 ENOENT (No such file or directory)", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 382944, + rel_ts: 0.000036, + name: "access".to_owned(), + args: vec![ + Expression::Buffer(BufferExpression { + value: "/etc/ld.so.preload".as_bytes().to_vec(), + type_: BufferType::Unknown + }), + Expression::Integer(IntegerExpression { + value: 
IntegerExpressionValue::NamedConst("R_OK".to_owned()), + metadata: None, + }), + ], + ret_val: -1 + }) + ); + } + + #[test] + fn test_rt_sigaction() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "720313 0.000064 rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=~[RTMIN RT_1], sa_flags=SA_RESTORER, sa_restorer=0x7f6da716c510}, NULL, 8) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 720313, + rel_ts: 0.000064, + name: "rt_sigaction".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("SIGTERM".to_owned()), + metadata: None, + }), + Expression::Struct(HashMap::from([ + ( + "sa_handler".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("SIG_DFL".to_owned()), + metadata: None, + }), + ), + ( + "sa_mask".to_owned(), + Expression::Collection { + complement: true, + values: vec![ + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTMIN".to_owned()), + metadata: None + } + ), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RT_1".to_owned()), + metadata: None + } + ), + ] + } + ), + ( + "sa_flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("SA_RESTORER".to_owned()), + metadata: None, + }), + ), + ( + "sa_restorer".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f6da716c510), + metadata: None + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(8), + metadata: None + }), + ], + ret_val: 0 + }) + ); + } + + #[test] + fn test_rt_sigprocmask() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line("440663 0.002174 
rt_sigprocmask(SIG_SETMASK, [], ~[KILL STOP RTMIN RT_1], 8) = 0", &[]).unwrap(), + ParseResult::Syscall(Syscall {pid: 440663, + rel_ts: 0.002174, + name: "rt_sigprocmask".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst( + "SIG_SETMASK".to_owned(), + ), + metadata: None, + }), + Expression::Collection { + complement: false, + values: vec![], + }, + Expression::Collection { + complement: true, + values: vec![ + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("KILL".to_owned()), + metadata: None + } + ), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("STOP".to_owned()), + metadata: None + } + ), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTMIN".to_owned()), + metadata: None + } + ), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RT_1".to_owned()), + metadata: None + } + ), + ], + }, + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal( + 8, + ), + metadata: None, + }), + ], + ret_val: 0, + }) + ); + } + + #[test] + fn test_newfstatat() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "772627 0.000010 newfstatat(AT_FDCWD, \"/a/path\", {st_dev=makedev(0xfd, 0x1), st_ino=26427782, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=1000, st_gid=1000, st_blksize=4096, st_blocks=112, st_size=53248, st_atime=1689948680 /* 2023-07-21T16:11:20.028467954+0200 */, st_atime_nsec=28467954, st_mtime=1692975712 /* 2023-08-25T17:01:52.252908565+0200 */, st_mtime_nsec=252908565, st_ctime=1692975712 /* 2023-08-25T17:01:52.252908565+0200 */, st_ctime_nsec=252908565}, 0) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 772627, + rel_ts: 0.000010, + name: "newfstatat".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: 
IntegerExpressionValue::NamedConst("AT_FDCWD".to_owned()), + metadata: None, + }), + Expression::Buffer(BufferExpression { + value: "/a/path".as_bytes().to_vec(), + type_: BufferType::Unknown + }), + Expression::Struct(HashMap::from([ + ( + "st_dev".to_owned(), + Expression::Macro { + name: "makedev".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0xfd), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1), + metadata: None, + }), + ], + }, + ), + ( + "st_ino".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(26427782), + metadata: None + }), + ), + ( + "st_mode".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("S_IFDIR".to_owned()), + IntegerExpressionValue::Literal(0o755) + ]), + metadata: None, + }), + ), + ( + "st_nlink".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(2), + metadata: None + }), + ), + ( + "st_uid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1000), + metadata: None + }), + ), + ( + "st_gid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1000), + metadata: None + }), + ), + ( + "st_blksize".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4096), + metadata: None + }), + ), + ( + "st_blocks".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(112), + metadata: None + }), + ), + ( + "st_size".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(53248), + metadata: None + }), + ), + ( + "st_atime".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1689948680), + metadata: None + }), + 
), + ( + "st_atime_nsec".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(28467954), + metadata: None + }), + ), + ( + "st_mtime".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1692975712), + metadata: None + }), + ), + ( + "st_mtime_nsec".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(252908565), + metadata: None + }), + ), + ( + "st_ctime".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1692975712), + metadata: None + }), + ), + ( + "st_ctime_nsec".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(252908565), + metadata: None + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ], + ret_val: 0 + }) + ); + } + + #[test] + fn test_getrandom() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "815537 0.000017 getrandom(\"\\x42\\x18\\x81\\x90\\x40\\x63\\x1a\\x2c\", 8, GRND_NONBLOCK) = 8", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 815537, + rel_ts: 0.000017, + name: "getrandom".to_owned(), + args: vec![ + Expression::Buffer(BufferExpression { + value: vec![0x42, 0x18, 0x81, 0x90, 0x40, 0x63, 0x1a, 0x2c], + type_: BufferType::Unknown + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(8), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("GRND_NONBLOCK".to_owned()), + metadata: None, + }), + ], + ret_val: 8 + }) + ); + } + + #[test] + fn test_fstatfs() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "244841 0.000033 fstatfs(6, {f_type=EXT2_SUPER_MAGIC, f_bsize=4096, f_blocks=231830864, f_bfree=38594207, f_bavail=26799417, f_files=58957824, f_ffree=54942232, 
f_fsid={val=[0x511787a8, 0x92a74a52]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_NOATIME}) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 244841, + rel_ts: 0.000033, + name: "fstatfs".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(6), + metadata: None + }), + Expression::Struct(HashMap::from([ + ( + "f_type".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EXT2_SUPER_MAGIC".to_owned()), + metadata: None, + }), + ), + ( + "f_bsize".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4096), + metadata: None + }), + ), + ( + "f_blocks".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(231830864), + metadata: None + }), + ), + ( + "f_bfree".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(38594207), + metadata: None + }), + ), + ( + "f_bavail".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(26799417), + metadata: None + }), + ), + ( + "f_files".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(58957824), + metadata: None + }), + ), + ( + "f_ffree".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(54942232), + metadata: None + }), + ), + ( + "f_fsid".to_owned(), + Expression::Struct(HashMap::from([ + ( + "val".to_owned(), + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1360496552), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(2460437074), + metadata: None + }), + ] + } + ) + ])) + ), + ( + "f_namelen".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(255), + 
metadata: None + }), + ), + ( + "f_frsize".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4096), + metadata: None + }), + ), + ( + "f_flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("ST_VALID".to_owned()), + IntegerExpressionValue::NamedConst("ST_NOATIME".to_owned()) + ]), + metadata: None, + }), + ), + ])) + ], + ret_val: 0 + }) + ); + + assert_eq!( + parse_line( + "895683 0.000028 fstatfs(3, {f_type=PROC_SUPER_MAGIC, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={val=[0, 0]}, f_namelen=255, f_frsize=4096, f_flags=ST_VALID|ST_NOSUID|ST_NODEV|ST_NOEXEC|ST_RELATIME}) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 895683, + rel_ts: 0.000028, + name: "fstatfs".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None + }), + Expression::Struct(HashMap::from([ + ( + "f_type".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("PROC_SUPER_MAGIC".to_owned()), + metadata: None, + }), + ), + ( + "f_bsize".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4096), + metadata: None + }), + ), + ( + "f_blocks".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ), + ( + "f_bfree".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ), + ( + "f_bavail".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ), + ( + "f_files".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ), + ( + "f_ffree".to_owned(), + Expression::Integer(IntegerExpression { 
+ value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ), + ( + "f_fsid".to_owned(), + Expression::Struct(HashMap::from([ + ( + "val".to_owned(), + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ] + } + ) + ])) + ), + ( + "f_namelen".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(255), + metadata: None + }), + ), + ( + "f_frsize".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4096), + metadata: None + }), + ), + ( + "f_flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("ST_VALID".to_owned()), + IntegerExpressionValue::NamedConst("ST_NOSUID".to_owned()), + IntegerExpressionValue::NamedConst("ST_NODEV".to_owned()), + IntegerExpressionValue::NamedConst("ST_NOEXEC".to_owned()), + IntegerExpressionValue::NamedConst("ST_RELATIME".to_owned()) + ]), + metadata: None, + }), + ), + ])) + ], + ret_val: 0 + }) + ); + } + + #[test] + fn test_open_relative() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "998518 0.000033 openat(AT_FDCWD<\\x2f\\x68\\x6f\\x6d\\x65\\x2f\\x6d\\x64\\x65\\x2f\\x73\\x72\\x63\\x2f\\x73\\x68\\x68>, \"\\x2e\\x2e\", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3<\\x2f\\x68\\x6f\\x6d\\x65\\x2f\\x6d\\x64\\x65\\x2f\\x73\\x72\\x63>", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 998518, + rel_ts: 0.000033, + name: "openat".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AT_FDCWD".to_owned()), + metadata: Some(vec![0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x6d, 0x64, 0x65, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x73, 0x68, 
0x68]), + }), + Expression::Buffer(BufferExpression { + value: vec![0x2e, 0x2e], + type_: BufferType::Unknown, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("O_RDONLY".to_owned()), + IntegerExpressionValue::NamedConst("O_NONBLOCK".to_owned()), + IntegerExpressionValue::NamedConst("O_CLOEXEC".to_owned()), + IntegerExpressionValue::NamedConst("O_DIRECTORY".to_owned()) + ]), + metadata: None, + }), + ], + ret_val: 3 + }) + ); + } + + #[test] + fn test_truncated() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "28707 0.000194 sendto(15<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x35\\x34\\x31\\x38\\x32\\x31\\x33\\x5d>, [{nlmsg_len=20, nlmsg_type=RTM_GETADDR, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1694010548, nlmsg_pid=0}, {ifa_family=AF_UNSPEC, ...}], 20, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 20", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 28707, + rel_ts: 0.000194, + name: "sendto".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(15), + metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 53, 52, 49, 56, 50, 49, 51, 93]) + }), + Expression::Collection { + complement: false, + values: vec![ + Expression::Struct(HashMap::from([ + ( + "nlmsg_len".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(20), + metadata: None, + }), + ), + ( + "nlmsg_type".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTM_GETADDR".to_owned()), + metadata: None, + }), + ), + ( + "nlmsg_flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("NLM_F_REQUEST".to_owned()), + IntegerExpressionValue::NamedConst("NLM_F_DUMP".to_owned()), + ]), + metadata: None, + }), + ), + ( + 
"nlmsg_seq".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1694010548), + metadata: None, + }), + ), + ( + "nlmsg_pid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ), + ])), + Expression::Struct(HashMap::from([ + ( + "ifa_family".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AF_UNSPEC".to_owned()), + metadata: None, + }), + ), + ])), + ] + }, + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(20), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + Expression::Struct(HashMap::from([ + ( + "sa_family".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AF_NETLINK".to_owned()), + metadata: None, + }), + ), + ( + "nl_pid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ), + ( + "nl_groups".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(12), + metadata: None, + }), + ], + ret_val: 20 + }) + ); + } + + #[test] + fn test_invalid() { + let _ = simple_logger::SimpleLogger::new().init(); + + // Bogus output ('{{', note the missing field name) that strace 5.10 can generate + let res = + parse_line( + "57652 0.000071 sendto(19<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x38\\x34\\x38\\x36\\x39\\x32\\x5d>, {{len=20, type=0x16 /* NLMSG_??? 
*/, flags=NLM_F_REQUEST|0x300, seq=1697715709, pid=0}, \"\\x00\\x00\\x00\\x00\"}, 20, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 20", + &[] + ); + // Give some leeway to the parser behavior, as long at it does not return Ok + match res { + Err(err) => { + assert_eq!(&err.to_string(), "Unable to extract struct member name"); + } + Ok(r) => { + assert_eq!(r, ParseResult::IgnoredLine); + } + } + } + + #[test] + fn test_bind() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "688129 0.000023 bind(4<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x34\\x31\\x38\\x34\\x35\\x32\\x32\\x5d>, {sa_family=AF_UNIX, sun_path=@\"\\x62\\x31\\x39\\x33\\x64\\x30\\x62\\x30\\x63\\x63\\x64\\x37\\x30\\x35\\x66\\x39\\x2f\\x62\\x75\\x73\\x2f\\x73\\x79\\x73\\x74\\x65\\x6d\\x63\\x74\\x6c\\x2f\"}, 34) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 688129, + rel_ts: 0.000023, + name: "bind".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: Some("socket:[4184522]".as_bytes().to_vec()) + }), + Expression::Struct(HashMap::from([ + ( + "sa_family".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AF_UNIX".to_owned()), + metadata: None, + }), + ), + ( + "sun_path".to_owned(), + Expression::Buffer(BufferExpression { + value: "b193d0b0ccd705f9/bus/systemctl/".as_bytes().to_vec(), + type_: BufferType::AbstractPath, + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(34), + metadata: None, + }), + ], + ret_val: 0 + }) + ); + + assert_eq!( + parse_line( + "132360 0.000022 bind(6<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x38\\x31\\x35\\x36\\x39\\x33\\x5d>, {sa_family=AF_INET, sin_port=htons(8025), sin_addr=inet_addr(\"\\x31\\x32\\x37\\x2e\\x30\\x2e\\x30\\x2e\\x31\")}, 16) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 132360, + rel_ts: 0.000022, 
+ name: "bind".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(6), + metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 56, 49, 53, 54, 57, 51, 93]), + }), + Expression::Struct(HashMap::from([ + ( + "sa_family".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AF_INET".to_owned()), + metadata: None, + }), + ), + ( + "sin_port".to_owned(), + Expression::Macro { + name: "htons".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(8025), + metadata: None, + }), + ], + } + ), + ( + "sin_addr".to_owned(), + Expression::Macro { + name: "inet_addr".to_owned(), + args: vec![ + Expression::Buffer(BufferExpression { + value: vec![49, 50, 55, 46, 48, 46, 48, 46, 49], + type_: BufferType::Unknown, + }), + ], + } + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(16), + metadata: None, + }), + ], + ret_val: 0 + }) + ); + } + + #[test] + fn test_multiplication() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "85195 0.000038 prlimit64(0, RLIMIT_NOFILE, {rlim_cur=512*1024, rlim_max=512*1024}, NULL) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 85195, + rel_ts: 0.000038, + name: "prlimit64".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("RLIMIT_NOFILE".to_owned()), + metadata: None, + }), + Expression::Struct(HashMap::from([ + ( + "rlim_cur".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Multiplication(vec![ + IntegerExpressionValue::Literal(512), + IntegerExpressionValue::Literal(1024), + ]), + metadata: None, + }), + ), + ( + "rlim_max".to_owned(), + Expression::Integer(IntegerExpression { + 
value: IntegerExpressionValue::Multiplication(vec![ + IntegerExpressionValue::Literal(512), + IntegerExpressionValue::Literal(1024), + ]), + metadata: None, + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + ], + ret_val: 0 + }) + ); + } + + #[test] + fn test_epoll() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "114586 0.000075 epoll_ctl(3<\\x61\\x6e\\x6f\\x6e\\x5f\\x69\\x6e\\x6f\\x64\\x65\\x3a\\x5b\\x65\\x76\\x65\\x6e\\x74\\x70\\x6f\\x6c\\x6c\\x5d>, EPOLL_CTL_ADD, 4<\\x73\\x6f\\x63\\x6b\\x65\\x74\\x3a\\x5b\\x37\\x33\\x31\\x35\\x39\\x38\\x5d>, {events=EPOLLIN, data={u32=4, u64=4}}) = 0", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 114586, + rel_ts: 0.000075, + name: "epoll_ctl".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: Some(vec![97, 110, 111, 110, 95, 105, 110, 111, 100, 101, 58, 91, 101, 118, 101, 110, 116, 112, 111, 108, 108, 93]), + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLL_CTL_ADD".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 55, 51, 49, 53, 57, 56, 93]), + }), + Expression::Struct(HashMap::from([ + ( + "events".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLLIN".to_owned()), + metadata: None, + }), + ), + ( + "data".to_owned(), + Expression::Struct(HashMap::from([ + ( + "u32".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: None, + }), + ), + ( + "u64".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: None, + }), + ), + ])) + ), + ])), + ], + ret_val: 0 + }) + 
); + + assert_eq!( + parse_line( + "3487 0.000130 epoll_pwait(4<\\x61\\x6e\\x6f\\x6e\\x5f\\x69\\x6e\\x6f\\x64\\x65\\x3a\\x5b\\x65\\x76\\x65\\x6e\\x74\\x70\\x6f\\x6c\\x6c\\x5d>, [{events=EPOLLOUT, data={u32=833093633, u64=9163493471957811201}}, {events=EPOLLOUT, data={u32=800587777, u64=9163493471925305345}}], 128, 0, NULL, 0) = 2", + &[] + ).unwrap(), + ParseResult::Syscall(Syscall { + pid: 3487, + rel_ts: 0.000130, + name: "epoll_pwait".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: Some(vec![0x61, 0x6e, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x3a, 0x5b, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x70, 0x6f, 0x6c, 0x6c, 0x5d]), + }), + Expression::Collection { + complement: false, + values: vec![ + Expression::Struct(HashMap::from([ + ( + "events".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), + metadata: None, + }), + ), + ( + "data".to_owned(), + Expression::Struct(HashMap::from([ + ( + "u32".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(833093633), + metadata: None, + }), + ), + ( + "u64".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(9163493471957811201), + metadata: None, + }), + ), + ])) + ), + ])), + Expression::Struct(HashMap::from([ + ( + "events".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), + metadata: None, + }), + ), + ( + "data".to_owned(), + Expression::Struct(HashMap::from([ + ( + "u32".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(800587777), + metadata: None, + }), + ), + ( + "u64".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(9163493471925305345), + metadata: None, + }), + ), + ])) + ), + ])), + ] + }, + 
Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(128), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ], + ret_val: 2 + }) + ); + } + + #[test] + fn test_interleave() { + let _ = simple_logger::SimpleLogger::new().init(); + + let lines = Cursor::new( + "1 0.000001 select(4, [3], NULL, NULL, NULL +2 0.000002 clock_gettime(CLOCK_REALTIME, {tv_sec=1130322148, tv_nsec=3977000}) = 0 +1 0.000003 <... select resumed> ) = 1 (in [3])" + .as_bytes() + .to_vec(), + ); + let parser = LogParser::new(Box::new(lines)).unwrap(); + let syscalls: Vec = parser.into_iter().collect::>().unwrap(); + + assert_eq!( + syscalls, + vec![ + Syscall { + pid: 2, + rel_ts: 0.000002, + name: "clock_gettime".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("CLOCK_REALTIME".to_owned()), + metadata: None, + }), + Expression::Struct(HashMap::from([ + ( + "tv_sec".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1130322148), + metadata: None, + }), + ), + ( + "tv_nsec".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3977000), + metadata: None, + }), + ), + ])), + ], + ret_val: 0 + }, + Syscall { + pid: 1, + rel_ts: 0.000003, + name: "select".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: None, + }), + Expression::Collection { + complement: false, + values: vec![Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None, + })] + }, + Expression::Integer(IntegerExpression { + 
value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + ], + ret_val: 1 + } + ] + ); + } + + #[test] + fn test_getpid() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line("641342 0.000022 getpid() = 641314", &[]).unwrap(), + ParseResult::Syscall(Syscall { + pid: 641342, + rel_ts: 0.000022, + name: "getpid".to_owned(), + args: vec![], + ret_val: 641314 + }) + ); + } + + #[test] + fn test_execve() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "1234 0.000000 execve(\"\\x12\", [\"\\x34\"], [\"\\x56\"]) = 0", + &[] + ) + .unwrap(), + ParseResult::Syscall(Syscall { + pid: 1234, + rel_ts: 0.000000, + name: "execve".to_owned(), + args: vec![ + Expression::Buffer(BufferExpression { + value: vec![18], + type_: BufferType::Unknown + }), + Expression::Collection { + complement: false, + values: vec![Expression::Buffer(BufferExpression { + value: vec![0x34], + type_: BufferType::Unknown + })] + }, + Expression::Collection { + complement: false, + values: vec![Expression::Buffer(BufferExpression { + value: vec![0x56], + type_: BufferType::Unknown + })] + }, + ], + ret_val: 0 + }) + ); + } + + #[cfg_attr( + feature = "parser-regex", + ignore = "in/out arguments not supported by regex parser" + )] + #[test] + fn test_in_out_args() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "664767 0.000014 clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f3b7c000990, parent_tid=0x7f3b7c000990, exit_signal=0, stack=0x7f3b7b800000, stack_size=0x7ff880, tls=0x7f3b7c0006c0} => 
{parent_tid=[664773]}, 88) = 664773", + &[] + ) + .unwrap(), + ParseResult::Syscall(Syscall { + pid: 664767, + rel_ts: 0.000014, + name: "clone3".to_owned(), + args: vec![ + Expression::Struct(HashMap::from([ + ( + "flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("CLONE_VM".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_FS".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_FILES".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_SIGHAND".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_THREAD".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_SYSVSEM".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_SETTLS".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_PARENT_SETTID".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_CHILD_CLEARTID".to_owned()), + ]), + metadata: None + }), + ), + ( + "child_tid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f3b7c000990), + metadata: None, + }), + ), + ( + "parent_tid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f3b7c000990), + metadata: None, + }), + ), + ( + "exit_signal".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ), + ( + "stack".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f3b7b800000), + metadata: None, + }), + ), + ( + "stack_size".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7ff880), + metadata: None, + }), + ), + ( + "tls".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f3b7c0006c0), + metadata: None, + }), + ), + ])), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(88), + metadata: None, + }), 
+ ], + ret_val: 664773 + }) + ); + } + + #[cfg_attr( + feature = "parser-regex", + ignore = "named arguments not supported by regex parser" + )] + #[test] + fn test_named_args() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "714433 0.000035 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f3f3c2f5090) = 714434", + &[] + ) + .unwrap(), + ParseResult::Syscall(Syscall { + pid: 714433, + rel_ts: 0.000035, + name: "clone".to_owned(), + args: vec![ + Expression::Struct(HashMap::from([ + ( + "child_stack".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("NULL".to_owned()), + metadata: None, + }), + ), + ( + "flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("CLONE_CHILD_CLEARTID".to_owned()), + IntegerExpressionValue::NamedConst("CLONE_CHILD_SETTID".to_owned()), + IntegerExpressionValue::NamedConst("SIGCHLD".to_owned()), + ]), + metadata: None + }), + ), + ( + "child_tidptr".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0x7f3f3c2f5090), + metadata: None, + }), + ), + ])), + ], + ret_val: 714434 + }) + ); + } + + #[cfg_attr( + feature = "parser-regex", + ignore = "bit shifts are broken with regex parser" + )] + #[test] + fn test_bitshift() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "794046 0.000024 capset({version=_LINUX_CAPABILITY_VERSION_3, pid=0}, {effective=1<" ~ EOI } +syscall_line_end = { SOI ~ pid ~ " "+ ~ rel_ts ~ " <... " ~ name ~ " resumed> ) " ~ " "* ~ "= " ~ ret_val ~ EOI } + + +// Main line tokens + +pid = { ASCII_DIGIT+ } + +rel_ts = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ } + +name = { symbol_name } + +arguments = { + #named = named_arguments | + #unnamed = unnamed_arguments +} + +ret_val = { int ~ (" " ~ ANY*)? 
} + + +// Subrules + +named_affectation = { symbol_name ~ "=" ~ expression } +named_arguments = { named_affectation ~ (", " ~ named_affectation)* } +unnamed_arguments = { (argument ~ (", " ~ argument)*)? } + +argument = { + #in_out = in_out_argument | + #in = in_argument +} +in_argument = { expression } +in_out_argument = { + ("[" ~ expression ~ " => " ~ expression ~ "]" ) | + (expression ~ " => " ~ expression) +} + +expression = { + ( + #macro = macro | + #int = int | + #struct = struct | + #buf = buffer | + #set = set | + #array = array + ) ~ + comment? +} + +symbol_name = { (ASCII_ALPHA | ASCII_DIGIT | "_")+ } +comment = { " /* " ~ (!" */" ~ ANY)+ ~ " */" } + +int = { + #or = or | + #mul = multiplication | + #lshift = left_bit_shift | + #lit = literal_int | + #named = named_constant +} +literal_int = { + #oct = literal_int_oct | + #hex = literal_int_hex | + #dec = literal_int_dec +} +or = { named_constant ~ ("|" ~ int)+ } +left_bit_shift = { literal_int ~ "<<" ~ int } +multiplication = { literal_int ~ ("*" ~ int)+ } +named_constant = { symbol_name ~ metadata? } + +literal_int_oct = { "0" ~ ASCII_OCT_DIGIT+ } +literal_int_hex = { "0x" ~ ASCII_HEX_DIGIT+ } +literal_int_dec = { literal_int_dec_val ~ metadata? } +literal_int_dec_val = { "-"? ~ ASCII_DIGIT+ } +metadata = { + "<" ~ + buffer_byte+ ~ + ">" ~ + ( + "(" ~ + ASCII_ALPHA_LOWER+ ~ + ")" + )? +} + +struct = { + "{" ~ + ( + struct_member ~ + (", " ~ struct_member)* + )? ~ + ", ..."? ~ + "}" +} +struct_member = { + #member_named = named_affectation | + #macro_addr = macro +} + +buffer = { + "@"? ~ + "\"" ~ + ( + buffer_byte+ | + buffer_char+ + )? ~ + "\"" ~ + "..."? +} +buffer_char = { !"\"" ~ ANY } +buffer_byte = { "\\x" ~ ASCII_HEX_DIGIT{2} } + +macro = { symbol_name ~ "(" ~ macro_arguments ~ ")" } +macro_arguments = { (macro_argument ~ (", " ~ macro_argument)*)? 
} +macro_argument = { + #addr = pseudo_addr | + #expr = expression +} +pseudo_addr = { "&" ~ symbol_name } + +array = { "[" ~ (expression ~ (", " ~ expression)*)? ~ "]" } + +set = { + "~"? ~ + "[" ~ + ( + int ~ + ( + " " ~ + int + )* + )? ~ + "]" +} + diff --git a/src/strace/parser/peg.rs b/src/strace/parser/peg.rs new file mode 100644 index 0000000..1c321e1 --- /dev/null +++ b/src/strace/parser/peg.rs @@ -0,0 +1,406 @@ +//! PEG based strace output parser + +use itertools::Itertools; +use pest::iterators::Pair; +use pest::Parser as _; + +use crate::strace::{ + BufferExpression, BufferType, Expression, IntegerExpression, IntegerExpressionValue, Syscall, +}; + +use super::ParseResult; + +#[derive(pest_derive::Parser)] +#[grammar = "strace/parser/peg.pest"] +struct PegParser; +
+/// Parse a single strace output line with the PEG grammar.
+///
+/// Returns [`ParseResult::IgnoredLine`] when the line does not match the `syscall_line` rule.
+/// Otherwise the node tag of the matched alternative selects the result:
+/// - `complete`: a [`ParseResult::Syscall`],
+/// - `start`: a [`ParseResult::UnfinishedSyscall`],
+/// - `end`: a [`ParseResult::FinishedSyscall`], built from the first entry of
+///   `unfinished_syscalls` that has the same name and pid, with its return value and
+///   timestamp replaced by those of the resumed line. `unfinished_index` is the position of
+///   that entry in `unfinished_syscalls`.
+///
+/// # Errors
+///
+/// Fails if the parse tree does not have the expected shape, if a node cannot be converted
+/// into a [`Syscall`], or if a resumed line has no matching entry in `unfinished_syscalls`.
+pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result<ParseResult> {
+    // A line that does not match the grammar is reported as ignored, not as an error
+    let Ok(mut pairs) = PegParser::parse(Rule::syscall_line, line) else {
+        return Ok(ParseResult::IgnoredLine);
+    };
+    // Propagate an unexpected tree shape as an error instead of panicking
+    let top_pair = pairs
+        .next()
+        .ok_or_else(|| anyhow::anyhow!("Missing syscall line node"))?;
+    let pair = pair_descend(top_pair, 1)?;
+    log::trace!("{:#?}", pair);
+    match pair.as_node_tag() {
+        Some("complete") => Ok(ParseResult::Syscall(pair.try_into()?)),
+        Some("start") => Ok(ParseResult::UnfinishedSyscall(pair.try_into()?)),
+        Some("end") => {
+            let sc_end: Syscall = pair.try_into()?;
+            let (unfinished_index, sc_start) = unfinished_syscalls
+                .iter()
+                .enumerate()
+                .find(|(_i, sc)| (sc.name == sc_end.name) && (sc.pid == sc_end.pid))
+                .ok_or_else(|| anyhow::anyhow!("Unable to find first part of syscall"))?;
+            let sc_merged = Syscall {
+                // Update return val and timestamp (to get return time instead of call time)
+                ret_val: sc_end.ret_val,
+                rel_ts: sc_end.rel_ts,
+                ..sc_start.clone()
+            };
+            Ok(ParseResult::FinishedSyscall {
+                sc: sc_merged,
+                unfinished_index,
+            })
+        }
+        _ => anyhow::bail!("Unhandled pair: {pair:?}"),
+    }
+}
+
+fn pair_descend(pair: Pair<'_, Rule>, levels: usize) -> anyhow::Result> { + let mut pair = pair; + let mut levels = levels; + while
levels > 0 { + if let Some(below_pair) = pair.clone().into_inner().next() { + pair = below_pair; + } else { + anyhow::bail!("Missing child node for {pair:?}"); + } + levels -= 1; + } + Ok(pair) +} + +impl TryFrom> for Expression { + type Error = anyhow::Error; + + fn try_from(pair: Pair) -> Result { + match pair.as_node_tag() { + Some("int") => Ok(Expression::Integer(pair_descend(pair, 1)?.try_into()?)), + Some("buf") => Ok(Expression::Buffer(pair.try_into()?)), + Some("struct") => Ok(Expression::Struct( + pair.into_inner() + .map(|m| -> anyhow::Result<_> { + let m = pair_descend(m, 1)?; + match m.as_node_tag() { + Some("member_named") => { + let (name_pair, val_pair) = + m.into_inner().next_tuple().ok_or_else(|| { + anyhow::anyhow!("Missing struct member name/value") + })?; + let val: Expression = pair_descend(val_pair, 1)?.try_into()?; + Ok((name_pair.as_str().to_owned(), val)) + } + Some("macro_addr") => { + let macro_: Expression = m.try_into()?; + let member_name = if let Expression::Macro { args, .. } = ¯o_ { + args.iter() + .find_map(|a| { + if let Expression::DestinationAddress(n) = a { + Some(n.to_owned()) + } else { + None + } + }) + .ok_or_else(|| { + anyhow::anyhow!("Missing macro destination address") + })? 
+ } else { + anyhow::bail!("Missing macro"); + }; + Ok((member_name, macro_)) + } + _ => anyhow::bail!("Unhandled pair: {m:?}"), + } + }) + .collect::>()?, + )), + Some("macro") | Some("macro_addr") => { + let (name, args) = pair + .into_inner() + .next_tuple() + .ok_or_else(|| anyhow::anyhow!("Missing macro child nodes"))?; + Ok(Expression::Macro { + name: name.as_str().to_owned(), + args: args + .into_inner() + .map(|p| { + let p = pair_descend(p, 1)?; + match p.as_node_tag() { + Some("expr") => Expression::try_from(pair_descend(p, 1)?), + Some("addr") => Ok(Expression::DestinationAddress( + pair_descend(p, 1)?.as_str().to_owned(), + )), + _ => anyhow::bail!("Unhandled pair: {p:?}"), + } + }) + .collect::>()?, + }) + } + Some("array") => Ok(Expression::Collection { + complement: false, + values: pair + .into_inner() + .map(|p| Expression::try_from(pair_descend(p, 1)?)) + .collect::>()?, + }), + Some("set") => { + let complement = pair.as_str().starts_with('~'); + Ok(Expression::Collection { + complement, + values: pair + .into_inner() + .map(|p| -> anyhow::Result<_> { + Ok(Expression::Integer(IntegerExpression::try_from( + pair_descend(p, 1)?, + )?)) + }) + .collect::>()?, + }) + } + _ => anyhow::bail!("Unhandled pair: {pair:?}"), + } + } +} +
+impl TryFrom<Pair<'_, Rule>> for BufferExpression {
+    type Error = anyhow::Error;
+
+    /// Decode the child nodes of `pair` into raw bytes.
+    ///
+    /// A child whose text starts with `\x` is decoded as a single hexadecimal byte; the text
+    /// of any other child is appended as its UTF-8 bytes. The buffer type is
+    /// [`BufferType::AbstractPath`] when the matched text starts with `@`, and
+    /// [`BufferType::Unknown`] otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the digits following `\x` are not a valid hexadecimal byte.
+    fn try_from(pair: Pair<'_, Rule>) -> Result<Self, Self::Error> {
+        let type_ = if pair.as_str().starts_with('@') {
+            BufferType::AbstractPath
+        } else {
+            BufferType::Unknown
+        };
+        let mut value = Vec::new();
+        for elem in pair.into_inner() {
+            let s = elem.as_str();
+            if let Some(hex) = s.strip_prefix("\\x") {
+                debug_assert_eq!(hex.len(), 2);
+                value.push(u8::from_str_radix(hex, 16)?);
+            } else {
+                // `buffer_char` matches one code point through `ANY`, which can span several
+                // UTF-8 bytes: keep all of them instead of only the first one
+                value.extend_from_slice(s.as_bytes());
+            }
+        }
+        Ok(BufferExpression { value, type_ })
+    }
+}
+
+/// Helper to parse 'literal' pair +fn lit_pair(pair: Pair) -> anyhow::Result { + let (val, metadata) = match pair.as_node_tag() { + Some("oct") =>
(i128::from_str_radix(pair.as_str(), 8)?, None), + Some("hex") => ( + pair.as_str() + .strip_prefix("0x") + .map(|s| i128::from_str_radix(s, 16)) + .unwrap()?, + None, + ), + Some("dec") => { + let mut children = pair.into_inner(); + let val_pair = children + .next() + .ok_or_else(|| anyhow::anyhow!("Missing dec value node"))?; + let mut metadata_pair = children.next(); + // TODO use Option::take_if if it gets stable + if metadata_pair + .as_ref() + .is_some_and(|p| p.as_node_tag() == Some("com")) + { + metadata_pair = None; + } + ( + val_pair.as_str().parse()?, + metadata_pair + .map(|p| BufferExpression::try_from(p).map(|e| e.value)) + .map_or(Ok(None), |v| v.map(Some))?, + ) + } + _ => anyhow::bail!("Unhandled pair: {pair:?}"), + }; + Ok(IntegerExpression { + value: IntegerExpressionValue::Literal(val), + metadata, + }) +} + +impl TryFrom> for IntegerExpression { + type Error = anyhow::Error; + + fn try_from(pair: Pair) -> Result { + match pair.as_node_tag() { + Some("lit") => { + let pair = pair_descend(pair, 1)?; + lit_pair(pair) + } + Some("named") => { + let mut children = pair.into_inner(); + let val_pair = children + .next() + .ok_or_else(|| anyhow::anyhow!("Missing named const name"))?; + let metadata_pair = children.next(); + Ok(IntegerExpression { + value: IntegerExpressionValue::NamedConst(val_pair.as_str().to_owned()), + metadata: metadata_pair + .map(|p| BufferExpression::try_from(p).map(|e| e.value)) + .map_or(Ok(None), |v| v.map(Some))?, + }) + } + Some("or") => { + let mut children = pair.into_inner(); + let mut or_elems = Vec::with_capacity(children.len()); + or_elems.push(IntegerExpressionValue::NamedConst( + children + .next() + .ok_or_else(|| anyhow::anyhow!("Missing or first node"))? + .as_str() + .to_owned(), + )); + or_elems.extend( + children + .map(|c| IntegerExpression::try_from(pair_descend(c, 1)?).map(|e| e.value)) + .collect::, _>>()? 
+ .into_iter() + .flat_map(|e| { + // Flatten or child expressions + if let IntegerExpressionValue::BinaryOr(es) = e { + es.into_iter() + } else { + vec![e].into_iter() + } + }), + ); + Ok(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(or_elems), + metadata: None, + }) + } + Some("mul") => { + let mut children = pair.into_inner(); + let mut mul_elems = Vec::with_capacity(children.len()); + mul_elems.push( + lit_pair(pair_descend( + children + .next() + .ok_or_else(|| anyhow::anyhow!("Missing multiplication first node"))?, + 1, + )?)? + .value, + ); + mul_elems.append( + &mut children + .map(|c| IntegerExpression::try_from(pair_descend(c, 1)?).map(|e| e.value)) + .collect::, _>>()?, + ); + Ok(IntegerExpression { + value: IntegerExpressionValue::Multiplication(mul_elems), + metadata: None, + }) + } + Some("lshift") => { + let (left_pair, right_pair) = pair + .into_inner() + .next_tuple() + .ok_or_else(|| anyhow::anyhow!("Missing bit shift nodes"))?; + let left: IntegerExpression = lit_pair(pair_descend(left_pair, 1)?)?; + let right: IntegerExpression = pair_descend(right_pair, 1)?.try_into()?; + Ok(IntegerExpression { + value: IntegerExpressionValue::LeftBitShift { + bits: Box::new(left.value), + shift: Box::new(right.value), + }, + metadata: None, + }) + } + _ => anyhow::bail!("Unhandled pair: {pair:?}"), + } + } +} + +impl TryFrom> for Syscall { + type Error = anyhow::Error; + + fn try_from(pair: Pair) -> Result { + let pair_tag = pair + .as_node_tag() + .ok_or_else(|| anyhow::anyhow!("Unhandled pair: {pair:?}"))? + .to_owned(); + let mut subpairs = pair.into_inner(); + // Note if the grammar is correct, we should *never* panic below + let pid = subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing pid node"))? + .as_str() + .parse()?; + let rel_ts = subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing ts node"))? + .as_str() + .parse()?; + let name = subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing name node"))? 
+ .as_str() + .to_owned(); + + let args = if pair_tag.as_str() != "end" { + let args_pair = subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing arguments node"))?; + let args_pair = pair_descend(args_pair, 1)?; + match args_pair.as_node_tag() { + Some("unnamed") => args_pair + .into_inner() + .map(|p| { + let p = pair_descend(p, 1)?; + match p.as_node_tag() { + Some("in") => pair_descend(p, 2)?.try_into(), + Some("in_out") => { + // Only take the 'in' part, ignore the rest + pair_descend(p, 2)?.try_into() + } + _ => anyhow::bail!("Unhandled pair: {p:?}"), + } + }) + .collect::>()?, + Some("named") => { + // Handle name arguments as a single struct + vec![Expression::Struct( + args_pair + .into_inner() + .map(|p| -> anyhow::Result<_> { + let (name, val) = p.into_inner().next_tuple().ok_or_else(|| { + anyhow::anyhow!("Missing name arguments nodes") + })?; + Ok((name.as_str().to_owned(), pair_descend(val, 1)?.try_into()?)) + }) + .collect::>()?, + )] + } + _ => anyhow::bail!("Unhandled pair: {args_pair:?}"), + } + } else { + vec![] + }; + let ret_val = match pair_tag.as_str() { + "complete" | "end" => { + let ret_val_pair = pair_descend( + subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing return value node"))?, + 2, + )?; + if let IntegerExpressionValue::Literal(val) = + IntegerExpression::try_from(ret_val_pair)?.value + { + val + } else { + anyhow::bail!("Return value is not a literal int"); + } + } + "start" => i128::MAX, + tag => anyhow::bail!("Unhandled pair tag: {tag:?}"), + }; + Ok(Syscall { + pid, + rel_ts, + name, + args, + ret_val, + }) + } +} diff --git a/src/strace/parser/regex.rs b/src/strace/parser/regex.rs new file mode 100644 index 0000000..3ab09ad --- /dev/null +++ b/src/strace/parser/regex.rs @@ -0,0 +1,477 @@ +//! 
Regex based strace output parser
+
+use std::collections::HashMap;
+use std::str;
+
+use lazy_static::lazy_static;
+
+use crate::strace::{
+    BufferExpression, BufferType, Expression, IntegerExpression, IntegerExpressionValue, Syscall,
+    SyscallRetVal,
+};
+
+use super::ParseResult;
+
+// See also:
+// - https://github.com/rbtcollins/strace-parse.rs/blob/master/src/lib.rs for a nom based parsing approach
+// - https://github.com/wookietreiber/strace-analyzer/blob/master/src/analysis.rs for a "1 regex per syscall" approach
+
+lazy_static! {
+    /// Matches a whole strace output line: pid, relative timestamp, then either the start of a
+    /// syscall or a `<... name resumed>` marker, followed by the return value or the unfinished
+    /// marker
+    static ref LINE_REGEX: regex::Regex = regex::RegexBuilder::new(
+        r"
+^
+(?<pid>[0-9]+)\ +
+(?<rel_ts>[0-9]+\.[0-9]+)\ +
+(
+    (
+        (?<name>[a-z0-9_]+)
+        \(
+        (?<arguments>.+)?
+    )
+    |
+    (
+        <\.{3}\  # ends with an escaped space
+        (?<name_resumed>[a-z0-9_]+)
+        \ resumed>\  # ends with an escaped space
+    )
+)
+(
+    (
+
+        \)
+        \ +=\  # ends with an escaped space
+        (
+            (
+                0x
+                (?<ret_val_hex>[a-f0-9]+)
+            )
+            |
+            (
+                (?<ret_val_int>[-0-9]+)
+                (
+                    <
+                    (?<ret_val_metadata>[^>]+) # NOTE(review): group never read by the code
+                    >
+                    (
+                        # (deleted)
+                        \(
+                        [^\)]+
+                        \)
+                    )?
+                )?
+            )
+        )
+        (
+            (\ E[A-Z]+\ \(.*\)) # errno
+            |
+            (\ \(.*\)) # interpretation like 'Timeout'
+        )?
+    )
+    |
+    (?<unfinished>\ <unfinished\ \.{3}>)
+)
+$
+"
+    )
+    .ignore_whitespace(true)
+    .build()
+    .unwrap();
+    /// Matches a single syscall argument (or struct member/array element value), including its
+    /// trailing separator
+    static ref ARG_REGEX: regex::Regex = regex::RegexBuilder::new(
+        r#"
+(
+    (
+        (?<macro>
+            [a-zA-Z0-9_]+
+            \(
+            [^\)]+
+            \)
+        )
+    )
+    |
+    (
+        (?<multiplication>
+            [0-9x]+
+            (
+                \*
+                [0-9x]+
+            )+
+        )
+    )
+    |
+    (
+        (?<int>[-0-9]+)
+        (
+            <
+            (?<int_metadata>[^>]+)
+            >
+            (
+                # (deleted)
+                \(
+                [^\)]+
+                \)
+            )?
+        )?
+        (\ \/\*\ [A-Za-z0-9_\-\ \+\.\:\?]+\ \*\/)?
+    )
+    |
+    (
+        0x
+        (?<int_hex>[a-f0-9]+)
+        (\ \/\*\ [A-Za-z0-9_\-\ \+\.\:\?]+ \*\/)?
+    )
+    |
+    (
+        \[
+        (?<array>[^\]]+)
+        \]
+    )
+    |
+    (
+        (?<const_expr>[A-Z_|~\[\]\ 0-9<]+)
+        (
+            <
+            (?<const_expr_metadata>[^>]+)
+            >
+        )?
+    )
+    |
+    (
+        \{
+        (?<struct>
+            (
+                [a-z0-9_]+=
+                (
+                    ([^\{]+)
+                    |
+                    (\{[^\{]*\})
+                )
+                ,\  # ends with an escaped space
+            )*
+            (
+                (
+                    [a-z0-9_]+=
+                    (
+                        ([^\{]+)
+                        |
+                        (\{[^\{]*\})
+                    )
+                )
+                |
+                \.{3}
+            )?
+        )
+        \}
+    )
+    |
+    (
+        (?<buf_abstract_path>@)?
+        "
+        (?<buf>[^"]*)
+        "
+    )
+)
+(
+    (,\ )
+    |
+    [\}\]]
+    |
+    $
+)
+"#
+    )
+    .ignore_whitespace(true)
+    .build()
+    .unwrap();
+    /// Matches one `\xNN` escaped byte (lowercase hexadecimal digits)
+    static ref BYTE_REGEX: regex::bytes::Regex =
+        regex::bytes::Regex::new(r"\\x[0-9a-f]{2}").unwrap();
+}
+
+/// Extract the syscall return value from `LINE_REGEX` captures.
+///
+/// Returns `Ok(None)` when neither the decimal nor the hexadecimal return value group matched.
+fn parse_ret_val(caps: &regex::Captures) -> anyhow::Result<Option<SyscallRetVal>> {
+    if let Some(ret_val_int) = caps.name("ret_val_int") {
+        let s = ret_val_int.as_str();
+        let val = s.parse::<SyscallRetVal>().map_err(|e| {
+            anyhow::Error::new(e).context(format!("Failed to parse integer return value: {s:?}"))
+        })?;
+        Ok(Some(val))
+    } else if let Some(ret_val_hex) = caps.name("ret_val_hex") {
+        let s = ret_val_hex.as_str();
+        let val = SyscallRetVal::from_str_radix(s, 16).map_err(|e| {
+            anyhow::Error::new(e)
+                .context(format!("Failed to parse hexadecimal return value: {s:?}"))
+        })?;
+        Ok(Some(val))
+    } else {
+        Ok(None)
+    }
+}
+
+/// Parse one line of strace output.
+///
+/// Returns:
+/// - `ParseResult::IgnoredLine` when the line does not match `LINE_REGEX`
+/// - `ParseResult::Syscall` for a complete syscall line
+/// - `ParseResult::UnfinishedSyscall` for a line ending with the unfinished marker, with a
+///   dummy return value
+/// - `ParseResult::FinishedSyscall` for a resumed syscall: the first entry of
+///   `unfinished_syscalls` with the same name and pid is cloned, then its return value and
+///   timestamp are replaced. `unfinished_index` is the index of that entry.
+///
+/// # Errors
+///
+/// Fails when a field cannot be parsed, when a resumed syscall has no return value, or when no
+/// matching entry is found in `unfinished_syscalls`.
+pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result<ParseResult> {
+    let caps = match LINE_REGEX.captures(line) {
+        Some(caps) => caps,
+        None => return Ok(ParseResult::IgnoredLine),
+    };
+
+    // The 'pid' and 'rel_ts' groups are mandatory in LINE_REGEX, so unwrapping them can not fail
+    let pid = caps
+        .name("pid")
+        .unwrap()
+        .as_str()
+        .parse()
+        .map_err(|e| anyhow::Error::new(e).context("Failed to parse pid"))?;
+
+    let rel_ts = caps
+        .name("rel_ts")
+        .unwrap()
+        .as_str()
+        .parse()
+        .map_err(|e| anyhow::Error::new(e).context("Failed to parse timestamp"))?;
+
+    if let Some(name) = caps.name("name") {
+        let name = name.as_str().to_owned();
+
+        let args = if let Some(arguments) = caps.name("arguments") {
+            ARG_REGEX
+                .captures_iter(arguments.as_str())
+                .map(|a| parse_argument(&a))
+                .collect::<Result<_, _>>()?
+        } else {
+            Vec::new()
+        };
+
+        let ret_val = match parse_ret_val(&caps)? {
+            Some(ret_val) => ret_val,
+            None if caps.name("unfinished").is_some() => {
+                return Ok(ParseResult::UnfinishedSyscall(Syscall {
+                    pid,
+                    rel_ts,
+                    name,
+                    args,
+                    ret_val: SyscallRetVal::MAX, // Set dummy value we will replace
+                }));
+            }
+            // LINE_REGEX requires either a return value or the unfinished marker
+            None => unreachable!(),
+        };
+
+        let sc = Syscall {
+            pid,
+            rel_ts,
+            name,
+            args,
+            ret_val,
+        };
+        Ok(ParseResult::Syscall(sc))
+    } else if let Some(name_resumed) = caps.name("name_resumed").map(|c| c.as_str()) {
+        // LINE_REGEX also accepts the unfinished marker after a resumed one, so the return value
+        // can be missing here
+        let ret_val = parse_ret_val(&caps)?
+            .ok_or_else(|| anyhow::anyhow!("Missing return value for resumed syscall"))?;
+
+        let (unfinished_index, unfinished_sc) = unfinished_syscalls
+            .iter()
+            .enumerate()
+            .find(|(_i, sc)| (sc.name == name_resumed) && (sc.pid == pid))
+            .ok_or_else(|| anyhow::anyhow!("Unable to find first part of syscall"))?;
+        let sc = Syscall {
+            // Update return val and timestamp (to get return time instead of call time)
+            ret_val,
+            rel_ts,
+            ..unfinished_sc.clone()
+        };
+        Ok(ParseResult::FinishedSyscall {
+            sc,
+            unfinished_index,
+        })
+    } else {
+        // LINE_REGEX requires one of the 'name' or 'name_resumed' groups
+        unreachable!();
+    }
+}
+
+/// Build an [`Expression`] from `ARG_REGEX` captures.
+///
+/// The named group that matched selects the kind of expression. The content of structures,
+/// arrays and macros is parsed by applying `ARG_REGEX` again on the captured text.
+///
+/// # Errors
+///
+/// Fails when a number cannot be parsed, when a structure member or multiplication operand can
+/// not be parsed, or when a set expression contains a `|`.
+///
+/// # Panics
+///
+/// Panics if no known group matched, which `ARG_REGEX` does not allow.
+fn parse_argument(caps: &regex::Captures) -> anyhow::Result<Expression> {
+    if let Some(int) = caps.name("int") {
+        let metadata = caps
+            .name("int_metadata")
+            .map(|m| parse_buffer(m.as_str()))
+            .transpose()?;
+        Ok(Expression::Integer(IntegerExpression {
+            value: IntegerExpressionValue::Literal(int.as_str().parse()?),
+            metadata,
+        }))
+    } else if let Some(hex) = caps.name("int_hex") {
+        Ok(Expression::Integer(IntegerExpression {
+            value: IntegerExpressionValue::Literal(i128::from_str_radix(hex.as_str(), 16)?),
+            metadata: None,
+        }))
+    } else if let Some(const_) = caps.name("const_expr") {
+        let const_str = const_.as_str();
+        if (const_str.ends_with(']')) && (const_str.starts_with('[') || const_str.starts_with("~["))
+        {
+            // Set of constants, complemented if prefixed by '~'
+            anyhow::ensure!(
+                !const_str.contains('|'),
+                "Unexpected '|' in set expression {const_str:?}"
+            );
+            let complement = const_str.starts_with('~');
+            let values_str = &const_str[if complement { 2 } else { 1 }..const_str.len() - 1];
+            let values = if values_str.is_empty() {
+                vec![]
+            } else {
+                values_str
+                    .split(' ')
+                    .map(|v| {
+                        Expression::Integer(IntegerExpression {
+                            value: IntegerExpressionValue::NamedConst(v.to_owned()),
+                            metadata: None,
+                        })
+                    })
+                    .collect()
+            };
+            Ok(Expression::Collection { complement, values })
+        } else {
+            let tokens = const_str.split('|').collect::<Vec<_>>();
+            if tokens.len() == 1 {
+                let metadata = caps
+                    .name("const_expr_metadata")
+                    .map(|m| parse_buffer(m.as_str()))
+                    .transpose()?;
+                Ok(Expression::Integer(IntegerExpression {
+                    value: IntegerExpressionValue::NamedConst(tokens[0].to_owned()),
+                    metadata,
+                }))
+            } else {
+                let int_tokens = tokens
+                    .into_iter()
+                    .map(|t| -> anyhow::Result<_> {
+                        if let Some(one_shift) = t.strip_prefix("1<<") {
+                            Ok(IntegerExpressionValue::LeftBitShift {
+                                bits: Box::new(IntegerExpressionValue::Literal(1)),
+                                shift: Box::new(IntegerExpressionValue::NamedConst(
+                                    one_shift.to_owned(),
+                                )),
+                            })
+                        } else if t.starts_with('0') {
+                            // Tokens starting with 0 are octal literals
+                            Ok(IntegerExpressionValue::Literal(i128::from_str_radix(t, 8)?))
+                        } else {
+                            Ok(IntegerExpressionValue::NamedConst(t.to_owned()))
+                        }
+                    })
+                    .collect::<Result<_, _>>()?;
+                Ok(Expression::Integer(IntegerExpression {
+                    value: IntegerExpressionValue::BinaryOr(int_tokens),
+                    metadata: None,
+                }))
+            }
+        }
+    } else if let Some(struct_) = caps.name("struct") {
+        let mut members = HashMap::new();
+        let mut remaining = struct_.as_str();
+        while !remaining.is_empty() {
+            if remaining == "..." {
+                // This should not happen with our strace options, but still does, strace bug?
+                log::warn!("Truncated structure in strace output");
+                break;
+            }
+            let (k, v) = remaining
+                .split_once('=')
+                .ok_or_else(|| anyhow::anyhow!("Unable to extract struct member name"))?;
+            let member_caps = ARG_REGEX
+                .captures(v)
+                .ok_or_else(|| anyhow::anyhow!("Unable to parse struct member value"))?;
+            members.insert(k.to_owned(), parse_argument(&member_caps)?);
+            // Skip the member name, the '=' and the whole value match (the match includes the
+            // trailing separator)
+            let consumed = k.len() + 1 + member_caps[0].len();
+            remaining = &remaining[consumed..];
+        }
+        Ok(Expression::Struct(members))
+    } else if let Some(array) = caps.name("array") {
+        let members = ARG_REGEX
+            .captures_iter(array.as_str())
+            .map(|a| parse_argument(&a))
+            .collect::<Result<_, _>>()?;
+        Ok(Expression::Collection {
+            complement: false,
+            values: members,
+        })
+    } else if let Some(buf) = caps.name("buf") {
+        let buf = parse_buffer(buf.as_str())?;
+        let type_ = if caps.name("buf_abstract_path").is_some() {
+            BufferType::AbstractPath
+        } else {
+            BufferType::Unknown
+        };
+        Ok(Expression::Buffer(BufferExpression { value: buf, type_ }))
+    } else if let Some(macro_) = caps.name("macro") {
+        let (name, args) = macro_
+            .as_str()
+            .split_once('(')
+            .ok_or_else(|| anyhow::anyhow!("Missing macro arguments"))?;
+        // Drop the closing parenthesis
+        let args = &args[..args.len() - 1];
+        let args = ARG_REGEX
+            .captures_iter(args)
+            .map(|a| parse_argument(&a))
+            .collect::<Result<_, _>>()?;
+        Ok(Expression::Macro {
+            name: name.to_owned(),
+            args,
+        })
+    } else if let Some(multiplication) = caps.name("multiplication") {
+        let args = multiplication
+            .as_str()
+            .split('*')
+            .map(|a| -> anyhow::Result<IntegerExpressionValue> {
+                let arg = ARG_REGEX
+                    .captures(a)
+                    .ok_or_else(|| anyhow::anyhow!("Unexpected multiplication argument {a:?}"))?;
+                match parse_argument(&arg)? {
+                    Expression::Integer(IntegerExpression { value, .. }) => Ok(value),
+                    _ => Err(anyhow::anyhow!("Unexpected multiplication argument {a:?}")),
+                }
+            })
+            .collect::<Result<_, _>>()?;
+        Ok(Expression::Integer(IntegerExpression {
+            value: IntegerExpressionValue::Multiplication(args),
+            metadata: None,
+        }))
+    } else {
+        unreachable!("Argument has no group match")
+    }
+}
+
+/// Decode a buffer string, replacing each `\xNN` escape by the byte it encodes.
+///
+/// Everything else is copied as is.
+fn parse_buffer(s: &str) -> anyhow::Result<Vec<u8>> {
+    // Parse and replace '\x12' escaped bytes
+    let buf = BYTE_REGEX
+        .replace_all(s.as_bytes(), |cap: &regex::bytes::Captures| {
+            // BYTE_REGEX only matches '\x' followed by two hexadecimal digits, so the
+            // conversions below can not fail
+            let byte_match = cap.get(0).unwrap().as_bytes();
+            let byte = u8::from_str_radix(str::from_utf8(&byte_match[2..]).unwrap(), 16).unwrap();
+            vec![byte]
+        })
+        .into_owned();
+    Ok(buf)
+}
+
+#[cfg(all(feature = "nightly", test))]
+mod benchs {
+    extern crate test;
+
+    use super::*;
+
+    use std::iter;
+
+    use test::Bencher;
+
+    #[bench]
+    fn bench_parse_buffer(b: &mut Bencher) {
+        let s = format!(
+            "\"{}\"",
+            iter::repeat_with(|| format!("\\x{:02x}", fastrand::u8(..)))
+                .take(512)
+                .collect::<Vec<_>>()
+                .join("")
+        );
+
+        b.iter(|| {
+            parse_buffer(&s).unwrap();
+        });
+    }
+}
diff --git a/src/summarize.rs b/src/summarize.rs index 79f521d..8696342 100644 --- a/src/summarize.rs +++ b/src/summarize.rs @@ -7,7 +7,9 @@ use std::path::{Path, PathBuf}; use lazy_static::lazy_static; -use crate::strace::{BufferType, IntegerExpression, Syscall, SyscallArg}; +use crate::strace::{ + BufferExpression, BufferType, Expression, IntegerExpression, IntegerExpressionValue, Syscall, +}; use crate::systemd::{SocketFamily, SocketProtocol}; /// A high level program runtime action @@ -191,13 +193,13 @@ fn is_fd_pseudo_path(path: &[u8]) -> bool { /// Extract path for socket address structure if it's a non abstract one fn socket_address_uds_path( - members: &HashMap, + members: &HashMap, syscall: &Syscall, ) -> Option { - if let Some(SyscallArg::Buffer { + if let Some(Expression::Buffer(BufferExpression { value: b, type_: BufferType::Unknown, - }) = members.get("sun_path") + })) = members.get("sun_path") { 
resolve_path(&PathBuf::from(OsStr::from_bytes(b)), None, syscall) } else { @@ -230,11 +232,11 @@ where flags_idx, }) => { let (mut path, flags) = if let ( - Some(SyscallArg::Buffer { + Some(Expression::Buffer(BufferExpression { value: b, type_: BufferType::Unknown, - }), - Some(SyscallArg::Integer { value: e, .. }), + })), + Some(Expression::Integer(IntegerExpression { value: e, .. })), ) = (syscall.args.get(*path_idx), syscall.args.get(*flags_idx)) { @@ -270,14 +272,14 @@ where flags_idx, }) => { let (mut path_src, mut path_dst) = if let ( - Some(SyscallArg::Buffer { + Some(Expression::Buffer(BufferExpression { value: b1, type_: BufferType::Unknown, - }), - Some(SyscallArg::Buffer { + })), + Some(Expression::Buffer(BufferExpression { value: b2, type_: BufferType::Unknown, - }), + })), ) = ( syscall.args.get(*path_src_idx), syscall.args.get(*path_dst_idx), @@ -300,8 +302,10 @@ where }; let exchange = if let Some(flags_idx) = flags_idx { - let flags = if let Some(SyscallArg::Integer { value: flags, .. }) = - syscall.args.get(*flags_idx) + let flags = if let Some(Expression::Integer(IntegerExpression { + value: flags, + .. + })) = syscall.args.get(*flags_idx) { flags } else { @@ -340,10 +344,10 @@ where relfd_idx, path_idx, }) => { - let mut path = if let Some(SyscallArg::Buffer { + let mut path = if let Some(Expression::Buffer(BufferExpression { value: b, type_: BufferType::Unknown, - }) = syscall.args.get(*path_idx) + })) = syscall.args.get(*path_idx) { PathBuf::from(OsStr::from_bytes(b)) } else { @@ -358,11 +362,11 @@ where } Some(SyscallInfo::Network { sockaddr_idx }) => { let (af, addr) = - if let Some(SyscallArg::Struct(members)) = syscall.args.get(*sockaddr_idx) { - let af = if let Some(SyscallArg::Integer { - value: IntegerExpression::NamedConst(af), + if let Some(Expression::Struct(members)) = syscall.args.get(*sockaddr_idx) { + let af = if let Some(Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst(af), .. 
- }) = members.get("sa_family") + })) = members.get("sa_family") { af } else { @@ -385,10 +389,10 @@ where } if name == "bind" { - let fd = if let Some(SyscallArg::Integer { - value: IntegerExpression::Literal(fd), + let fd = if let Some(Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(fd), .. - }) = syscall.args.first() + })) = syscall.args.first() { fd } else { @@ -406,7 +410,9 @@ where } } Some(SyscallInfo::SetScheduler) => { - let policy = if let Some(SyscallArg::Integer { value, .. }) = syscall.args.get(1) { + let policy = if let Some(Expression::Integer(IntegerExpression { value, .. })) = + syscall.args.get(1) + { value } else { anyhow::bail!("Unexpected args for {}: {:?}", name, syscall.args); @@ -416,10 +422,10 @@ where } } Some(SyscallInfo::Socket) => { - let af = if let Some(SyscallArg::Integer { - value: IntegerExpression::NamedConst(af), + let af = if let Some(Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst(af), .. - }) = syscall.args.first() + })) = syscall.args.first() { af.to_owned() } else { @@ -428,7 +434,9 @@ where actions.push(ProgramAction::NetworkActivity { af }); let proto_flags = - if let Some(SyscallArg::Integer { value, .. }) = syscall.args.get(1) { + if let Some(Expression::Integer(IntegerExpression { value, .. })) = + syscall.args.get(1) + { value.flags() } else { anyhow::bail!("Unexpected args for {}: {:?}", name, syscall.args); @@ -441,13 +449,14 @@ where } } Some(SyscallInfo::Mmap { prot_idx }) => { - let prot = if let Some(SyscallArg::Integer { value: prot, .. }) = - syscall.args.get(*prot_idx) - { - prot - } else { - anyhow::bail!("Unexpected args for {}: {:?}", name, syscall.args); - }; + let prot = + if let Some(Expression::Integer(IntegerExpression { value: prot, .. 
})) = + syscall.args.get(*prot_idx) + { + prot + } else { + anyhow::bail!("Unexpected args for {}: {:?}", name, syscall.args); + }; if prot.is_flag_set("PROT_WRITE") && prot.is_flag_set("PROT_EXEC") { actions.push(ProgramAction::WriteExecuteMemoryMapping); } @@ -496,26 +505,26 @@ mod tests { rel_ts: 0.000083, name: "renameat".to_owned(), args: vec![ - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AT_FDCWD".to_owned()), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AT_FDCWD".to_owned()), metadata: Some(temp_dir_src.path().as_os_str().as_bytes().to_vec()), - }, - SyscallArg::Buffer { + }), + Expression::Buffer(BufferExpression { value: "a".as_bytes().to_vec(), type_: BufferType::Unknown, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AT_FDCWD".to_owned()), + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AT_FDCWD".to_owned()), metadata: Some(temp_dir_dst.path().as_os_str().as_bytes().to_vec()), - }, - SyscallArg::Buffer { + }), + Expression::Buffer(BufferExpression { value: "b".as_bytes().to_vec(), type_: BufferType::Unknown, - }, - SyscallArg::Integer { - value: IntegerExpression::NamedConst("RENAME_NOREPLACE".to_owned()), + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("RENAME_NOREPLACE".to_owned()), metadata: None, - }, + }), ], ret_val: 0, })]; @@ -540,30 +549,30 @@ mod tests { rel_ts: 0.000036, name: "connect".to_owned(), args: vec![ - SyscallArg::Integer { - value: IntegerExpression::Literal(4), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), metadata: Some("/run/user/1000/systemd/private".as_bytes().to_vec()), - }, - SyscallArg::Struct(HashMap::from([ + }), + Expression::Struct(HashMap::from([ ( "sa_family".to_owned(), - SyscallArg::Integer { - value: IntegerExpression::NamedConst("AF_UNIX".to_owned()), + Expression::Integer(IntegerExpression { + value: 
IntegerExpressionValue::NamedConst("AF_UNIX".to_owned()), metadata: None, - }, + }), ), ( "sun_path".to_owned(), - SyscallArg::Buffer { + Expression::Buffer(BufferExpression { value: "/run/user/1000/systemd/private".as_bytes().to_vec(), type_: BufferType::Unknown, - }, + }), ), ])), - SyscallArg::Integer { - value: IntegerExpression::Literal(33), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(33), metadata: None, - }, + }), ], ret_val: 0, })];