diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 468f0b4..e18ed57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,26 +8,35 @@ env: jobs: build: runs-on: ubuntu-latest + strategy: + matrix: + strace-parser: [combinator, peg, regex] steps: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable - - run: cargo build --verbose + - run: cargo build --no-default-features --features strace-parser-${{matrix.strace-parser}} --verbose test: runs-on: ubuntu-latest + strategy: + matrix: + strace-parser: [combinator, peg, regex] steps: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable - - run: cargo test --bins --verbose + - run: cargo test --bins --no-default-features --features strace-parser-${{matrix.strace-parser}} --verbose clippy: runs-on: ubuntu-latest + strategy: + matrix: + strace-parser: [combinator, peg, regex] steps: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 @@ -35,7 +44,7 @@ jobs: profile: minimal toolchain: stable components: clippy - - run: cargo clippy -- -D warnings + - run: cargo clippy --no-default-features --features strace-parser-${{matrix.strace-parser}} -- -D warnings fmt: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 5c04150..c956bb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -292,6 +292,21 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +[[package]] +name = "function_name" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1ab577a896d09940b5fe12ec5ae71f9d8211fff62c919c03a3750a9901e98a7" +dependencies = [ + "function_name-proc-macro", +] + +[[package]] +name = "function_name-proc-macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673464e1e314dd67a0fd9544abc99e8eb28d0c7e3b69b033bcff9b2d00b87333" + [[package]] name = "generic-array" version = "0.14.7" @@ -370,6 +385,12 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -390,6 +411,16 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "object" version = "0.32.2" @@ -635,10 +666,12 @@ dependencies = [ "bincode", "clap", "fastrand", + "function_name", "itertools", "lazy_static", "log", "nix", + "nom", "pest", "pest_derive", "predicates", diff --git a/Cargo.toml b/Cargo.toml index 3d72de0..775b6e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,10 +15,12 @@ strip = true anyhow = { version = "1.0.72", default-features = false, features = ["std", "backtrace"] } bincode = { version = "1.3.3", default-features = false } clap = { version = "4.3.17", default-features = false, features = ["std", "color", "help", "usage", "error-context", "suggestions", "derive"] } +function_name = { version = "0.3.0", default-features = false, optional = true } itertools = { version = "0.11.0", default-features = false, features = ["use_std"] } lazy_static = { version = "1.4.0", default-features = false } log = { version = "0.4.19", default-features = false, features = ["max_level_trace", "release_max_level_info"] } nix = { version = "0.26.2", default-features = false, features = ["fs"] } +nom = { version = "7.1.3", default-features = false, features = ["std"], optional = true } pest = { version = "2.7.10", default-features = false, features = ["std", "memchr"], optional = true } pest_derive = { version = "2.7.10", default-features = false, features = ["std", "grammar-extras"], optional = true} rand = { version = "0.8.5", default-features = false, features = ["std", "std_rng"] } @@ -37,11 +39,12 @@ predicates = { version = "3.0.3", default-features = false, features = ["color"] pretty_assertions = { version = "1.4.0", default-features = false, features = ["std"] } [features] -default = ["parser-peg"] +default = ["strace-parser-combinator"] as-root = [] # for tests only nightly = [] # for benchmarks only -parser-peg = ["dep:pest", "dep:pest_derive"] -parser-regex = [] +strace-parser-combinator = ["dep:function_name", "dep:nom"] +strace-parser-peg = ["dep:pest", "dep:pest_derive"] +strace-parser-regex = [] [lints.rust] missing_docs = "warn" diff --git a/src/strace/mod.rs b/src/strace/mod.rs index 00cba9a..68716e3 100644 --- a/src/strace/mod.rs +++ b/src/strace/mod.rs @@ -50,6 +50,7 @@ pub enum Expression { args: Vec, }, // Only used for strace pseudo macro invocations, see `test_macro_addr_arg` for an example + #[cfg_attr(feature = "strace-parser-regex", allow(dead_code))] DestinationAddress(String), } diff --git a/src/strace/parser/combinator.rs b/src/strace/parser/combinator.rs new file mode 100644 index 0000000..b29ff8a --- /dev/null +++ b/src/strace/parser/combinator.rs @@ -0,0 +1,519 @@ +//! Combinator based strace output parser + +use std::iter; + +use nom::{ + branch::alt, + bytes::complete::{tag, take, take_until}, + character::complete::{ + self, alpha1, alphanumeric1, char, digit1, hex_digit1, oct_digit1, space1, + }, + combinator::{map, map_opt, map_res, opt, recognize}, + multi::{many0_count, many_till, separated_list0, separated_list1}, + number::complete::double, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + IResult, +}; + +use crate::strace::{ + BufferExpression, BufferType, Expression, IntegerExpression, IntegerExpressionValue, Syscall, +}; + +use super::ParseResult; + +macro_rules! dbg_parser { + ($input:expr) => { + log::trace!("{}:{}\ninput: {:?}", function_name!(), line!(), $input,); + }; +} + +pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result { + match parse_syscall_line(line).map(|s| s.1) { + Err(nom::Err::Incomplete(_)) | Err(nom::Err::Error(_)) => Ok(ParseResult::IgnoredLine), + Err(nom::Err::Failure(e)) => Err(anyhow::anyhow!("{e}")), + Ok(ParseResult::FinishedSyscall { sc: sc_end, .. }) => { + let (unfinished_index, sc_start) = unfinished_syscalls + .iter() + .enumerate() + .find(|(_i, sc)| (sc.name == sc_end.name) && (sc.pid == sc_end.pid)) + .ok_or_else(|| anyhow::anyhow!("Unabled to find first part of syscall"))?; + let sc_merged = Syscall { + // Update return val and timestamp (to get return time instead of call time) + ret_val: sc_end.ret_val, + rel_ts: sc_end.rel_ts, + ..sc_start.clone() + }; + Ok(ParseResult::FinishedSyscall { + sc: sc_merged, + unfinished_index, + }) + } + Ok(res) => Ok(res), + } +} + +// Main line token parsers + +#[function_name::named] +fn parse_syscall_line(i: &str) -> IResult<&str, ParseResult> { + dbg_parser!(i); + alt(( + // Complete syscall + map( + tuple(( + parse_pid, + parse_rel_ts, + parse_name, + parse_args_complete, + parse_ret_val, + )), + |(pid, rel_ts, name, args, ret_val)| { + ParseResult::Syscall(Syscall { + pid, + rel_ts, + name: name.to_owned(), + args, + ret_val, + }) + }, + ), + // Syscall start + map( + tuple((parse_pid, parse_rel_ts, parse_name, parse_args_incomplete)), + |(pid, rel_ts, name, args)| { + ParseResult::UnfinishedSyscall(Syscall { + pid, + rel_ts, + name: name.to_owned(), + args, + ret_val: i128::MAX, + }) + }, + ), + // Syscall end + map( + tuple(( + parse_pid, + parse_rel_ts, + delimited( + tag("<... "), + parse_name, + tuple((tag(" resumed> )"), space1)), + ), + parse_ret_val, + )), + |(pid, rel_ts, name, ret_val)| ParseResult::FinishedSyscall { + sc: Syscall { + pid, + rel_ts, + name: name.to_owned(), + args: vec![], + ret_val, + }, + unfinished_index: usize::MAX, + }, + ), + ))(i) +} + +#[function_name::named] +fn parse_pid(i: &str) -> IResult<&str, u32> { + dbg_parser!(i); + terminated(map_res(digit1, str::parse), space1)(i) +} + +#[function_name::named] +fn parse_rel_ts(i: &str) -> IResult<&str, f64> { + dbg_parser!(i); + terminated(double, space1)(i) +} + +#[function_name::named] +fn parse_name(i: &str) -> IResult<&str, &str> { + dbg_parser!(i); + parse_symbol(i) +} + +#[function_name::named] +fn parse_args_complete(i: &str) -> IResult<&str, Vec> { + dbg_parser!(i); + delimited(char('('), parse_args_inner, terminated(char(')'), space1))(i) +} + +#[function_name::named] +fn parse_args_incomplete(i: &str) -> IResult<&str, Vec> { + dbg_parser!(i); + delimited(char('('), parse_args_inner, tag(" "))(i) +} + +#[function_name::named] +fn parse_args_inner(i: &str) -> IResult<&str, Vec> { + dbg_parser!(i); + alt(( + map(separated_list1(tag(", "), parse_struct_member), |e| { + // Named arguments are stuffed in a single struct + vec![Expression::Struct( + e.into_iter().map(|(n, e)| (n.to_owned(), e)).collect(), + )] + }), + separated_list0(tag(", "), alt((parse_in_out_argument, parse_expression))), + ))(i) +} + +#[function_name::named] +fn parse_in_out_argument(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + separated_pair(parse_expression, tag(" => "), parse_expression), + |(i, _o)| i, + )(i) +} + +#[function_name::named] +fn parse_ret_val(i: &str) -> IResult<&str, i128> { + dbg_parser!(i); + map_res( + preceded(terminated(char('='), space1), parse_int_literal), + |e| { + if let IntegerExpressionValue::Literal(v) = e.value { + Ok(v) + } else { + Err("Failed to get return value: {e:?}") + } + }, + )(i) +} + +// Shared parsers + +#[function_name::named] +fn parse_symbol(i: &str) -> IResult<&str, &str> { + dbg_parser!(i); + recognize(pair( + alt((alpha1, tag("_"))), + many0_count(alt((alphanumeric1, tag("_")))), + ))(i) +} + +#[function_name::named] +fn parse_comment(i: &str) -> IResult<&str, &str> { + dbg_parser!(i); + delimited(tag(" /* "), take_until(" */"), tag(" */"))(i) +} + +// Expression + +#[function_name::named] +fn parse_expression(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + pair( + alt(( + parse_expression_macro, + parse_expression_int, + parse_expression_struct, + parse_expression_buf, + parse_expression_set, + parse_expression_array, + )), + opt(parse_comment), + ), + |(u, _)| u, + )(i) +} + +#[function_name::named] +fn parse_expression_macro(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + pair( + parse_symbol, + delimited( + char('('), + separated_list0( + tag(", "), + alt((parse_expression_macro_pseudo_address, parse_expression)), + ), + char(')'), + ), + ), + |(n, args)| Expression::Macro { + name: n.to_owned(), + args, + }, + )(i) +} + +#[function_name::named] +fn parse_expression_macro_pseudo_address(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map(preceded(char('&'), parse_symbol), |s| { + Expression::DestinationAddress(s.to_owned()) + })(i) +} + +#[function_name::named] +fn parse_expression_int(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map(parse_int, Expression::Integer)(i) +} + +#[function_name::named] +fn parse_expression_struct(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + delimited( + char('{'), + separated_list0( + tag(", "), + alt(( + map(parse_struct_member, |(n, e)| (n.to_owned(), e)), + map_opt(parse_expression_macro, |e| { + if let Expression::Macro { args, .. } = &e { + args.iter().find_map(|a| { + if let Expression::DestinationAddress(n) = a { + Some((n.to_owned(), e.to_owned())) + } else { + None + } + }) + } else { + None + } + }), + )), + ), + tuple((opt(tag(", ...")), char('}'))), + ), + |m| Expression::Struct(m.into_iter().collect()), + )(i) +} + +#[function_name::named] +fn parse_expression_buf(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map(parse_buffer, Expression::Buffer)(i) +} + +#[function_name::named] +fn parse_expression_set(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + pair( + opt(char('~')), + delimited(char('['), separated_list0(char(' '), parse_int), char(']')), + ), + |(neg, values)| Expression::Collection { + complement: neg.is_some(), + values: values.into_iter().map(Expression::Integer).collect(), + }, + )(i) +} + +#[function_name::named] +fn parse_expression_array(i: &str) -> IResult<&str, Expression> { + dbg_parser!(i); + map( + delimited( + char('['), + separated_list0(tag(", "), parse_expression), + char(']'), + ), + |values| Expression::Collection { + complement: false, + values, + }, + )(i) +} + +// Int expression + +#[function_name::named] +fn parse_int(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + alt(( + parse_int_bit_or, + parse_int_multiplication, + parse_int_left_shift, + parse_int_literal, + parse_int_named, + ))(i) +} + +#[function_name::named] +fn parse_int_bit_or(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + map( + separated_pair( + parse_int_named, + char('|'), + separated_list1(char('|'), parse_int), + ), + |(f, r)| IntegerExpression { + value: IntegerExpressionValue::BinaryOr( + iter::once(f.value) + .chain(r.into_iter().map(|r| r.value).flat_map(|e| { + // Flatten child expressions + if let IntegerExpressionValue::BinaryOr(es) = e { + es.into_iter() + } else { + vec![e].into_iter() + } + })) + .collect(), + ), + metadata: None, + }, + )(i) +} + +#[function_name::named] +fn parse_int_multiplication(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + map( + separated_pair( + parse_int_literal, + char('*'), + separated_list1(char('*'), parse_int), + ), + |(f, r)| IntegerExpression { + value: IntegerExpressionValue::Multiplication( + iter::once(f.value) + .chain(r.into_iter().map(|r| r.value).flat_map(|e| { + // Flatten child expressions + if let IntegerExpressionValue::Multiplication(es) = e { + es.into_iter() + } else { + vec![e].into_iter() + } + })) + .collect(), + ), + metadata: None, + }, + )(i) +} + +#[function_name::named] +fn parse_int_literal(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + map( + tuple(( + alt(( + parse_int_literal_hexa, + parse_int_literal_oct, + parse_int_literal_dec, + )), + parse_int_metadata, + )), + |(v, m)| IntegerExpression { + value: IntegerExpressionValue::Literal(v), + metadata: m, + }, + )(i) +} + +#[function_name::named] +fn parse_int_left_shift(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + map( + separated_pair(parse_int_literal, tag("<<"), parse_int), + |(b, s)| IntegerExpression { + value: IntegerExpressionValue::LeftBitShift { + bits: Box::new(b.value), + shift: Box::new(s.value), + }, + metadata: None, + }, + )(i) +} + +#[function_name::named] +fn parse_int_named(i: &str) -> IResult<&str, IntegerExpression> { + dbg_parser!(i); + map( + tuple((parse_symbol, parse_int_metadata)), + |(e, metadata)| IntegerExpression { + value: IntegerExpressionValue::NamedConst(e.to_owned()), + metadata, + }, + )(i) +} + +#[function_name::named] +fn parse_int_metadata(i: &str) -> IResult<&str, Option>> { + dbg_parser!(i); + opt(map( + preceded(char('<'), many_till(parse_buffer_byte, char('>'))), + |r| r.0, + ))(i) +} + +// Int literal + +#[function_name::named] +fn parse_int_literal_hexa(i: &str) -> IResult<&str, i128> { + dbg_parser!(i); + preceded( + tag("0x"), + map_res(hex_digit1, |s| i128::from_str_radix(s, 16)), + )(i) +} + +#[function_name::named] +fn parse_int_literal_oct(i: &str) -> IResult<&str, i128> { + dbg_parser!(i); + preceded( + char('0'), + map_res(oct_digit1, |s| i128::from_str_radix(s, 8)), + )(i) +} + +#[function_name::named] +fn parse_int_literal_dec(i: &str) -> IResult<&str, i128> { + dbg_parser!(i); + complete::i128(i) +} + +// Buffer + +#[function_name::named] +fn parse_buffer(i: &str) -> IResult<&str, BufferExpression> { + dbg_parser!(i); + map( + terminated( + pair( + opt(char('@')), + preceded(char('"'), many_till(parse_buffer_byte, char('"'))), + ), + opt(tag("...")), + ), + |(a, r)| BufferExpression { + value: r.0, + type_: if a.is_some() { + BufferType::AbstractPath + } else { + BufferType::Unknown + }, + }, + )(i) +} + +#[function_name::named] +fn parse_buffer_byte(i: &str) -> IResult<&str, u8> { + dbg_parser!(i); + alt(( + map_res(preceded(tag("\\x"), take(2_usize)), |s| { + u8::from_str_radix(s, 16) + }), + map(take(1_usize), |s: &str| s.as_bytes()[0]), + ))(i) +} + +// Struct + +#[function_name::named] +fn parse_struct_member(i: &str) -> IResult<&str, (&str, Expression)> { + dbg_parser!(i); + separated_pair(parse_symbol, char('='), parse_expression)(i) +} diff --git a/src/strace/parser/mod.rs b/src/strace/parser/mod.rs index 0965f14..6b33a12 100644 --- a/src/strace/parser/mod.rs +++ b/src/strace/parser/mod.rs @@ -8,14 +8,18 @@ use std::{ use crate::strace::Syscall; -#[cfg(feature = "parser-peg")] +#[cfg(feature = "strace-parser-combinator")] +mod combinator; +#[cfg(feature = "strace-parser-peg")] mod peg; -#[cfg(feature = "parser-regex")] +#[cfg(feature = "strace-parser-regex")] mod regex; -#[cfg(feature = "parser-peg")] +#[cfg(feature = "strace-parser-combinator")] +use combinator::parse_line; +#[cfg(feature = "strace-parser-peg")] use peg::parse_line; -#[cfg(feature = "parser-regex")] +#[cfg(feature = "strace-parser-regex")] use regex::parse_line; pub struct LogParser { @@ -960,6 +964,35 @@ mod tests { ret_val: 20 }) ); + + #[cfg(not(feature = "strace-parser-regex"))] + assert_eq!( + parse_line( + "215947 0.000022 read(3, \"\\x12\\xef\"..., 832) = 832", + &[] + ) + .unwrap(), + ParseResult::Syscall(Syscall { + pid: 215947, + rel_ts: 0.000022, + name: "read".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None, + }), + Expression::Buffer(BufferExpression { + value: vec![0x12, 0xef], + type_: BufferType::Unknown, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(832), + metadata: None, + }), + ], + ret_val: 832 + }) + ); } #[test] @@ -1389,6 +1422,76 @@ mod tests { ); } + #[cfg_attr( + feature = "strace-parser-regex", + ignore = "bit sets are buggy with regex parser" + )] + #[test] + fn test_sched_getaffinity() { + let _ = simple_logger::SimpleLogger::new().init(); + + assert_eq!( + parse_line( + "231196 0.000017 sched_getaffinity(0, 512, [0 1 2 3 4 5 6 7]) = 8", + &[] + ) + .unwrap(), + ParseResult::Syscall(Syscall { + pid: 231196, + rel_ts: 0.000017, + name: "sched_getaffinity".to_owned(), + args: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(512), + metadata: None, + }), + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(2), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(4), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(5), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(6), + metadata: None, + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(7), + metadata: None, + }), + ] + }, + ], + ret_val: 8 + }) + ); + } + #[test] fn test_execve() { let _ = simple_logger::SimpleLogger::new().init(); @@ -1429,7 +1532,7 @@ mod tests { } #[cfg_attr( - feature = "parser-regex", + feature = "strace-parser-regex", ignore = "in/out arguments not supported by regex parser" )] #[test] @@ -1519,7 +1622,7 @@ mod tests { } #[cfg_attr( - feature = "parser-regex", + feature = "strace-parser-regex", ignore = "named arguments not supported by regex parser" )] #[test] @@ -1571,7 +1674,7 @@ mod tests { } #[cfg_attr( - feature = "parser-regex", + feature = "strace-parser-regex", ignore = "bit shifts are broken with regex parser" )] #[test] @@ -1641,7 +1744,7 @@ mod tests { } #[cfg_attr( - feature = "parser-regex", + feature = "strace-parser-regex", ignore = "macro address argument not supported by regex parser" )] #[test] diff --git a/src/strace/parser/regex.rs b/src/strace/parser/regex.rs index 5c6382f..5ff9067 100644 --- a/src/strace/parser/regex.rs +++ b/src/strace/parser/regex.rs @@ -345,7 +345,7 @@ fn parse_argument(caps: ®ex::Captures) -> anyhow::Result { one_shift.to_owned(), )), }) - } else if t.starts_with("0") { + } else if t.starts_with('0') { Ok(IntegerExpressionValue::Literal(i128::from_str_radix(t, 8)?)) } else { Ok(IntegerExpressionValue::NamedConst(t.to_owned()))