diff --git a/src/strace/mod.rs b/src/strace/mod.rs index 65bb3bf..3f31ed5 100644 --- a/src/strace/mod.rs +++ b/src/strace/mod.rs @@ -43,10 +43,11 @@ pub enum Expression { Buffer(BufferExpression), Integer(IntegerExpression), Struct(HashMap), - Array(Vec), - Set { + // The strace syntax can be ambiguous between array and set (ie sigset_t in sigprocmask), + // so store both in this, and let the summary interpret + Collection { complement: bool, - values: Vec, + values: Vec, }, Macro { name: String, diff --git a/src/strace/parser/mod.rs b/src/strace/parser/mod.rs index 654c6e2..52f140e 100644 --- a/src/strace/parser/mod.rs +++ b/src/strace/parser/mod.rs @@ -265,11 +265,21 @@ mod tests { ), ( "sa_mask".to_owned(), - Expression::Set { + Expression::Collection { complement: true, values: vec![ - IntegerExpressionValue::NamedConst("RTMIN".to_owned()), - IntegerExpressionValue::NamedConst("RT_1".to_owned()) + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTMIN".to_owned()), + metadata: None + } + ), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RT_1".to_owned()), + metadata: None + } + ), ] } ), @@ -318,24 +328,36 @@ mod tests { ), metadata: None, }), - Expression::Set { + Expression::Collection { complement: false, values: vec![], }, - Expression::Set { + Expression::Collection { complement: true, values: vec![ - IntegerExpressionValue::NamedConst( - "KILL".to_owned() + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("KILL".to_owned()), + metadata: None + } ), - IntegerExpressionValue::NamedConst( - "STOP".to_owned(), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("STOP".to_owned()), + metadata: None + } ), - IntegerExpressionValue::NamedConst( - "RTMIN".to_owned(), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTMIN".to_owned()), + metadata: None + } ), - 
IntegerExpressionValue::NamedConst( - "RT_1".to_owned(), + Expression::Integer( + IntegerExpression { + value: IntegerExpressionValue::NamedConst("RT_1".to_owned()), + metadata: None + } ), ], }, @@ -607,16 +629,19 @@ mod tests { Expression::Struct(HashMap::from([ ( "val".to_owned(), - Expression::Array(vec![ - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(1360496552), - metadata: None - }), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(2460437074), - metadata: None - }), - ]) + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1360496552), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(2460437074), + metadata: None + }), + ] + } ) ])) ), @@ -719,16 +744,19 @@ mod tests { Expression::Struct(HashMap::from([ ( "val".to_owned(), - Expression::Array(vec![ - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(0), - metadata: None - }), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(0), - metadata: None - }), - ]) + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None + }), + ] + } ) ])) ), @@ -821,57 +849,60 @@ mod tests { value: IntegerExpressionValue::Literal(15), metadata: Some(vec![115, 111, 99, 107, 101, 116, 58, 91, 53, 52, 49, 56, 50, 49, 51, 93]) }), - Expression::Array(vec![ - Expression::Struct(HashMap::from([ - ( - "nlmsg_len".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(20), - metadata: None, - }), - ), - ( - "nlmsg_type".to_owned(), - Expression::Integer(IntegerExpression { - value: 
IntegerExpressionValue::NamedConst("RTM_GETADDR".to_owned()), - metadata: None, - }), - ), - ( - "nlmsg_flags".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::BinaryOr(vec![ - IntegerExpressionValue::NamedConst("NLM_F_REQUEST".to_owned()), - IntegerExpressionValue::NamedConst("NLM_F_DUMP".to_owned()), - ]), - metadata: None, - }), - ), - ( - "nlmsg_seq".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(1694010548), - metadata: None, - }), - ), - ( - "nlmsg_pid".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(0), - metadata: None, - }), - ), - ])), - Expression::Struct(HashMap::from([ - ( - "ifa_family".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::NamedConst("AF_UNSPEC".to_owned()), - metadata: None, - }), - ), - ])), - ]), + Expression::Collection { + complement: false, + values: vec![ + Expression::Struct(HashMap::from([ + ( + "nlmsg_len".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(20), + metadata: None, + }), + ), + ( + "nlmsg_type".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("RTM_GETADDR".to_owned()), + metadata: None, + }), + ), + ( + "nlmsg_flags".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::BinaryOr(vec![ + IntegerExpressionValue::NamedConst("NLM_F_REQUEST".to_owned()), + IntegerExpressionValue::NamedConst("NLM_F_DUMP".to_owned()), + ]), + metadata: None, + }), + ), + ( + "nlmsg_seq".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(1694010548), + metadata: None, + }), + ), + ( + "nlmsg_pid".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(0), + metadata: None, + }), + ), + ])), + Expression::Struct(HashMap::from([ + ( + "ifa_family".to_owned(), + 
Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("AF_UNSPEC".to_owned()), + metadata: None, + }), + ), + ])), + ] + }, Expression::Integer(IntegerExpression { value: IntegerExpressionValue::Literal(20), metadata: None, @@ -1161,64 +1192,67 @@ mod tests { value: IntegerExpressionValue::Literal(4), metadata: Some(vec![0x61, 0x6e, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x3a, 0x5b, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x70, 0x6f, 0x6c, 0x6c, 0x5d]), }), - Expression::Array(vec![ - Expression::Struct(HashMap::from([ - ( - "events".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), - metadata: None, - }), - ), - ( - "data".to_owned(), - Expression::Struct(HashMap::from([ - ( - "u32".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(833093633), - metadata: None, - }), - ), - ( - "u64".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(9163493471957811201), - metadata: None, - }), - ), - ])) - ), - ])), - Expression::Struct(HashMap::from([ - ( - "events".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), - metadata: None, - }), - ), - ( - "data".to_owned(), - Expression::Struct(HashMap::from([ - ( - "u32".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(800587777), - metadata: None, - }), - ), - ( - "u64".to_owned(), - Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(9163493471925305345), - metadata: None, - }), - ), - ])) - ), - ])), - ]), + Expression::Collection { + complement: false, + values: vec![ + Expression::Struct(HashMap::from([ + ( + "events".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), + metadata: None, + }), + ), + ( + "data".to_owned(), + 
Expression::Struct(HashMap::from([ + ( + "u32".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(833093633), + metadata: None, + }), + ), + ( + "u64".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(9163493471957811201), + metadata: None, + }), + ), + ])) + ), + ])), + Expression::Struct(HashMap::from([ + ( + "events".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst("EPOLLOUT".to_owned()), + metadata: None, + }), + ), + ( + "data".to_owned(), + Expression::Struct(HashMap::from([ + ( + "u32".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(800587777), + metadata: None, + }), + ), + ( + "u64".to_owned(), + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(9163493471925305345), + metadata: None, + }), + ), + ])) + ), + ])), + ] + }, Expression::Integer(IntegerExpression { value: IntegerExpressionValue::Literal(128), metadata: None, @@ -1295,10 +1329,15 @@ mod tests { value: IntegerExpressionValue::Literal(4), metadata: None, }), - Expression::Array(vec![Expression::Integer(IntegerExpression { - value: IntegerExpressionValue::Literal(3), - metadata: None, - })]), + Expression::Collection { + complement: false, + values: vec![ + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::Literal(3), + metadata: None, + }) + ] + }, Expression::Integer(IntegerExpression { value: IntegerExpressionValue::NamedConst("NULL".to_owned()), metadata: None, diff --git a/src/strace/parser/peg.pest b/src/strace/parser/peg.pest index 7373d0f..7009c65 100644 --- a/src/strace/parser/peg.pest +++ b/src/strace/parser/peg.pest @@ -1,8 +1,18 @@ // Pest grammar for strace output (some stuff only works with our strace output arguments) -// Main line tokens +// Line formats + +syscall_line = { + #complete = syscall_line_complete | + #start = syscall_line_start | + 
#end = syscall_line_end +} +syscall_line_complete = { SOI ~ pid ~ " "+ ~ rel_ts ~ " " ~ name ~ "(" ~ arguments ~ ")" ~ " = " ~ ret_val ~ EOI } +syscall_line_start = { SOI ~ pid ~ " "+ ~ rel_ts ~ " " ~ name ~ "(" ~ arguments ~ " " ~ EOI } +syscall_line_end = { SOI ~ pid ~ " "+ ~ rel_ts ~ " <... " ~ name ~ " resumed> ) " ~ " "* ~ "= " ~ ret_val ~ EOI } -syscall_line = { SOI ~ pid ~ " "+ ~ rel_ts ~ " " ~ name ~ arguments ~ " = " ~ ret_val ~ EOI } + +// Main line tokens pid = { ASCII_DIGIT+ } @@ -10,7 +20,7 @@ rel_ts = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ } name = { symbol_name } -arguments = { "(" ~ (expression ~ (", " ~ expression)*)? ~ ")" } +arguments = { (expression ~ (", " ~ expression)*)? } ret_val = { int ~ (" " ~ ANY*)? } @@ -79,7 +89,7 @@ buffer = { buffer_char = { !"\"" ~ ANY } buffer_byte = { "\\x" ~ ASCII_HEX_DIGIT{2} } -macro = { symbol_name ~ arguments } +macro = { symbol_name ~ "(" ~ arguments ~ ")" } array = { "[" ~ (expression ~ (", " ~ expression)+)? ~ "]" } diff --git a/src/strace/parser/peg.rs b/src/strace/parser/peg.rs index 2e91d9d..ab776a3 100644 --- a/src/strace/parser/peg.rs +++ b/src/strace/parser/peg.rs @@ -17,11 +17,32 @@ struct PegParser; pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result { let pair = match PegParser::parse(Rule::syscall_line, line) { Err(_) => return Ok(ParseResult::IgnoredLine), - Ok(mut p) => p.next().unwrap(), + Ok(mut p) => pair_descend(p.next().unwrap(), 1).unwrap(), }; - log::trace!("{:?}", pair); - let sc = pair.try_into()?; - Ok(ParseResult::Syscall(sc)) + log::trace!("{:#?}", pair); + match pair.as_node_tag() { + Some("complete") => Ok(ParseResult::Syscall(pair.try_into()?)), + Some("start") => Ok(ParseResult::UnfinishedSyscall(pair.try_into()?)), + Some("end") => { + let sc_end: Syscall = pair.try_into()?; + let (unfinished_index, sc_start) = unfinished_syscalls + .iter() + .enumerate() + .find(|(_i, sc)| (sc.name == sc_end.name) && (sc.pid == sc_end.pid)) + .ok_or_else(|| 
anyhow::anyhow!("Unabled to find first part of syscall"))?; + let sc_merged = Syscall { + // Update return val and timestamp (to get return time instead of call time) + ret_val: sc_end.ret_val, + rel_ts: sc_end.rel_ts, + ..sc_start.clone() + }; + Ok(ParseResult::FinishedSyscall { + sc: sc_merged, + unfinished_index, + }) + } + _ => anyhow::bail!("Unhandled pair: {pair:?}"), + } } fn pair_descend(pair: Pair<'_, Rule>, levels: usize) -> anyhow::Result> { @@ -70,18 +91,24 @@ impl TryFrom> for Expression { .collect::>()?, }) } - Some("array") => Ok(Expression::Array( - pair.into_inner() + Some("array") => Ok(Expression::Collection { + complement: false, + values: pair + .into_inner() .map(|p| Expression::try_from(pair_descend(p, 1)?)) .collect::>()?, - )), + }), Some("set") => { let complement = pair.as_str().starts_with('~'); - Ok(Expression::Set { + Ok(Expression::Collection { complement, values: pair .into_inner() - .map(|p| IntegerExpression::try_from(pair_descend(p, 1)?).map(|e| e.value)) + .map(|p| -> anyhow::Result<_> { + Ok(Expression::Integer(IntegerExpression::try_from( + pair_descend(p, 1)?, + )?)) + }) .collect::>()?, }) } @@ -149,7 +176,7 @@ fn lit_pair(pair: Pair) -> anyhow::Result { .map_or(Ok(None), |v| v.map(Some))?, ) } - _ => unreachable!("{pair:?}"), + _ => anyhow::bail!("Unhandled pair: {pair:?}"), }; Ok(IntegerExpression { value: IntegerExpressionValue::Literal(val), @@ -230,7 +257,7 @@ impl TryFrom> for IntegerExpression { metadata: None, }) } - _ => unreachable!("{pair:?}"), + _ => anyhow::bail!("Unhandled pair: {pair:?}"), } } } @@ -239,6 +266,10 @@ impl TryFrom> for Syscall { type Error = anyhow::Error; fn try_from(pair: Pair) -> Result { + let pair_tag = pair + .as_node_tag() + .ok_or_else(|| anyhow::anyhow!("Unhandled pair: {pair:?}"))? 
+ .to_owned(); let mut subpairs = pair.into_inner(); // Note if the grammar is correct, we should *never* panic below let pid = subpairs @@ -256,24 +287,35 @@ impl TryFrom> for Syscall { .ok_or_else(|| anyhow::anyhow!("Missing name node"))? .as_str() .to_owned(); - let args = subpairs - .next() - .ok_or_else(|| anyhow::anyhow!("Missing arguments node"))? - .into_inner() - .map(|p| pair_descend(p, 1)?.try_into()) - .collect::>()?; - let ret_val_pair = pair_descend( + + let args = if pair_tag.as_str() != "end" { subpairs .next() - .ok_or_else(|| anyhow::anyhow!("Missing return value node"))?, - 2, - )?; - let ret_val = if let IntegerExpressionValue::Literal(val) = - IntegerExpression::try_from(ret_val_pair)?.value - { - val + .ok_or_else(|| anyhow::anyhow!("Missing arguments node"))? + .into_inner() + .map(|p| pair_descend(p, 1)?.try_into()) + .collect::>()? } else { - anyhow::bail!("Return value is not a literal int"); + vec![] + }; + let ret_val = match pair_tag.as_str() { + "complete" | "end" => { + let ret_val_pair = pair_descend( + subpairs + .next() + .ok_or_else(|| anyhow::anyhow!("Missing return value node"))?, + 2, + )?; + if let IntegerExpressionValue::Literal(val) = + IntegerExpression::try_from(ret_val_pair)?.value + { + val + } else { + anyhow::bail!("Return value is not a literal int"); + } + } + "start" => i128::MAX, + tag => anyhow::bail!("Unhandled pair tag: {tag:?}"), }; Ok(Syscall { pid, diff --git a/src/strace/parser/regex.rs b/src/strace/parser/regex.rs index 9800c69..84b893f 100644 --- a/src/strace/parser/regex.rs +++ b/src/strace/parser/regex.rs @@ -315,10 +315,15 @@ fn parse_argument(caps: &regex::Captures) -> anyhow::Result { } else { values_str .split(' ') - .map(|v| IntegerExpressionValue::NamedConst(v.to_owned())) + .map(|v| { + Expression::Integer(IntegerExpression { + value: IntegerExpressionValue::NamedConst(v.to_owned()), + metadata: None, + }) + }) .collect() }; - Ok(Expression::Set { complement, values }) + Ok(Expression::Collection { 
complement, values }) } else { let tokens = const_str.split('|').collect::>(); if tokens.len() == 1 { @@ -385,7 +390,10 @@ fn parse_argument(caps: &regex::Captures) -> anyhow::Result { .captures_iter(array.as_str()) .map(|a| parse_argument(&a)) .collect::>()?; - Ok(Expression::Array(members)) + Ok(Expression::Collection { + complement: false, + values: members, + }) } else if let Some(buf) = caps.name("buf") { let buf = parse_buffer(buf.as_str())?; let type_ = if caps.name("buf_abstract_path").is_some() {