From 7332a7b4e57864dd9c4e44a0bed01d6f68c31301 Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Fri, 2 Aug 2024 12:47:36 -0400 Subject: [PATCH] feat: add support for rule metadata to be output in scan mode (#170) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the -m flag (print metadata) so that rule metadata is printed when a scan matches. In text mode the metadata is appended in square brackets after the rule identifier; in JSON mode it is emitted under a "meta" key on each matching rule. Also, if a string is using the xor modifier we now display the xor information (key and plaintext) in both text and json output modes. ``` wxs@mbp yara-x % ./target/debug/yr scan -o ndjson -s rules/a.yara ~/src/yara/tests/data/xor.out | jq . { "path": "/Users/wxs/src/yara/tests/data/xor.out", "rules": [ { "identifier": "freebsd", "strings": [ { "identifier": "$a", "start": 28, "length": 19, "data": "Uihr!qsnfs`l!b`oonu", "xor_key": 1, "plaintext": "This program cannot" }, { "identifier": "$a", "start": 52, "length": 19, "data": "Vjkq\\\"rpmepco\\\"acllmv", "xor_key": 2, "plaintext": "This program cannot" }, { "identifier": "$b", "start": 4, "length": 19, "data": "This program cannot" } ] } ] } wxs@mbp yara-x % ./target/debug/yr scan -s rules/a.yara ~/src/yara/tests/data/xor.out freebsd /Users/wxs/src/yara/tests/data/xor.out 0x1c:19:$a xor(0x1,This program cannot): Uihr!qsnfs`l!b`oonu 0x34:19:$a xor(0x2,This program cannot): Vjkq\"rpmepco\"acllmv 0x4:19:$b: This program cannot ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── 1 file(s) scanned in 0.0s. 1 file(s) matched. wxs@mbp yara-x % ``` When using --print-strings-limit it looks like this in text mode: ``` wxs@mbp yara-x % ./target/debug/yr scan -s --print-strings-limit 5 rules/a.yara ~/src/yara/tests/data/xor.out freebsd /Users/wxs/src/yara/tests/data/xor.out 0x1c:19:$a xor(0x1,This ): Uihr! ... 14 more bytes 0x34:19:$a xor(0x2,This ): Vjkq\" ... 14 more bytes 0x4:19:$b: This ... 
14 more bytes ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── 1 file(s) scanned in 0.0s. 1 file(s) matched. wxs@mbp yara-x % ``` --------- Co-authored-by: Victor M. Alvarez --- Cargo.lock | 1 + cli/Cargo.toml | 1 + cli/src/commands/scan.rs | 112 ++++++++++++++++++++++++++++++++------- 3 files changed, 94 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d0bacc156..92aba6915 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4902,6 +4902,7 @@ dependencies = [ "encoding_rs", "env_logger", "globwalk", + "itertools 0.13.0", "log", "pprof", "protobuf", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c35e5a099..aaf1607e0 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -41,6 +41,7 @@ anyhow = { workspace = true } clap = { workspace = true, features = ["cargo", "derive"] } clap_complete = { workspace = true } globwalk = { workspace = true } +itertools = { workspace = true } enable-ansi-support = { workspace = true } env_logger = { workspace = true, optional = true, features = ["auto-color"] } log = { workspace = true, optional = true } diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index 6f1ec425b..c8735ebf1 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -9,11 +9,12 @@ use std::time::{Duration, Instant}; use anyhow::{bail, Context, Error}; use clap::{arg, value_parser, ArgAction, ArgMatches, Command, ValueEnum}; use crossbeam::channel::Sender; +use itertools::Itertools; use superconsole::style::Stylize; use superconsole::{Component, Line, Lines, Span}; use yansi::Color::{Cyan, Red, Yellow}; use yansi::Paint; -use yara_x::{Rule, Rules, ScanError, Scanner}; +use yara_x::{MetaValue, Rule, Rules, ScanError, Scanner}; use crate::commands::{ compile_rules, external_var_parser, truncate_with_ellipsis, @@ -47,6 +48,13 @@ pub fn scan() -> Command { .help("Path to the file or directory that will be scanned") .value_parser(value_parser!(PathBuf)) ) + 
.arg( + arg!(-o --"output-format" ) + .help("Output format for results") + .long_help(help::OUTPUT_FORMAT_LONG_HELP) + .required(false) + .value_parser(value_parser!(OutputFormats)) + ) .arg( arg!(-e --"print-namespace") .help("Print rule namespace") @@ -60,6 +68,10 @@ pub fn scan() -> Command { .help("Print matching patterns, limited to the first N bytes") .value_parser(value_parser!(usize)) ) + .arg( + arg!(-m --"print-meta") + .help("Print rule metadata") + ) .arg( arg!(--"disable-console-logs") .help("Disable printing console log messages") @@ -123,13 +135,6 @@ pub fn scan() -> Command { .value_parser(external_var_parser) .action(ArgAction::Append) ) - .arg( - arg!(-o --"output-format" ) - .help("Output format for results") - .long_help(help::OUTPUT_FORMAT_LONG_HELP) - .required(false) - .value_parser(value_parser!(OutputFormats)) - ) } pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> { @@ -350,6 +355,7 @@ fn print_rules_as_json( output: &Sender, ) { let print_namespace = args.get_flag("print-namespace"); + let print_meta = args.get_flag("print-meta"); let print_strings = args.get_flag("print-strings"); let print_strings_limit = args.get_one::("print-strings-limit"); @@ -374,10 +380,14 @@ fn print_rules_as_json( }) }; + if print_meta { + json_rule["meta"] = matching_rule.metadata().into_json(); + } + if print_strings || print_strings_limit.is_some() { let limit = print_strings_limit.unwrap_or(&STRINGS_LIMIT); + let mut match_vec: Vec = Vec::new(); for p in matching_rule.patterns() { - let mut match_vec: Vec = Vec::new(); for m in p.matches() { let match_range = m.range(); let match_data = m.data(); @@ -399,12 +409,24 @@ fn print_rules_as_json( .as_str(), ); } - let match_json = serde_json::json!({ + + let mut match_json = serde_json::json!({ "identifier": p.identifier(), "start": match_range.start, "length": match_range.len(), "data": s.as_str() }); + + if let Some(k) = m.xor_key() { + let mut p = String::with_capacity(s.len()); + for b in 
&match_data[..min(match_data.len(), *limit)] { + for c in (b ^ k).escape_ascii() { + p.push_str(format!("{}", c as char).as_str()); + } + } + match_json["xor_key"] = serde_json::json!(k); + match_json["plaintext"] = serde_json::json!(p); + } match_vec.push(match_json); } json_rule["strings"] = serde_json::json!(match_vec); @@ -425,6 +447,7 @@ fn print_rules_as_text( output: &Sender, ) { let print_namespace = args.get_flag("print-namespace"); + let print_meta = args.get_flag("print-meta"); let print_strings = args.get_flag("print-strings"); let print_strings_limit = args.get_one::("print-strings-limit"); @@ -433,20 +456,50 @@ fn print_rules_as_text( // `the `by_ref` method cannot be invoked on a trait object` #[allow(clippy::while_let_on_iterator)] while let Some(matching_rule) = rules.next() { - let line = if print_namespace { + let mut line = if print_namespace { format!( - "{}:{} {}", + "{}:{}", matching_rule.namespace().paint(Cyan).bold(), - matching_rule.identifier().paint(Cyan).bold(), - file_path.display(), + matching_rule.identifier().paint(Cyan).bold() ) } else { - format!( - "{} {}", - matching_rule.identifier().paint(Cyan).bold(), - file_path.display() - ) + format!("{}", matching_rule.identifier().paint(Cyan).bold()) }; + + let metadata = matching_rule.metadata(); + + if print_meta && !metadata.is_empty() { + line.push_str(" ["); + for (pos, (m, v)) in metadata.with_position() { + match v { + MetaValue::Bool(v) => { + line.push_str(&format!("{}={}", m, v)) + } + MetaValue::Integer(v) => { + line.push_str(&format!("{}={}", m, v)) + } + MetaValue::Float(v) => { + line.push_str(&format!("{}={}", m, v)) + } + MetaValue::String(v) => { + line.push_str(&format!("{}=\"{}\"", m, v)) + } + MetaValue::Bytes(v) => line.push_str(&format!( + "{}=\"{}\"", + m, + v.escape_ascii() + )), + }; + if !matches!(pos, itertools::Position::Last) { + line.push(','); + } + } + line.push(']'); + } + + line.push(' '); + line.push_str(&file_path.display().to_string()); + 
output.send(Message::Info(line)).unwrap(); if print_strings || print_strings_limit.is_some() { @@ -457,12 +510,31 @@ fn print_rules_as_text( let match_data = m.data(); let mut msg = format!( - "{:#x}:{}:{}: ", + "{:#x}:{}:{}", match_range.start, match_range.len(), p.identifier(), ); + match m.xor_key() { + Some(k) => { + msg.push_str(format!(" xor({:#x},", k).as_str()); + for b in + &match_data[..min(match_data.len(), *limit)] + { + for c in (b ^ k).escape_ascii() { + msg.push_str( + format!("{}", c as char).as_str(), + ); + } + } + msg.push_str("): "); + } + _ => { + msg.push_str(": "); + } + } + for b in &match_data[..min(match_data.len(), *limit)] { for c in b.escape_ascii() { msg.push_str(format!("{}", c as char).as_str());