Skip to content

Commit

Permalink
feat(cli): implement --profiling option for the scan command.
Browse files Browse the repository at this point in the history
This option prints information about the most expensive rules when YARA-X is compiled with the `rules-profiling` feature enabled.
  • Loading branch information
plusvic committed Oct 22, 2024
1 parent 812a8f7 commit 65ae908
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 9 deletions.
4 changes: 4 additions & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ debug-cmd = []
# Example: RUST_LOG=info ./yr scan some_rule.yar some_file
logging = ["dep:log", "dep:env_logger"]

# Enables rules profiling. Notice that profiling has an impact on scan
# performance.
rules-profiling = ["yara-x/rules-profiling"]


[dependencies]
ascii_tree = { workspace = true }
Expand Down
5 changes: 5 additions & 0 deletions cli/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ pub fn exec_check(args: &ArgMatches) -> anyhow::Result<()> {

w.walk(
CheckState::new(),
// Initialization
|_, _| {},
// Action
|state, output, file_path, _| {
let src = fs::read(file_path.clone())
.with_context(|| {
Expand Down Expand Up @@ -129,6 +131,9 @@ pub fn exec_check(args: &ArgMatches) -> anyhow::Result<()> {

Ok(())
},
// Finalization
|_, _| {},
// Error handling
|err, output| {
let _ = output.send(Message::Error(format!(
"{} {}",
Expand Down
5 changes: 5 additions & 0 deletions cli/src/commands/fix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ pub fn exec_fix_encoding(args: &ArgMatches) -> anyhow::Result<()> {

w.walk(
FixEncodingState::new(),
// Initialization
|_, _| {},
// Action
|state, output, file_path, _| {
let src = fs::read(&file_path).with_context(|| {
format!("can not read `{}`", file_path.display())
Expand Down Expand Up @@ -122,6 +124,9 @@ pub fn exec_fix_encoding(args: &ArgMatches) -> anyhow::Result<()> {

Ok(())
},
// Finalization
|_, _| {},
// Error handling
|err, output| {
let _ = output.send(Message::Error(format!(
"{} {}",
Expand Down
83 changes: 83 additions & 0 deletions cli/src/commands/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ pub fn scan() -> Command {
arg!(--"path-as-namespace")
.help("Use file path as rule namespace")
)
.arg(
arg!(--"profiling")
.help("Show profiling information")
)
.arg(
arg!(-m --"print-meta")
.help("Print rule metadata")
Expand Down Expand Up @@ -177,13 +181,37 @@ pub fn scan() -> Command {

}

#[cfg(feature = "rules-profiling")]
/// Owned, per-rule profiling record used by the `scan` command.
///
/// Values of this type are built from `yara_x::ProfilingData` (see the
/// `From` implementation) and accumulated across scanner threads so that the
/// most expensive rules can be reported once the scan finishes.
#[derive(Debug, Clone)]
struct ProfilingData {
    /// Namespace the rule belongs to.
    pub namespace: String,
    /// Rule identifier.
    pub rule: String,
    /// Time attributed to evaluating the rule's condition.
    pub condition_exec_time: Duration,
    /// Time attributed to matching the rule's patterns.
    pub pattern_matching_time: Duration,
    /// Sum of `condition_exec_time` and `pattern_matching_time`; this is the
    /// key used for ordering rules in the final report.
    pub total_time: Duration,
}

#[cfg(feature = "rules-profiling")]
impl From<yara_x::ProfilingData<'_>> for ProfilingData {
    /// Builds an owned record out of the profiling data reported by the
    /// scanner.
    ///
    /// The namespace and rule name are turned into owned `String`s via
    /// `to_string()`, both timings are carried over unchanged, and
    /// `total_time` is computed as their sum.
    fn from(value: yara_x::ProfilingData) -> Self {
        let condition_exec_time = value.condition_exec_time;
        let pattern_matching_time = value.pattern_matching_time;

        Self {
            namespace: value.namespace.to_string(),
            rule: value.rule.to_string(),
            total_time: condition_exec_time + pattern_matching_time,
            condition_exec_time,
            pattern_matching_time,
        }
    }
}

pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
let mut rules_path = args
.get_many::<(Option<String>, PathBuf)>("[NAMESPACE:]RULES_PATH")
.unwrap();

let target_path = args.get_one::<PathBuf>("TARGET_PATH").unwrap();
let compiled_rules = args.get_flag("compiled-rules");
let profiling = args.get_flag("profiling");
let num_threads = args.get_one::<u8>("threads");
let skip_larger = args.get_one::<u64>("skip-larger");
let disable_console_logs = args.get_flag("disable-console-logs");
Expand All @@ -204,6 +232,13 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
// collect to eagerly call the parser on each element
.collect::<Vec<_>>();

if profiling && !cfg!(feature = "rules-profiling") {
bail!(
"{} requires that YARA-X is built with profiling support. Use `cargo build --features=rules-profiling`.",
Paint::bold("--profiling")
);
}

if recursive.is_some() && target_path.is_file() {
bail!(
"can't use '{}' when <TARGET_PATH> is a file",
Expand Down Expand Up @@ -282,6 +317,10 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
all_metadata
};

#[cfg(feature = "rules-profiling")]
let most_expensive_rules: Mutex<Vec<ProfilingData>> =
Mutex::new(Vec::new());

w.walk(
state,
// Initialization
Expand Down Expand Up @@ -361,6 +400,27 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {

Ok(())
},
// Finalization
#[cfg(feature = "rules-profiling")]
|scanner, _output| {
if profiling {
let mut mer = most_expensive_rules.lock().unwrap();
for er in scanner.most_expensive_rules(1000) {
if let Some(r) = mer.iter_mut().find(|r| {
r.rule == er.rule && r.namespace == er.namespace
}) {
r.condition_exec_time += er.condition_exec_time;
r.pattern_matching_time += er.pattern_matching_time;
r.total_time +=
er.condition_exec_time + er.pattern_matching_time;
} else {
mer.push(er.into());
}
}
}
},
#[cfg(not(feature = "rules-profiling"))]
|_, _| {},
// Error handler
|err, output| {
let error = err.to_string();
Expand Down Expand Up @@ -390,6 +450,29 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
)
.unwrap();

#[cfg(feature = "rules-profiling")]
if profiling {
let mut mer = most_expensive_rules.lock().unwrap();
// Sort by total time in descending order.
mer.sort_by(|a, b| b.total_time.cmp(&a.total_time));
println!("\nMost expensive rules:");
for r in mer.iter().take(10) {
println!(
r#"
+ rule name: {}
namespace: {}
pattern matching: {:?}
condition evaluation: {:?}
total: {:?}"#,
r.rule,
r.namespace,
r.pattern_matching_time,
r.condition_exec_time,
r.total_time
);
}
}

Ok(())
}

Expand Down
26 changes: 17 additions & 9 deletions cli/src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,11 @@ impl<'a> Walker<'a> {
/// |state, output, file_path, scanner| {
/// scanner.scan_file(file_path);
/// }
/// // This function is called once by each thread, after the thread
/// // has finished processing its files.
/// |scanner, output| {
/// // Do some final action with the scanner before it is released.
/// }
/// // This function is called with every error that occurs during the
/// // walk.
/// |err, output| {
Expand Down Expand Up @@ -369,23 +374,25 @@ impl<'a> ParWalker<'a> {
self
}

/// Runs `func` on every file.
/// Runs `action` on every file.
///
/// See [`ParWalker`] for details.
pub fn walk<S, T, I, F, E>(
pub fn walk<S, T, I, A, F, E>(
self,
state: S,
init: I,
func: F,
e: E,
action: A,
finalize: F,
error: E,
) -> thread::Result<()>
where
S: Component + Send + Sync,
I: Fn(&S, &Sender<Message>) -> T + Send + Copy + Sync,
F: Fn(&S, &Sender<Message>, PathBuf, &mut T) -> anyhow::Result<()>
A: Fn(&S, &Sender<Message>, PathBuf, &mut T) -> anyhow::Result<()>
+ Send
+ Sync
+ Copy,
F: Fn(&T, &Sender<Message>) + Send + Copy + Sync,
E: Fn(anyhow::Error, &Sender<Message>) -> anyhow::Result<()>
+ Send
+ Copy,
Expand Down Expand Up @@ -422,19 +429,20 @@ impl<'a> ParWalker<'a> {
threads.push(s.spawn(move |_| {
let mut per_thread_obj = init(&state, &msg_send);
for path in paths_recv {
let res = func(
let res = action(
&state,
&msg_send,
path.to_path_buf(),
&mut per_thread_obj,
);
if let Err(err) = res {
if e(err, &msg_send).is_err() {
if error(err, &msg_send).is_err() {
let _ = msg_send.send(Message::Abort);
break;
}
}
}
finalize(&per_thread_obj, &msg_send);
}));
}

Expand All @@ -452,7 +460,7 @@ impl<'a> ParWalker<'a> {

// Invoke the error callback and abort the walk if the
// callback returns error.
if let Err(err) = e(err, &msg_send) {
if let Err(err) = error(err, &msg_send) {
let _ = msg_send.send(Message::Abort);
return Err(err);
}
Expand All @@ -463,7 +471,7 @@ impl<'a> ParWalker<'a> {
);

if let Err(err) = res {
if e(err, &msg_send).is_err() {
if error(err, &msg_send).is_err() {
let _ = msg_send.send(Message::Abort);
}
}
Expand Down

0 comments on commit 65ae908

Please sign in to comment.