Add bloom filter for duplicate execution of the same inputs (#2771)

* fixing empty multipart name * fixing clippy * New rules for the contributing (#2752) * Rules * more * aa * Improve Flexibility of DumpToDiskStage (#2753) * fixing empty multipart name * fixing clippy * improve flexibility of DumpToDiskStage * adding note to MIGRATION.md * Update bindgen requirement from 0.70.1 to 0.71.1 (#2756) Updates the requirements on [bindgen](https://github.com/rust-lang/rust-bindgen) to permit the latest version. - [Release notes](https://github.com/rust-lang/rust-bindgen/releases) - [Changelog](https://github.com/rust-lang/rust-bindgen/blob/main/CHANGELOG.md) - [Commits](rust-lang/rust-bindgen@v0.70.1...v0.71.1) --- updated-dependencies: - dependency-name: bindgen dependency-type: direct:production ... Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * No Use* from stages (#2745) * no from stage * fixer * doc fix * how was this working???? * more fixes * delete more * rq * cargo-fuzz * m * aa * Update CONTRIBUTING.md MIGRATION.md (#2762) * No Uses* from `fuzzer` (#2761) * go * fixing stuf * hello from windows * more * lolg * lolf * fix * a --------- Co-authored-by: Your Name <[email protected]> * Remove useless cfgs (#2764) * Link libresolv on all Apple OSs (#2767) * Somewhat ugly CI fix... (#2768) * Maybe fix CI * does this help? 
* Very dirty 'fix' * Add Input Types and Mutators for Numeric Types (#2760) * fixing empty multipart name * fixing clippy * New rules for the contributing (#2752) * Rules * more * aa * Improve Flexibility of DumpToDiskStage (#2753) * fixing empty multipart name * fixing clippy * improve flexibility of DumpToDiskStage * adding note to MIGRATION.md * Introduce WrappingMutator * introducing mutators for int types * fixing no_std * random fixes * Add hash derivation for WrappingInput * Revert fixes that broke things * Derive Default on WrappingInput * Add unit tests * Fixes according to code review * introduce mappable ValueInputs * remove unnecessary comments * Elide more lifetimes * remove dead code * simplify hashing * improve docs * improve randomization * rename method to align with standard library * add typedefs for int types for ValueMutRefInput * rename test * add safety notice to trait function * improve randomize performance for i128/u128 * rename macro * improve comment * actually check return values in test * make 128 bit int randomize even more efficient * shifting signed values --------- Co-authored-by: Dongjia "toka" Zhang <[email protected]> Co-authored-by: Dominik Maier <[email protected]> * Add HashMutator * Fix docs * Fix docs again * introducing bloom filter * fix tests * Implement evaluate_filtered * Add macros to libafl_bolts tuples for mapping and merging types (#2788) * Add macros * Use the macros for havoc_mutations * Fix docs * improve merge_tuple_list_type to accept n items * libafl_cc: Automatically find llvm_ar path (#2790) * imemory_ondisk: Don't fail write under any circumstances if locking is disabled (#2791) * imemory_ondisk: Don't fail write under any circumstances if locking is disabled * fmt * inmemory_ondisk: Add a log message on failure * clippy' * micro optimization * Revert changes to global Cargo.toml * Hide std-dependent dependency behind std feature * Fix example fuzzer * Rename constructor for filtered fuzzer * Reorder 
generics alphabetically * Rename HashingMutator, add note to MutationResult about filtered fuzzers * Improve StdFuzzer according to feedback * rename hashing mutator * Fix english in comment * Cleanup of old PRs that break the CI * Fix more CI bugs * Code cleanup * Remove unnecessary comments --------- Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: Dongjia "toka" Zhang <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Your Name <[email protected]> Co-authored-by: Sharad Khanna <[email protected]> Co-authored-by: Dominik Maier <[email protected]> Co-authored-by: s1341 <[email protected]>
AFLplusplus · Dec 28, 2024 · d8ec991 · d8ec991
1 parent 2a79ee5
commit d8ec991
Show file tree

Hide file tree

Showing 18 changed files with 298 additions and 47 deletions.
diff --git a/fuzzers/baby/baby_fuzzer_custom_executor/Cargo.toml b/fuzzers/baby/baby_fuzzer_custom_executor/Cargo.toml
@@ -8,8 +8,9 @@ authors = [
 edition = "2021"
 
 [features]
-default = ["std"]
+default = ["std", "bloom_input_filter"]
 tui = ["libafl/tui_monitor"]
+bloom_input_filter = ["std"]
 std = []
 
 [profile.dev]

diff --git a/fuzzers/baby/baby_fuzzer_custom_executor/src/main.rs b/fuzzers/baby/baby_fuzzer_custom_executor/src/main.rs
@@ -134,7 +134,11 @@ pub fn main() {
     let scheduler = QueueScheduler::new();
 
     // A fuzzer with feedbacks and a corpus scheduler
+    #[cfg(not(feature = "bloom_input_filter"))]
     let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
+    #[cfg(feature = "bloom_input_filter")]
+    let mut fuzzer =
+        StdFuzzer::with_bloom_input_filter(scheduler, feedback, objective, 10_000_000, 0.001);
 
     // Create the executor for an in-process function with just one observer
     let executor = CustomExecutor::new(&state);

diff --git a/libafl/Cargo.toml b/libafl/Cargo.toml
@@ -58,6 +58,7 @@ std = [
   "serial_test",
   "libafl_bolts/std",
   "typed-builder",
+  "fastbloom",
 ]
 
 ## Tracks the Feedbacks and the Objectives that were interesting for a Testcase
@@ -291,6 +292,7 @@ document-features = { workspace = true, optional = true }
 clap = { workspace = true, optional = true }
 num_enum = { workspace = true, optional = true }
 libipt = { workspace = true, optional = true }
+fastbloom = { version = "0.8.0", optional = true }
 
 [lints]
 workspace = true

diff --git a/libafl/src/executors/inprocess/mod.rs b/libafl/src/executors/inprocess/mod.rs
@@ -557,7 +557,7 @@ mod tests {
         let mut mgr = NopEventManager::new();
         let mut state =
             StdState::new(rand, corpus, solutions, &mut feedback, &mut objective).unwrap();
-        let mut fuzzer = StdFuzzer::<_, _, _>::new(sche, feedback, objective);
+        let mut fuzzer = StdFuzzer::new(sche, feedback, objective);
 
         let mut in_process_executor = InProcessExecutor::new(
             &mut harness,

diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs
@@ -2,7 +2,11 @@
 
 use alloc::{string::ToString, vec::Vec};
 use core::{fmt::Debug, time::Duration};
+#[cfg(feature = "std")]
+use std::hash::Hash;
 
+#[cfg(feature = "std")]
+use fastbloom::BloomFilter;
 use libafl_bolts::{current_time, tuples::MatchName};
 use serde::Serialize;
 
@@ -138,6 +142,16 @@ pub trait EvaluatorObservers<E, EM, I, S> {
 
 /// Evaluate an input modifying the state of the fuzzer
 pub trait Evaluator<E, EM, I, S> {
+    /// Runs the input only if it was (likely) not run before, triggering observers and feedback,
+    /// and adds the input to the list of previously executed inputs.
+    ///
+    /// Returns whether the input is interesting and, optionally, the index of the new
+    /// [`crate::corpus::Testcase`] in the corpus.
+    fn evaluate_filtered(
+        &mut self,
+        state: &mut S,
+        executor: &mut E,
+        manager: &mut EM,
+        input: I,
+    ) -> Result<(ExecuteInputResult, Option<CorpusId>), Error>;
+
     /// Runs the input and triggers observers and feedback,
     /// returns if is interesting an (option) the index of the new [`crate::corpus::Testcase`] in the corpus
     fn evaluate_input(
@@ -242,13 +256,14 @@ pub enum ExecuteInputResult {
 
 /// Your default fuzzer instance, for everyday use.
 #[derive(Debug)]
-pub struct StdFuzzer<CS, F, OF> {
+pub struct StdFuzzer<CS, F, IF, OF> {
     scheduler: CS,
     feedback: F,
     objective: OF,
+    input_filter: IF,
 }
 
-impl<CS, F, OF, S> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF>
+impl<CS, F, IF, OF, S> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, IF, OF>
 where
     S: HasCorpus,
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
@@ -264,7 +279,7 @@ where
     }
 }
 
-impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> {
+impl<CS, F, IF, OF> HasFeedback for StdFuzzer<CS, F, IF, OF> {
     type Feedback = F;
 
     fn feedback(&self) -> &Self::Feedback {
@@ -276,7 +291,7 @@ impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> {
     }
 }
 
-impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> {
+impl<CS, F, IF, OF> HasObjective for StdFuzzer<CS, F, IF, OF> {
     type Objective = OF;
 
     fn objective(&self) -> &OF {
@@ -288,8 +303,8 @@ impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> {
     }
 }
 
-impl<CS, EM, F, OF, OT, S> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, EM, F, IF, OF, OT, S> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S>
+    for StdFuzzer<CS, F, IF, OF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     EM: EventFirer<State = S>,
@@ -494,8 +509,8 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, IF, OF, S> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, IF, OF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: HasObservers + Executor<EM, Self, State = S>,
@@ -532,7 +547,48 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF>
+/// Decides whether an input should be executed when it is evaluated through
+/// [`Evaluator::evaluate_filtered`].
+///
+/// The trait is public because it appears as a bound on the public [`StdFuzzer`] impls and is
+/// required to pass a custom filter to [`StdFuzzer::with_input_filter`].
+pub trait InputFilter<I> {
+    /// Returns `true` if `input` should be executed, `false` if it should be skipped.
+    ///
+    /// Takes `&mut self` so that implementations can keep track of the inputs they have seen.
+    fn should_execute(&mut self, input: &I) -> bool;
+}
+
+/// A pseudo-filter that will execute each input.
+///
+/// This is the filter installed by [`StdFuzzer::new`], i.e. no duplicate-input filtering.
+#[derive(Debug)]
+pub struct NopInputFilter;
+impl<I> InputFilter<I> for NopInputFilter {
+    /// Always returns `true`; the input itself is never inspected.
+    #[inline]
+    #[must_use]
+    fn should_execute(&mut self, _input: &I) -> bool {
+        true
+    }
+}
+
+/// A filter that probabilistically prevents duplicate execution of the same input based on a bloom filter.
+///
+/// The filter is configured with a false-positive probability, so an input that was never
+/// executed may occasionally be reported as already seen and therefore be skipped.
+#[cfg(feature = "std")]
+#[derive(Debug)]
+pub struct BloomInputFilter {
+    /// The bloom filter that every checked input is inserted into.
+    bloom: BloomFilter,
+}
+
+#[cfg(feature = "std")]
+impl BloomInputFilter {
+    /// Builds a filter dimensioned for `items_count` expected inputs with a target
+    /// false-positive probability of `fp_p`.
+    #[must_use]
+    fn new(items_count: usize, fp_p: f64) -> Self {
+        Self {
+            bloom: BloomFilter::with_false_pos(fp_p).expected_items(items_count),
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl<I> InputFilter<I> for BloomInputFilter
+where
+    I: Hash,
+{
+    /// Inserts `input` into the bloom filter and allows execution only if the filter did not
+    /// already report it as present.
+    #[inline]
+    #[must_use]
+    fn should_execute(&mut self, input: &I) -> bool {
+        // NOTE(review): relies on `insert` returning `true` when the item was (probably)
+        // already contained — confirm against the fastbloom version in use.
+        let probably_seen = self.bloom.insert(input);
+        !probably_seen
+    }
+}
+
+impl<CS, E, EM, F, IF, OF, S> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, IF, OF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: HasObservers + Executor<EM, Self, State = S>,
@@ -549,7 +605,22 @@ where
         + UsesInput<Input = <S::Corpus as Corpus>::Input>,
     <S::Corpus as Corpus>::Input: Input,
     S::Solutions: Corpus<Input = <S::Corpus as Corpus>::Input>,
+    IF: InputFilter<<S::Corpus as Corpus>::Input>,
 {
+    /// Asks the input filter first: a rejected input is not executed and is reported as
+    /// [`ExecuteInputResult::None`] without a corpus id; an accepted input is handed to
+    /// `evaluate_input` unchanged.
+    fn evaluate_filtered(
+        &mut self,
+        state: &mut S,
+        executor: &mut E,
+        manager: &mut EM,
+        input: <S::Corpus as Corpus>::Input,
+    ) -> Result<(ExecuteInputResult, Option<CorpusId>), Error> {
+        if !self.input_filter.should_execute(&input) {
+            // Filtered out: nothing was run, so there is nothing interesting to report.
+            return Ok((ExecuteInputResult::None, None));
+        }
+        self.evaluate_input(state, executor, manager, input)
+    }
+
     /// Process one input, adding to the respective corpora if needed and firing the right events
     #[inline]
     fn evaluate_input_events(
@@ -562,6 +633,7 @@ where
     ) -> Result<(ExecuteInputResult, Option<CorpusId>), Error> {
         self.evaluate_input_with_observers(state, executor, manager, input, send_events)
     }
+
     fn add_disabled_input(
         &mut self,
         state: &mut S,
@@ -573,6 +645,7 @@ where
         let id = state.corpus_mut().add_disabled(testcase)?;
         Ok(id)
     }
+
     /// Adds an input, even if it's not considered `interesting` by any of the executors
     fn add_input(
         &mut self,
@@ -672,7 +745,7 @@ where
     }
 }
 
-impl<CS, E, EM, F, OF, S, ST> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, IF, OF, S, ST> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, IF, OF>
 where
     CS: Scheduler<S::Input, S>,
     E: UsesState<State = S>,
@@ -796,17 +869,44 @@ where
     }
 }
 
-impl<CS, F, OF> StdFuzzer<CS, F, OF> {
-    /// Create a new `StdFuzzer` with standard behavior.
-    pub fn new(scheduler: CS, feedback: F, objective: OF) -> Self {
+impl<CS, F, IF, OF> StdFuzzer<CS, F, IF, OF> {
+    /// Create a new [`StdFuzzer`] with standard behavior and the provided duplicate input execution filter.
+    ///
+    /// The `input_filter` is stored as-is; it is the filter consulted by `evaluate_filtered`.
+    pub fn with_input_filter(scheduler: CS, feedback: F, objective: OF, input_filter: IF) -> Self {
         Self {
             scheduler,
             feedback,
             objective,
+            input_filter,
         }
     }
 }
 
+impl<CS, F, OF> StdFuzzer<CS, F, NopInputFilter, OF> {
+    /// Create a new [`StdFuzzer`] with standard behavior that never filters out inputs:
+    /// the installed [`NopInputFilter`] lets every input be executed.
+    pub fn new(scheduler: CS, feedback: F, objective: OF) -> Self {
+        Self {
+            scheduler,
+            feedback,
+            objective,
+            input_filter: NopInputFilter,
+        }
+    }
+}
+
+#[cfg(feature = "std")] // hashing requires std
+impl<CS, F, OF> StdFuzzer<CS, F, BloomInputFilter, OF> {
+    /// Create a new [`StdFuzzer`] that, with high probability, executes each input only once
+    /// when inputs are evaluated through `evaluate_filtered`.
+    ///
+    /// Each input is hashed and tracked in a bloom filter sized for `items_count` expected
+    /// inputs with a target false-positive probability of `fp_p`.
+    ///
+    /// Use this implementation if hashing each input is very fast compared to executing potential duplicate inputs.
+    pub fn with_bloom_input_filter(
+        scheduler: CS,
+        feedback: F,
+        objective: OF,
+        items_count: usize,
+        fp_p: f64,
+    ) -> Self {
+        Self::with_input_filter(
+            scheduler,
+            feedback,
+            objective,
+            BloomInputFilter::new(items_count, fp_p),
+        )
+    }
+}
+
 /// Structs with this trait will execute an input
 pub trait ExecutesInput<E, EM, I, S> {
     /// Runs the input and triggers observers and feedback
@@ -819,8 +919,8 @@ pub trait ExecutesInput<E, EM, I, S> {
     ) -> Result<ExitKind, Error>;
 }
 
-impl<CS, E, EM, F, OF, S> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S>
-    for StdFuzzer<CS, F, OF>
+impl<CS, E, EM, F, IF, OF, S> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S>
+    for StdFuzzer<CS, F, IF, OF>
 where
     CS: Scheduler<<S::Corpus as Corpus>::Input, S>,
     E: Executor<EM, Self, State = S> + HasObservers,
@@ -913,3 +1013,63 @@ where
         unimplemented!("NopFuzzer cannot fuzz");
     }
 }
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+    use core::cell::RefCell;
+
+    use libafl_bolts::rands::StdRand;
+
+    use super::{Evaluator, StdFuzzer};
+    use crate::{
+        corpus::InMemoryCorpus,
+        events::NopEventManager,
+        executors::{ExitKind, InProcessExecutor},
+        inputs::BytesInput,
+        schedulers::StdScheduler,
+        state::StdState,
+    };
+
+    /// Checks that `evaluate_filtered` skips an input it has already seen, while
+    /// `evaluate_input` always runs the harness and never touches the filter.
+    #[test]
+    fn filtered_execution() {
+        // Counts how often the harness is actually invoked.
+        let execution_count = RefCell::new(0);
+        let scheduler = StdScheduler::new();
+        let mut fuzzer = StdFuzzer::with_bloom_input_filter(scheduler, (), (), 100, 1e-4);
+        let mut state = StdState::new(
+            StdRand::new(),
+            InMemoryCorpus::new(),
+            InMemoryCorpus::new(),
+            &mut (),
+            &mut (),
+        )
+        .unwrap();
+        let mut manager = NopEventManager::new();
+        let mut harness = |_input: &BytesInput| {
+            *execution_count.borrow_mut() += 1;
+            ExitKind::Ok
+        };
+        let mut executor =
+            InProcessExecutor::new(&mut harness, (), &mut fuzzer, &mut state, &mut manager)
+                .unwrap();
+        let input = BytesInput::new(vec![1, 2, 3]);
+        assert!(fuzzer
+            .evaluate_input(&mut state, &mut executor, &mut manager, input.clone())
+            .is_ok());
+        assert_eq!(1, *execution_count.borrow()); // evaluate_input does not add it to the filter
+
+        assert!(fuzzer
+            .evaluate_filtered(&mut state, &mut executor, &mut manager, input.clone())
+            .is_ok());
+        assert_eq!(2, *execution_count.borrow()); // first filtered run executes and adds it to the filter
+
+        assert!(fuzzer
+            .evaluate_filtered(&mut state, &mut executor, &mut manager, input.clone())
+            .is_ok());
+        assert_eq!(2, *execution_count.borrow()); // the harness is not called
+
+        assert!(fuzzer
+            .evaluate_input(&mut state, &mut executor, &mut manager, input.clone())
+            .is_ok());
+        assert_eq!(3, *execution_count.borrow()); // evaluate_input ignores filters
+    }
+}