-
-
Notifications
You must be signed in to change notification settings - Fork 327
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filter Duplicate Input Execution #2771
base: main
Are you sure you want to change the base?
Changes from 20 commits
aefb8e3
a98c981
7acf5a3
2da6dc5
1e571a0
d020b9e
e1d0b92
c842eda
31d9b56
c9eb2a8
93b64f9
294d2f1
c170986
bab9890
71fc1c6
a2fa10c
30e1db4
025a56a
63b9ac9
92c3f08
6395df9
17c63fe
61120bf
8757a33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,9 @@ authors = [ | |
edition = "2021" | ||
|
||
[features] | ||
default = ["std"] | ||
default = ["std", "bloom_filter"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The feature flag should be called by the name of the feature, not by implementation detail. Maybe something like. "reexecution_filter" or similar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
tui = ["libafl/tui_monitor"] | ||
bloom_filter = ["std"] | ||
std = [] | ||
|
||
[profile.dev] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,7 +133,12 @@ pub fn main() { | |
let scheduler = QueueScheduler::new(); | ||
|
||
// A fuzzer with feedbacks and a corpus scheduler | ||
#[cfg(not(feature = "bloom_filter"))] | ||
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); | ||
#[cfg(feature = "bloom_filter")] | ||
let mut fuzzer = | ||
StdFuzzer::new_with_bloom_filter(scheduler, feedback, objective, 10_000_000, 0.001) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
.unwrap(); | ||
|
||
// Create the executor for an in-process function with just one observer | ||
let executor = CustomExecutor::new(&state); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -291,6 +291,7 @@ document-features = { workspace = true, optional = true } | |
clap = { workspace = true, optional = true } | ||
num_enum = { workspace = true, optional = true } | ||
libipt = { workspace = true, optional = true } | ||
bloomfilter = "3.0.1" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you look at |
||
|
||
[lints] | ||
workspace = true | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,10 @@ | |
|
||
use alloc::{string::ToString, vec::Vec}; | ||
use core::{fmt::Debug, time::Duration}; | ||
#[cfg(feature = "std")] | ||
use std::hash::Hash; | ||
|
||
use bloomfilter::Bloom; | ||
use libafl_bolts::{current_time, tuples::MatchName}; | ||
use serde::Serialize; | ||
|
||
|
@@ -243,13 +246,14 @@ pub enum ExecuteInputResult { | |
|
||
/// Your default fuzzer instance, for everyday use. | ||
#[derive(Debug)] | ||
pub struct StdFuzzer<CS, F, OF> { | ||
pub struct StdFuzzer<CS, F, OF, IF> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Always sort generics alphabetically |
||
scheduler: CS, | ||
feedback: F, | ||
objective: OF, | ||
input_filter: IF, | ||
} | ||
|
||
impl<CS, F, OF, S> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF> | ||
impl<CS, F, OF, S, IF> HasScheduler<<S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF, IF> | ||
where | ||
S: HasCorpus, | ||
CS: Scheduler<<S::Corpus as Corpus>::Input, S>, | ||
|
@@ -265,7 +269,7 @@ where | |
} | ||
} | ||
|
||
impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> { | ||
impl<CS, F, OF, IF> HasFeedback for StdFuzzer<CS, F, OF, IF> { | ||
type Feedback = F; | ||
|
||
fn feedback(&self) -> &Self::Feedback { | ||
|
@@ -277,7 +281,7 @@ impl<CS, F, OF> HasFeedback for StdFuzzer<CS, F, OF> { | |
} | ||
} | ||
|
||
impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> { | ||
impl<CS, F, OF, IF> HasObjective for StdFuzzer<CS, F, OF, IF> { | ||
type Objective = OF; | ||
|
||
fn objective(&self) -> &OF { | ||
|
@@ -289,8 +293,8 @@ impl<CS, F, OF> HasObjective for StdFuzzer<CS, F, OF> { | |
} | ||
} | ||
|
||
impl<CS, EM, F, OF, OT, S> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S> | ||
for StdFuzzer<CS, F, OF> | ||
impl<CS, EM, F, OF, OT, S, IF> ExecutionProcessor<EM, <S::Corpus as Corpus>::Input, OT, S> | ||
for StdFuzzer<CS, F, OF, IF> | ||
where | ||
CS: Scheduler<<S::Corpus as Corpus>::Input, S>, | ||
EM: EventFirer<State = S>, | ||
|
@@ -491,8 +495,8 @@ where | |
} | ||
} | ||
|
||
impl<CS, E, EM, F, OF, S> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S> | ||
for StdFuzzer<CS, F, OF> | ||
impl<CS, E, EM, F, OF, S, IF> EvaluatorObservers<E, EM, <S::Corpus as Corpus>::Input, S> | ||
for StdFuzzer<CS, F, OF, IF> | ||
where | ||
CS: Scheduler<<S::Corpus as Corpus>::Input, S>, | ||
E: HasObservers + Executor<EM, Self, State = S>, | ||
|
@@ -528,7 +532,43 @@ where | |
} | ||
} | ||
|
||
impl<CS, E, EM, F, OF, S> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S> for StdFuzzer<CS, F, OF> | ||
trait InputFilter<I> { | ||
fn should_execute(&mut self, input: &I) -> bool; | ||
} | ||
|
||
/// A pseudo-filter that will execute each input. | ||
#[derive(Debug)] | ||
pub struct NopInputFilter; | ||
impl<I> InputFilter<I> for NopInputFilter { | ||
fn should_execute(&mut self, _input: &I) -> bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
true | ||
} | ||
} | ||
|
||
/// A filter that probabilistically prevents duplicate execution of the same input based on a bloom filter. | ||
#[cfg(feature = "std")] | ||
#[derive(Debug)] | ||
pub struct BloomInputFilter<I> { | ||
bloom: Bloom<I>, | ||
} | ||
|
||
#[cfg(feature = "std")] | ||
impl<I> BloomInputFilter<I> { | ||
fn new(items_count: usize, fp_p: f64) -> Result<Self, Error> { | ||
let bloom = Bloom::new_for_fp_rate(items_count, fp_p).map_err(Error::illegal_argument)?; | ||
Ok(Self { bloom }) | ||
} | ||
} | ||
|
||
#[cfg(feature = "std")] | ||
impl<I: Hash> InputFilter<I> for BloomInputFilter<I> { | ||
fn should_execute(&mut self, input: &I) -> bool { | ||
!self.bloom.check_and_set(input) | ||
} | ||
} | ||
|
||
impl<CS, E, EM, F, OF, S, IF> Evaluator<E, EM, <S::Corpus as Corpus>::Input, S> | ||
for StdFuzzer<CS, F, OF, IF> | ||
where | ||
CS: Scheduler<<S::Corpus as Corpus>::Input, S>, | ||
E: HasObservers + Executor<EM, Self, State = S>, | ||
|
@@ -545,6 +585,7 @@ where | |
+ UsesInput<Input = <S::Corpus as Corpus>::Input>, | ||
<S::Corpus as Corpus>::Input: Input, | ||
S::Solutions: Corpus<Input = <S::Corpus as Corpus>::Input>, | ||
IF: InputFilter<<S::Corpus as Corpus>::Input>, | ||
{ | ||
/// Process one input, adding to the respective corpora if needed and firing the right events | ||
#[inline] | ||
|
@@ -556,7 +597,11 @@ where | |
input: <S::Corpus as Corpus>::Input, | ||
send_events: bool, | ||
) -> Result<(ExecuteInputResult, Option<CorpusId>), Error> { | ||
self.evaluate_input_with_observers(state, executor, manager, input, send_events) | ||
if self.input_filter.should_execute(&input) { | ||
self.evaluate_input_with_observers(state, executor, manager, input, send_events) | ||
} else { | ||
Ok((ExecuteInputResult::None, None)) | ||
} | ||
} | ||
fn add_disabled_input( | ||
&mut self, | ||
|
@@ -668,7 +713,7 @@ where | |
} | ||
} | ||
|
||
impl<CS, E, EM, F, OF, S, ST> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, OF> | ||
impl<CS, E, EM, F, OF, S, ST, IF> Fuzzer<E, EM, S, ST> for StdFuzzer<CS, F, OF, IF> | ||
where | ||
CS: Scheduler<S::Input, S>, | ||
E: UsesState<State = S>, | ||
|
@@ -792,16 +837,40 @@ where | |
} | ||
} | ||
|
||
impl<CS, F, OF> StdFuzzer<CS, F, OF> { | ||
impl<CS, F, OF> StdFuzzer<CS, F, OF, NopInputFilter> { | ||
/// Create a new `StdFuzzer` with standard behavior. | ||
pub fn new(scheduler: CS, feedback: F, objective: OF) -> Self { | ||
Self { | ||
scheduler, | ||
feedback, | ||
objective, | ||
input_filter: NopInputFilter, | ||
} | ||
} | ||
} | ||
impl<CS, F, OF, I> StdFuzzer<CS, F, OF, BloomInputFilter<I>> { | ||
/// Create a new [`StdFuzzer`] that, with high probability, executes each distinct input only once. | ||
/// | ||
/// This is achieved by hashing each input and using a bloom filter to differentiate inputs. | ||
/// | ||
/// Use this implementation if hashing each input is very fast compared to executing potential duplicate inputs. | ||
pub fn new_with_bloom_filter( | ||
scheduler: CS, | ||
feedback: F, | ||
objective: OF, | ||
items_count: usize, | ||
fp_p: f64, | ||
) -> Result<Self, Error> { | ||
let input_filter = BloomInputFilter::new(items_count, fp_p)?; | ||
|
||
Ok(Self { | ||
scheduler, | ||
feedback, | ||
objective, | ||
input_filter, | ||
}) | ||
} | ||
} | ||
|
||
/// Structs with this trait will execute an input | ||
pub trait ExecutesInput<E, EM, I, S> { | ||
|
@@ -815,8 +884,8 @@ pub trait ExecutesInput<E, EM, I, S> { | |
) -> Result<ExitKind, Error>; | ||
} | ||
|
||
impl<CS, E, EM, F, OF, S> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S> | ||
for StdFuzzer<CS, F, OF> | ||
impl<CS, E, EM, F, OF, S, IF> ExecutesInput<E, EM, <S::Corpus as Corpus>::Input, S> | ||
for StdFuzzer<CS, F, OF, IF> | ||
where | ||
CS: Scheduler<<S::Corpus as Corpus>::Input, S>, | ||
E: Executor<EM, Self, State = S> + HasObservers, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
//! A wrapper around a [`Mutator`] that ensures an input really changed [`MutationResult::Mutated`] | ||
//! by hashing pre- and post-mutation | ||
use std::{borrow::Cow, hash::Hash}; | ||
|
||
use libafl_bolts::{generic_hash_std, Error, Named}; | ||
|
||
use super::{MutationResult, Mutator}; | ||
|
||
/// A wrapper around a [`Mutator`] that ensures an input really changed [`MutationResult::Mutated`] | ||
/// by hashing pre- and post-mutation | ||
#[derive(Debug)] | ||
pub struct HashMutator<M> { | ||
inner: M, | ||
name: Cow<'static, str>, | ||
} | ||
|
||
impl<M> HashMutator<M> | ||
where | ||
M: Named, | ||
{ | ||
/// Create a new [`HashMutator`] | ||
pub fn new(inner: M) -> Self { | ||
let name = Cow::Owned(format!("HashMutator<{}>", inner.name().clone())); | ||
Self { inner, name } | ||
} | ||
} | ||
|
||
impl<M, I, S> Mutator<I, S> for HashMutator<M> | ||
where | ||
I: Hash, | ||
M: Mutator<I, S>, | ||
{ | ||
fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, Error> { | ||
let before = generic_hash_std(input); | ||
self.inner.mutate(state, input)?; | ||
if before == generic_hash_std(input) { | ||
Ok(MutationResult::Skipped) | ||
} else { | ||
Ok(MutationResult::Mutated) | ||
} | ||
} | ||
} | ||
|
||
impl<M> Named for HashMutator<M> { | ||
fn name(&self) -> &Cow<'static, str> { | ||
&self.name | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use crate::{ | ||
inputs::BytesInput, | ||
mutators::{BytesSetMutator, HashMutator, MutationResult, Mutator}, | ||
state::NopState, | ||
}; | ||
|
||
#[test] | ||
fn not_mutated() { | ||
let mut state: NopState<BytesInput> = NopState::new(); | ||
let mut inner = BytesSetMutator::new(); | ||
|
||
let mut input = BytesInput::new(vec![0; 5]); | ||
|
||
// nothing changed, yet `MutationResult::Mutated` was reported | ||
assert_eq!( | ||
MutationResult::Mutated, | ||
inner.mutate(&mut state, &mut input).unwrap() | ||
); | ||
assert_eq!(BytesInput::new(vec![0; 5]), input); | ||
|
||
// now it is correctly reported as `MutationResult::Skipped` | ||
let mut hash_mutator = HashMutator::new(inner); | ||
assert_eq!( | ||
MutationResult::Skipped, | ||
hash_mutator.mutate(&mut state, &mut input).unwrap() | ||
); | ||
assert_eq!(BytesInput::new(vec![0; 5]), input); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,11 @@ pub use mapping::*; | |
pub mod tuneable; | ||
pub use tuneable::*; | ||
|
||
#[cfg(feature = "std")] | ||
pub mod hash; | ||
#[cfg(feature = "std")] | ||
pub use hash::*; | ||
|
||
#[cfg(feature = "unicode")] | ||
pub mod unicode; | ||
#[cfg(feature = "unicode")] | ||
|
@@ -84,12 +89,14 @@ impl From<i32> for MutationId { | |
} | ||
} | ||
|
||
/// The result of a mutation. | ||
/// If the mutation got skipped, the target | ||
/// will not be executed with the returned input. | ||
/// Result of the mutation. | ||
/// | ||
/// [`MutationResult::Mutated`] does not necessarily mean that the input changed, | ||
/// just that the mutator did something. For slow targets, consider wrapping your | ||
/// mutator in a [`hash::HashMutator`]. | ||
#[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||
pub enum MutationResult { | ||
/// The [`Mutator`] mutated this `Input`. | ||
/// The [`Mutator`] executed on this `Input`. It may still be the same. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe something like: You could even reference the bloom filter feature here |
||
Mutated, | ||
/// The [`Mutator`] did not mutate this `Input`. It was `Skipped`. | ||
Skipped, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's just testing for I assume?