diff --git a/Cargo.toml b/Cargo.toml
index d0352f178c..5d3e1a922b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ members = [
     "libafl_tinyinst",
     "libafl_sugar",
     "libafl_nyx",
+    "libafl_unicorn",
     "libafl_concolic/symcc_runtime",
     "libafl_concolic/symcc_libafl",
     "libafl_concolic/test/dump_constraints",
diff --git a/fuzzers/unicorn/Cargo.toml b/fuzzers/unicorn/Cargo.toml
new file mode 100644
index 0000000000..81a47a6e3e
--- /dev/null
+++ b/fuzzers/unicorn/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "unicorn"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+libafl = { path = "../../libafl/" }
+libafl_unicorn = { path = "../../libafl_unicorn/" }
+libafl_targets = { path = "../../libafl_targets" }
+
+unicorn-engine = "2.0.1"
+iced-x86 = "1.18.0"
\ No newline at end of file
diff --git a/fuzzers/unicorn/bin/Makefile b/fuzzers/unicorn/bin/Makefile
new file mode 100644
index 0000000000..3c32847919
--- /dev/null
+++ b/fuzzers/unicorn/bin/Makefile
@@ -0,0 +1,31 @@
+arm64="aarch64-linux-gnu"
+arm="arm-linux-gnueabihf"
+x64="x86_64-linux-gnu"
+assembly_arm64:
+	$(arm64)-gcc -O2 -S -c foo.c -o foo_arm64.s
+
+binary_arm64:
+	$(arm64)-as foo_arm64.s -o foo_arm64
+
+assembly_arm:
+	$(arm)-gcc -O2 -S -c foo.c -o foo_arm.s
+
+binary_arm:
+	$(arm)-as foo_arm.s -o foo_arm
+
+assembly_x86:
+	$(x64)-gcc -O2 -S -c foo.c -o foo_x86.s
+
+binary_x86:
+	$(x64)-as foo_x86.s -o foo_x86
+
+build_arm: assembly_arm binary_arm
+build_arm64: assembly_arm64 binary_arm64
+build_x86: assembly_x86 binary_x86
+
+clean:
+	rm -f foo_*
+
+
+all: build_arm build_arm64 build_x86
+# sudo apt install gcc-arm-linux-gnueabihf gcc-aarch64-linux-gnu
\ No newline at end of file
diff --git a/fuzzers/unicorn/bin/foo.c b/fuzzers/unicorn/bin/foo.c
new file mode 100644
index 0000000000..efabee26fc
--- /dev/null
+++ b/fuzzers/unicorn/bin/foo.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#define len 2
+
+int main() {
+  volatile unsigned char a;  // = 0x1;
+  volatile unsigned char b;  // = 0x0;
+  volatile unsigned char c = 0;  // The result, so should be initialized at 0;
+
+  /*volatile unsigned char f[len];
+
+  for(int i = 0; i< len; i++){
+          f[i] = i;
+  }*/
+  c = 0x1;
+  if (a > b) {
+    c = 0x2;
+    if (a > 0x20) {
+      c = 0x3;
+      if (a == 0x50) {
+        c = 0x4;
+        if (b == 0x24) { c = 0x5; }
+      }
+    }
+  }
+  /*
+  a = 0xDE;
+  b = 0xEA;
+  c = 0xBE;
+  */
+  return c;
+}
\ No newline at end of file
diff --git a/fuzzers/unicorn/src/main.rs b/fuzzers/unicorn/src/main.rs
new file mode 100644
index 0000000000..a33ee5e67f
--- /dev/null
+++ b/fuzzers/unicorn/src/main.rs
@@ -0,0 +1,255 @@
+use libafl_unicorn::emu::{Emulator, CODE_ADDRESS};
+use std::{env, path::PathBuf, time::Duration};
+
+use libafl::{
+    bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice},
+    corpus::{InMemoryCorpus, OnDiskCorpus},
+    events::SimpleEventManager,
+    executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor},
+    feedback_or, feedback_or_fast,
+    feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
+    fuzzer::{Fuzzer, StdFuzzer},
+    generators::RandBytesGenerator,
+    inputs::{BytesInput, HasTargetBytes},
+    monitors::MultiMonitor,
+    mutators::scheduled::{havoc_mutations, StdScheduledMutator},
+    observers::{ConstMapObserver, HitcountsMapObserver, TimeObserver},
+    schedulers::{IndexesLenTimeMinimizerScheduler, QueueScheduler},
+    stages::mutational::StdMutationalStage,
+    state::StdState,
+};
+pub use libafl_targets::{EDGES_MAP_PTR, EDGES_MAP_SIZE};
+use unicorn_engine::unicorn_const::{Arch, MemType, SECOND_SCALE};
+
+pub const MAX_INPUT_SIZE: usize = 0x8000; //1048576; // 1MB
+pub const DEBUG: bool = false;
+
+/// Runs the Unicorn-backed example: loads the target binary, then either
+/// emulates one hard-coded input (`should_emulate`) or starts the LibAFL
+/// fuzzing loop. The harness panics with "Success :)" once the byte read
+/// just below the stack pointer equals 0x5.
+fn fuzzer(should_emulate: bool) {
+    let arch = Arch::X86;
+    let input_addr_end: u64 = 0x8000;
+    let input_addr_start: u64 = input_addr_end - MAX_INPUT_SIZE as u64;
+    let emu = &mut Emulator::new(arch);
+    emu.setup(
+        input_addr_start,
+        MAX_INPUT_SIZE,
+        match arch {
+            Arch::ARM => "bin/foo_arm",
+            Arch::ARM64 => "bin/foo_arm64",
+            Arch::X86 => "bin/foo_x86",
+            _ => "",
+        },
+    );
+    emu.set_code_hook();
+    //emu.set_memory_hook(input_addr_start, MAX_INPUT_SIZE, callback);
+
+    let mut harness = |input: &BytesInput| {
+        let target = input.target_bytes();
+        let mut buf = target.as_slice();
+        let len = buf.len();
+        if len > MAX_INPUT_SIZE {
+            buf = &buf[0..MAX_INPUT_SIZE];
+        }
+
+        emu.write_mem(input_addr_end - buf.len() as u64, buf);
+
+        emu.init_registers(input_addr_end);
+
+        let result = emu.emu_start(
+            match emu.get_arch() {
+                Arch::ARM64 => CODE_ADDRESS + 0x40, // Position of main: 0x40 TODO: see if possible to get the main position from header file. Seems weird doing so
+                _ => CODE_ADDRESS,
+            },
+            CODE_ADDRESS + emu.get_code_len(),
+            10 * SECOND_SCALE,
+            0x1000,
+        );
+
+        match result {
+            Ok(_) => {
+                // never happens
+                panic!("huh");
+            }
+            Err(err) => {
+                let mut instruction = [0];
+
+                let pc = emu.pc_read().unwrap();
+                let sp = emu.get_stack_pointer();
+
+                if emu.get_arch() == Arch::X86 {
+                    emu.mem_read(pc, &mut instruction)
+                        .expect("could not read at pointer address");
+                }
+
+                if pc == 0 || instruction[0] == 0xC3 {
+                    // Did we reach the beginning of the stack, or is it a return (0xC3)?
+                    if DEBUG {
+                        println!("Reached start");
+                    }
+
+                    // check output
+                    let mut buf: [u8; 1] = [0];
+
+                    emu.mem_read(sp - 1, &mut buf)
+                        .expect("Could not read memory");
+
+                    // check result
+                    if buf[0] != 0x5 {
+                        // didn't find the correct value
+                        if DEBUG {
+                            println!("Incorrect output found!");
+                            println!("Output: {:#}", buf[0]);
+
+                            emu.memory_dump(2);
+                        }
+                        return ExitKind::Ok;
+                    }
+
+                    // success
+                    println!("Correct input found");
+                    println!("Output: {:#}", buf[0]);
+                    emu.memory_dump(2);
+
+                    panic!("Success :)");
+                } else {
+                    emu.debug_print(err);
+                }
+            }
+        }
+
+        ExitKind::Ok
+    };
+
+    if should_emulate {
+        println!("Starting emulation:");
+        let mem_data: Vec<u8> = vec![0x50, 0x24, 0x0];
+        harness(&BytesInput::from(mem_data));
+        return;
+    }
+
+    let timeout = Duration::from_secs(1);
+
+    let monitor = MultiMonitor::new(|s| println!("{s}"));
+    // The event manager handles the various events generated during the fuzzing loop
+    // such as the notification of the addition of a new item to the corpus
+    let mut mgr = SimpleEventManager::new(monitor);
+
+    let edges_observer = unsafe {
+        HitcountsMapObserver::new(ConstMapObserver::<_, EDGES_MAP_SIZE>::from_mut_ptr(
+            "edges",
+            EDGES_MAP_PTR,
+        ))
+    };
+
+    // Create an observation channel to keep track of the execution time
+    let time_observer = TimeObserver::new("time");
+
+    // Feedback to rate the interestingness of an input
+    // This one is composed by two Feedbacks in OR
+    let mut feedback = feedback_or!(
+        // New maximization map feedback linked to the edges observer and the feedback state
+        MaxMapFeedback::new_tracking(&edges_observer, true, false),
+        // Time feedback, this one does not need a feedback state
+        TimeFeedback::with_observer(&time_observer)
+    );
+
+    // A feedback to choose if an input is a solution or not
+    let mut objective = feedback_or_fast!(CrashFeedback::new(), TimeoutFeedback::new());
+
+    // create a State from scratch
+    let mut state = StdState::new(
+        // RNG
+        StdRand::with_seed(current_nanos()),
+        // Corpus that will be evolved, we keep it in memory for performance
+        InMemoryCorpus::new(),
+        // Corpus in which we store solutions (crashes in this example),
+        // on disk so the user can get them after stopping the fuzzer
+        OnDiskCorpus::new(PathBuf::from("./crashes")).unwrap(),
+        // States of the feedbacks.
+        // The feedbacks can report the data that should persist in the State.
+        &mut feedback,
+        // Same for objective feedbacks
+        &mut objective,
+    )
+    .unwrap();
+
+    // A minimization+queue policy to get testcases from the corpus
+    let scheduler = IndexesLenTimeMinimizerScheduler::new(QueueScheduler::new());
+
+    // A fuzzer with feedbacks and a corpus scheduler
+    let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
+
+    let executor = InProcessExecutor::new(
+        &mut harness,
+        tuple_list!(edges_observer, time_observer),
+        &mut fuzzer,
+        &mut state,
+        &mut mgr,
+    )
+    .expect("Failed to create the executor");
+
+    let mut executor = TimeoutExecutor::new(executor, timeout);
+
+    // Generator of random byte arrays of max size 4
+    let mut generator = RandBytesGenerator::new(4);
+
+    // Generate 8 initial inputs
+    state
+        .generate_initial_inputs(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 8)
+        .expect("Failed to generate the initial corpus");
+
+    // Setup a mutational stage with a basic bytes mutator
+    let mutator = StdScheduledMutator::new(havoc_mutations());
+    let mut stages = tuple_list!(StdMutationalStage::new(mutator));
+
+    fuzzer
+        .fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
+        .expect("Error in the fuzzing loop");
+}
+
+/// Memory-access hook that logs reads and writes when `DEBUG` is set.
+/// Currently unused: its registration in `fuzzer` is commented out.
+fn callback(
+    emu: &mut unicorn_engine::Unicorn<()>,
+    mem: MemType,
+    address: u64,
+    size: usize,
+    value: i64,
+) -> bool {
+    if DEBUG {
+        match mem {
+            MemType::WRITE => println!(
+                "0x{:X}\tMemory is being WRITTEN at adress: {:X} size: {} value: {}",
+                emu.pc_read().unwrap(),
+                address,
+                size,
+                value
+            ),
+            MemType::READ => println!(
+                "0x{:X}\tMemory is being READ at adress: {:X} size: {}",
+                emu.pc_read().unwrap(),
+                address,
+                size
+            ),
+            _ => println!(
+                "0x{:X}\tMemory access type: {:?} adress: {:X} size: {} value: {}",
+                emu.pc_read().unwrap(),
+                mem,
+                address,
+                size,
+                value
+            ),
+        }
+    }
+
+    true
+}
+
+/// Entry point: pass `emu` as the first argument to emulate a single input.
+fn main() {
+    let emulate_only = env::args().nth(1).as_deref() == Some("emu");
+    fuzzer(emulate_only);
+}
diff --git a/libafl_unicorn/Cargo.toml b/libafl_unicorn/Cargo.toml
new file mode 100644
index 0000000000..febbd38fdd
--- /dev/null
+++ b/libafl_unicorn/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "libafl_unicorn"
+version.workspace = true
+authors = [""]
+description = "Unicorn backend library for LibAFL"
+documentation = "https://docs.rs/"
+repository = "https://github.com/AFLplusplus/"
+readme = "../README.md"
+license = "MIT OR Apache-2.0"
+keywords = ["fuzzing", "unicorn"]
+edition = "2021"
+categories = ["development-tools::testing", "emulators", "embedded", "os", "no-std"]
+
+[dependencies]
+libafl = { path = "../libafl", default-features = false, features = ["std", "derive", "llmp_compression"] }
+libafl_targets = { path = "../libafl_targets" }
+hashbrown = { version = "0.12", features = ["serde", "ahash-compile-time-rng"] } # A faster hashmap, nostd compatible
+
+unicorn-engine = "2.0.1"
+iced-x86 = "1.18.0"
+
+[lib]
+name = "libafl_unicorn"
+crate-type = ["cdylib", "rlib"]
\ No newline at end of file
diff --git a/libafl_unicorn/src/emu.rs b/libafl_unicorn/src/emu.rs
new file mode 100644
index 0000000000..558c4e588b
--- /dev/null
+++ b/libafl_unicorn/src/emu.rs
@@ -0,0 +1,286 @@
+//! Thin wrapper around a Unicorn engine instance used by the LibAFL Unicorn example.
+
+use std::fs;
+
+use iced_x86::{Decoder, DecoderOptions, Formatter, Instruction, NasmFormatter};
+pub use libafl_targets::{edges_max_num, EDGES_MAP, EDGES_MAP_PTR, EDGES_MAP_SIZE, MAX_EDGES_NUM};
+use unicorn_engine::{
+    unicorn_const::{uc_error, Arch, HookType, MemType, Mode, Permission},
+    RegisterARM, RegisterARM64, RegisterX86, Unicorn,
+};
+
+/// Guest address at which the target code is loaded.
+pub static CODE_ADDRESS: u64 = 0x9000;
+/// Width, in bytes, of the hex column printed next to each disassembled instruction.
+static HEXBYTES_COLUMN_BYTE_LENGTH: usize = 10;
+/// Unicorn only maps regions whose size is a multiple of 4 KiB.
+const PAGE_SIZE: usize = 0x1000;
+/// Bytes disassembled on each side of the program counter by `debug_print`.
+const DISASM_WINDOW: u64 = 32;
+
+use crate::{helper::get_stack_pointer, hooks::block_hook};
+
+/// A Unicorn instance together with the length of the code loaded into it.
+pub struct Emulator {
+    emu: unicorn_engine::Unicorn<'static, ()>,
+    code_len: u64,
+}
+
+impl Emulator {
+    /// Creates an emulator for `arch`; ARM and ARM64 use `Mode::ARM`, anything else `MODE_64`.
+    pub fn new(arch: Arch) -> Emulator {
+        let emu = unicorn_engine::Unicorn::new(
+            arch,
+            match arch {
+                Arch::ARM => Mode::ARM,
+                Arch::ARM64 => Mode::ARM,
+                Arch::X86 => Mode::MODE_64,
+                _ => Mode::MODE_64,
+            },
+        )
+        .expect("failed to initialize Unicorn instance");
+        Emulator { emu, code_len: 0 }
+    }
+
+    /// Loads `code_path` at `CODE_ADDRESS` and maps a read/write region for the input.
+    pub fn setup(&mut self, input_addr: u64, input_size: usize, code_path: &str) {
+        self.code_len = load_code(&mut self.emu, CODE_ADDRESS, code_path);
+        // TODO: For some reason, the compiled program starts by subtracting 0x10 from SP
+        self.emu
+            .mem_map(input_addr, input_size, Permission::WRITE | Permission::READ)
+            .expect("failed to map data page");
+    }
+
+    /// Length in bytes of the code file loaded by `setup` (0 until `setup` has run).
+    pub fn get_code_len(&self) -> u64 {
+        self.code_len
+    }
+
+    /// Writes `buf` into guest memory at `addr`; panics if Unicorn rejects the write.
+    pub fn write_mem(&mut self, addr: u64, buf: &[u8]) {
+        //println!("{} -> {}", addr, addr + (buf.len() as u64));
+        self.emu
+            .mem_write(addr, buf)
+            .expect("failed to write instructions");
+    }
+
+    /// Installs `callback` as a `MEM_ALL` hook over `addr ..= addr + length`.
+    pub fn set_memory_hook<F: 'static>(&mut self, addr: u64, length: usize, callback: F)
+    where
+        F: FnMut(&mut Unicorn<()>, MemType, u64, usize, i64) -> bool,
+    {
+        self.emu
+            .add_mem_hook(HookType::MEM_ALL, addr, addr + length as u64, callback)
+            .expect("Could not set memory hooks");
+    }
+
+    /// Installs the edge-coverage block hook from `crate::hooks`.
+    pub fn set_code_hook(&mut self) {
+        self.emu
+            .add_block_hook(block_hook)
+            .expect("Failed to register code hook");
+    }
+
+    /// Writes `value` to register `regid`; panics on failure.
+    pub fn reg_write<T>(&mut self, regid: T, value: u64)
+    where
+        T: Into<i32>,
+    {
+        self.emu
+            .reg_write(regid, value)
+            .expect("Could not set registry");
+    }
+
+    /// Reads register `regid`.
+    pub fn reg_read<T>(&self, regid: T) -> Result<u64, uc_error>
+    where
+        T: Into<i32>,
+    {
+        self.emu.reg_read(regid)
+    }
+
+    /// Points the stack pointer at `sp`; on x86 every register is zeroed first.
+    pub fn init_registers(&mut self, sp: u64) {
+        match self.emu.get_arch() {
+            Arch::ARM => {
+                self.emu
+                    .reg_write(RegisterARM::SP, sp)
+                    .expect("Could not setup register");
+            }
+            Arch::ARM64 => {
+                self.emu
+                    .reg_write(RegisterARM64::SP, sp)
+                    .expect("Could not setup register");
+            }
+            Arch::X86 => {
+                // clean emulator state
+                // NOTE(review): 1..259 presumably spans the x86 register ids; confirm.
+                for i in 1..259 {
+                    self.emu.reg_write(i, 0).expect("Could not clean register");
+                }
+
+                // The engine runs in MODE_64, so set the full 64-bit RSP, not just ESP.
+                self.emu
+                    .reg_write(RegisterX86::RSP, sp)
+                    .expect("Could not setup register");
+            }
+            _ => {}
+        }
+    }
+
+    /// Reads guest memory at `address` into `buf`.
+    pub fn mem_read(&self, address: u64, buf: &mut [u8]) -> Result<(), uc_error> {
+        self.emu.mem_read(address, buf)
+    }
+
+    /// Current stack pointer (0 on architectures other than ARM, ARM64 and x86).
+    pub fn get_stack_pointer(&mut self) -> u64 {
+        get_stack_pointer(&mut self.emu)
+    }
+
+    /// Current program counter.
+    pub fn pc_read(&self) -> Result<u64, uc_error> {
+        self.emu.pc_read()
+    }
+
+    /// Architecture this emulator was created for.
+    pub fn get_arch(&self) -> Arch {
+        self.emu.get_arch()
+    }
+
+    /// Prints the `len` 4-byte words just below the stack pointer in hex and binary.
+    pub fn memory_dump(&mut self, len: u64) {
+        let sp = get_stack_pointer(&mut self.emu);
+        for i in 0..len {
+            let pos = sp + i * 4 - len * 4;
+
+            let data = self.emu.mem_read_as_vec(pos, 4).unwrap();
+
+            println!(
+                "{:X}:\t {:02X} {:02X} {:02X} {:02X} {:08b} {:08b} {:08b} {:08b}",
+                pos, data[0], data[1], data[2], data[3], data[0], data[1], data[2], data[3]
+            );
+        }
+    }
+
+    /// Runs the guest from `begin` until `until`, `timeout` or `count` instructions.
+    pub fn emu_start(
+        &mut self,
+        begin: u64,
+        until: u64,
+        timeout: u64,
+        count: usize,
+    ) -> Result<(), uc_error> {
+        self.emu.emu_start(begin, until, timeout, count)
+    }
+
+    /// Prints `err`, the main registers and (on x86) a disassembly around the program counter.
+    pub fn debug_print(&self, err: uc_error) {
+        println!();
+        println!("Snap... something went wrong");
+        println!("Error: {:?}", err);
+
+        let pc = self.emu.pc_read().unwrap();
+        println!();
+        println!("Status when crash happened");
+
+        println!("PC: {:X}", pc);
+        let arch = self.emu.get_arch();
+
+        match arch {
+            Arch::ARM => {
+                println!("SP: {:X}", self.emu.reg_read(RegisterARM::SP).unwrap());
+            }
+            Arch::ARM64 => {
+                println!("SP: {:X}", self.emu.reg_read(RegisterARM64::SP).unwrap());
+                println!("X0: {:X}", self.emu.reg_read(RegisterARM64::X0).unwrap());
+                println!("X1: {:X}", self.emu.reg_read(RegisterARM64::X1).unwrap());
+                println!("X2: {:X}", self.emu.reg_read(RegisterARM64::X2).unwrap());
+                println!("X3: {:X}", self.emu.reg_read(RegisterARM64::X3).unwrap());
+            }
+            Arch::X86 => {
+                println!("ESP: {:X}", self.emu.reg_read(RegisterX86::ESP).unwrap());
+                println!("RAX: {:X}", self.emu.reg_read(RegisterX86::RAX).unwrap());
+                println!("RCX: {:X}", self.emu.reg_read(RegisterX86::RCX).unwrap());
+                println!("RBP: {:X}", self.emu.reg_read(RegisterX86::RBP).unwrap());
+                println!("RSP: {:X}", self.emu.reg_read(RegisterX86::RSP).unwrap());
+                println!("EAX: {:X}", self.emu.reg_read(RegisterX86::EAX).unwrap());
+                println!("ECX: {:X}", self.emu.reg_read(RegisterX86::ECX).unwrap());
+                println!("EDX: {:X}", self.emu.reg_read(RegisterX86::EDX).unwrap());
+            }
+            _ => {}
+        }
+
+        if arch == Arch::X86 {
+            // Provide disassembly at the instant of the crash for x86 code
+            let regions = self
+                .emu
+                .mem_regions()
+                .expect("Could not get memory regions");
+            for region in &regions {
+                let holds_pc = pc >= region.begin && pc <= region.end;
+                if region.perms != Permission::EXEC || !holds_pc {
+                    continue;
+                }
+
+                // Saturating arithmetic: a PC near 0 or u64::MAX must not overflow here.
+                let begin = pc.saturating_sub(DISASM_WINDOW).max(region.begin);
+                let end = pc.saturating_add(DISASM_WINDOW).min(region.end);
+
+                let bytes = self
+                    .emu
+                    .mem_read_as_vec(begin, (end - begin) as usize)
+                    .expect("Could not get program code");
+
+                let mut decoder = Decoder::with_ip(64, &bytes, begin, DecoderOptions::NONE);
+
+                let mut formatter = NasmFormatter::new();
+                formatter.options_mut().set_digit_separator("`");
+                formatter.options_mut().set_first_operand_char_index(10);
+
+                let mut instruction = Instruction::default();
+                let mut output = String::new();
+
+                while decoder.can_decode() {
+                    decoder.decode_out(&mut instruction);
+
+                    // Format the instruction ("disassemble" it)
+                    output.clear();
+                    formatter.format(&instruction, &mut output);
+
+                    let diff = instruction.ip() as i64 - pc as i64;
+                    print!("{:02}\t{:016X} ", diff, instruction.ip());
+                    let start_index = (instruction.ip() - begin) as usize;
+                    let instr_bytes = &bytes[start_index..start_index + instruction.len()];
+                    for b in instr_bytes.iter() {
+                        print!("{:02X}", b);
+                    }
+                    // Pad the hex column: two characters per missing byte.
+                    for _ in instr_bytes.len()..HEXBYTES_COLUMN_BYTE_LENGTH {
+                        print!("  ");
+                    }
+                    println!(" {}", output);
+                }
+            }
+        }
+    }
+}
+
+/// Reads the file at `path`, maps an executable region at `address` and copies the file into it.
+///
+/// Returns the file length in bytes. Panics if the file cannot be read or mapped.
+fn load_code(emu: &mut Unicorn<()>, address: u64, path: &str) -> u64 {
+    // read the whole file
+    let code = fs::read(path).expect("Could not read file");
+
+    // Define memory regions. Unicorn rejects sizes that are not a multiple of 4 KiB,
+    // so round up to whole pages on every architecture.
+    let map_size = (code.len() / PAGE_SIZE + 1) * PAGE_SIZE;
+    emu.mem_map(address, map_size, Permission::EXEC)
+        .expect("failed to map code page");
+
+    // Write memory
+    emu.mem_write(address, &code)
+        .expect("failed to write instructions");
+    code.len() as u64
+}
diff --git a/libafl_unicorn/src/helper.rs b/libafl_unicorn/src/helper.rs
new file mode 100644
index 0000000000..756cd84a52
--- /dev/null
+++ b/libafl_unicorn/src/helper.rs
@@ -0,0 +1,14 @@
+use unicorn_engine::{unicorn_const::Arch, RegisterARM, RegisterARM64, RegisterX86};
+
+/// Reads the stack pointer register for the emulator's architecture.
+///
+/// Returns 0 for architectures other than ARM, ARM64 and x86; panics if the read fails.
+pub fn get_stack_pointer(emu: &mut unicorn_engine::Unicorn<()>) -> u64 {
+    match emu.get_arch() {
+        Arch::ARM => emu.reg_read(RegisterARM::SP).unwrap(),
+        Arch::ARM64 => emu.reg_read(RegisterARM64::SP).unwrap(),
+        // RSP rather than ESP: x86 is emulated in 64-bit mode.
+        Arch::X86 => emu.reg_read(RegisterX86::RSP).unwrap(),
+        _ => 0,
+    }
+}
diff --git a/libafl_unicorn/src/hooks.rs b/libafl_unicorn/src/hooks.rs
new file mode 100644
index 0000000000..c4494da2b7
--- /dev/null
+++ b/libafl_unicorn/src/hooks.rs
@@ -0,0 +1,19 @@
+pub use libafl_targets::{edges_max_num, EDGES_MAP, EDGES_MAP_PTR, EDGES_MAP_SIZE, MAX_EDGES_NUM};
+
+/// Previous block address, shifted right by one, as left by the last `block_hook` call.
+static mut PREV_LOC: u64 = 0;
+
+/// Basic-block hook: bumps the `EDGES_MAP` slot indexed by `address ^ PREV_LOC`.
+pub fn block_hook(_emu: &mut unicorn_engine::Unicorn<()>, address: u64, _size: u32) {
+    // SAFETY: `PREV_LOC` and `EDGES_MAP` are mutable statics accessed without
+    // synchronization; this assumes the hook only ever runs on one thread (TODO confirm).
+    unsafe {
+        // The mask assumes EDGES_MAP_SIZE is a power of two (TODO confirm).
+        let hash = (address ^ PREV_LOC) & (EDGES_MAP_SIZE as u64 - 1);
+        //println!("Block hook: 0x{:X}\t size:{:#} hash: {:X}", address, _size, hash);
+        // Wrap instead of `+= 1`: a frequently hit block would otherwise overflow
+        // the map counter and panic in builds with overflow checks enabled.
+        EDGES_MAP[hash as usize] = EDGES_MAP[hash as usize].wrapping_add(1);
+        PREV_LOC = address >> 1;
+    }
+}
diff --git a/libafl_unicorn/src/lib.rs b/libafl_unicorn/src/lib.rs
new file mode 100644
index 0000000000..f6cef3da8f
--- /dev/null
+++ b/libafl_unicorn/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod emu;
+pub mod helper;
+pub mod hooks;